You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2017/07/21 06:42:07 UTC
hive git commit: Revert "HIVE-17116: Vectorization: Add
infrastructure for vectorization of ROW__ID struct (Matt McCline,
reviewed by Teddy Choi)"
Repository: hive
Updated Branches:
refs/heads/master 996fa0704 -> aa5e9bfab
Revert "HIVE-17116: Vectorization: Add infrastructure for vectorization of ROW__ID struct (Matt McCline, reviewed by Teddy Choi)"
This reverts commit 996fa070410b673ebd47511b33c78da4c4757723.
Transaction ids vary in Hive QA runs and break vector_row__id.q.out query results.
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/aa5e9bfa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/aa5e9bfa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/aa5e9bfa
Branch: refs/heads/master
Commit: aa5e9bfab58de9985239bfc8c13dc1f1b21ff67d
Parents: 996fa07
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri Jul 21 01:40:22 2017 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri Jul 21 01:40:22 2017 -0500
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 4 -
.../test/resources/testconfiguration.properties | 1 -
.../hive/llap/io/api/impl/LlapInputFormat.java | 4 +-
.../hive/ql/exec/vector/VectorMapOperator.java | 41 +-
.../ql/exec/vector/VectorizationContext.java | 2 -
.../ql/exec/vector/VectorizedRowBatchCtx.java | 43 +-
.../hadoop/hive/ql/metadata/VirtualColumn.java | 14 -
.../hive/ql/optimizer/physical/Vectorizer.java | 164 ++---
.../queries/clientpositive/vector_row__id.q | 56 --
.../clientpositive/llap/vector_row__id.q.out | 605 -------------------
.../results/clientpositive/vector_row__id.q.out | 491 ---------------
11 files changed, 48 insertions(+), 1377 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index df45f2c..f360dfa 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2856,10 +2856,6 @@ public class HiveConf extends Configuration {
"of aggregations that use complex types.\n",
"For example, AVG uses a complex type (STRUCT) for partial aggregation results" +
"The default value is true."),
- HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED("hive.vectorized.row.identifier.enabled", false,
- "This flag should be set to true to enable vectorization\n" +
- "of ROW__ID.\n" +
- "The default value is false."),
HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control "
+ "whether to check, convert, and normalize partition value to conform to its column type in "
http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index f66e19b..cffe245 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -356,7 +356,6 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
vector_reduce2.q,\
vector_reduce3.q,\
vector_reduce_groupby_decimal.q,\
- vector_row__id.q,\
vector_string_concat.q,\
vector_struct_in.q,\
vector_udf_character_length.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index 79ec4ed..22ca025 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -190,10 +190,8 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB
}
}
}
- // UNDONE: Virtual column support?
return new VectorizedRowBatchCtx(colNames.toArray(new String[colNames.size()]),
- colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount,
- new VirtualColumn[0], new String[0]);
+ colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount, new String[0]);
}
static TableScanOperator findTsOp(MapWork mapWork) throws HiveException {
http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
index 1ac8914..ed50df2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
@@ -39,7 +39,6 @@ import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -126,9 +125,6 @@ public class VectorMapOperator extends AbstractMapOperator {
private transient int dataColumnCount;
private transient int partitionColumnCount;
private transient Object[] partitionValues;
- private transient int virtualColumnCount;
- private transient boolean hasRowIdentifier;
- private transient int rowIdentifierColumnNum;
private transient boolean[] dataColumnsToIncludeTruncated;
@@ -508,19 +504,6 @@ public class VectorMapOperator extends AbstractMapOperator {
dataColumnCount = batchContext.getDataColumnCount();
partitionColumnCount = batchContext.getPartitionColumnCount();
partitionValues = new Object[partitionColumnCount];
- virtualColumnCount = batchContext.getVirtualColumnCount();
- rowIdentifierColumnNum = -1;
- if (virtualColumnCount > 0) {
- final int firstVirtualColumnNum = dataColumnCount + partitionColumnCount;
- VirtualColumn[] neededVirtualColumns = batchContext.getNeededVirtualColumns();
- hasRowIdentifier = (neededVirtualColumns[0] == VirtualColumn.ROWID);
- if (hasRowIdentifier) {
- rowIdentifierColumnNum = firstVirtualColumnNum;
- }
- } else {
- hasRowIdentifier = false;
- }
-
dataColumnNums = batchContext.getDataColumnNums();
Preconditions.checkState(dataColumnNums != null);
@@ -618,13 +601,6 @@ public class VectorMapOperator extends AbstractMapOperator {
currentVectorPartContext.partName);
}
- private void setRowIdentiferToNull(VectorizedRowBatch batch) {
- ColumnVector rowIdentifierColVector = batch.cols[rowIdentifierColumnNum];
- rowIdentifierColVector.isNull[0] = true;
- rowIdentifierColVector.noNulls = false;
- rowIdentifierColVector.isRepeating = true;
- }
-
/*
* Setup the context for reading from the next partition file.
*/
@@ -719,12 +695,6 @@ public class VectorMapOperator extends AbstractMapOperator {
batchContext.addPartitionColsToBatch(deserializerBatch, partitionValues);
}
- if (hasRowIdentifier) {
-
- // No ACID in code path -- set ROW__ID to NULL.
- setRowIdentiferToNull(deserializerBatch);
- }
-
/*
* Set or clear the rest of the reading variables based on {vector|row} deserialization.
*/
@@ -808,16 +778,7 @@ public class VectorMapOperator extends AbstractMapOperator {
*/
batchCounter++;
if (value != null) {
- VectorizedRowBatch batch = (VectorizedRowBatch) value;
- numRows += batch.size;
- if (hasRowIdentifier) {
-
- // UNDONE: Pass ROW__ID STRUCT column through IO Context to get filled in by ACID reader
- // UNDONE: Or, perhaps tell it to do it before calling us, ...
- // UNDONE: For now, set column to NULL.
-
- setRowIdentiferToNull(batch);
- }
+ numRows += ((VectorizedRowBatch) value).size;
}
oneRootOperator.process(value, 0);
if (oneRootOperator.getDone()) {
http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index fcebb6f..9e026f0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -124,7 +124,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -222,7 +221,6 @@ public class VectorizationContext {
projectedColumns.add(i);
projectionColumnMap.put(projectionColumnNames.get(i), i);
}
-
int firstOutputColumnIndex = projectedColumns.size();
this.ocm = new OutputColumnManager(firstOutputColumnIndex);
this.firstOutputColumnIndex = firstOutputColumnIndex;
http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index 90d1372..3c12e04 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -38,7 +38,6 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.IOPrepareCache;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.plan.Explain;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -79,8 +78,6 @@ public class VectorizedRowBatchCtx {
private int[] dataColumnNums;
private int dataColumnCount;
private int partitionColumnCount;
- private int virtualColumnCount;
- private VirtualColumn[] neededVirtualColumns;
private String[] scratchColumnTypeNames;
@@ -91,17 +88,14 @@ public class VectorizedRowBatchCtx {
}
public VectorizedRowBatchCtx(String[] rowColumnNames, TypeInfo[] rowColumnTypeInfos,
- int[] dataColumnNums, int partitionColumnCount, VirtualColumn[] neededVirtualColumns,
- String[] scratchColumnTypeNames) {
+ int[] dataColumnNums, int partitionColumnCount, String[] scratchColumnTypeNames) {
this.rowColumnNames = rowColumnNames;
this.rowColumnTypeInfos = rowColumnTypeInfos;
this.dataColumnNums = dataColumnNums;
this.partitionColumnCount = partitionColumnCount;
- this.neededVirtualColumns = neededVirtualColumns;
- this.virtualColumnCount = neededVirtualColumns.length;
this.scratchColumnTypeNames = scratchColumnTypeNames;
- dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount - virtualColumnCount;
+ dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount;
}
public String[] getRowColumnNames() {
@@ -124,14 +118,6 @@ public class VectorizedRowBatchCtx {
return partitionColumnCount;
}
- public int getVirtualColumnCount() {
- return virtualColumnCount;
- }
-
- public VirtualColumn[] getNeededVirtualColumns() {
- return neededVirtualColumns;
- }
-
public String[] getScratchColumnTypeNames() {
return scratchColumnTypeNames;
}
@@ -152,8 +138,6 @@ public class VectorizedRowBatchCtx {
rowColumnTypeInfos = VectorizedBatchUtil.typeInfosFromStructObjectInspector(structObjectInspector);
dataColumnNums = null;
partitionColumnCount = 0;
- virtualColumnCount = 0;
- neededVirtualColumns = new VirtualColumn[0];
dataColumnCount = rowColumnTypeInfos.length;
// Scratch column information.
@@ -220,14 +204,13 @@ public class VectorizedRowBatchCtx {
*/
public VectorizedRowBatch createVectorizedRowBatch()
{
- final int nonScratchColumnCount = rowColumnTypeInfos.length;
- final int totalColumnCount =
- nonScratchColumnCount + scratchColumnTypeNames.length;
+ final int dataAndPartColumnCount = rowColumnTypeInfos.length;
+ final int totalColumnCount = dataAndPartColumnCount + scratchColumnTypeNames.length;
VectorizedRowBatch result = new VectorizedRowBatch(totalColumnCount);
if (dataColumnNums == null) {
// All data and partition columns.
- for (int i = 0; i < nonScratchColumnCount; i++) {
+ for (int i = 0; i < dataAndPartColumnCount; i++) {
TypeInfo typeInfo = rowColumnTypeInfos[i];
result.cols[i] = VectorizedBatchUtil.createColumnVector(typeInfo);
}
@@ -235,30 +218,24 @@ public class VectorizedRowBatchCtx {
// Create only needed/included columns data columns.
for (int i = 0; i < dataColumnNums.length; i++) {
int columnNum = dataColumnNums[i];
- Preconditions.checkState(columnNum < nonScratchColumnCount);
+ Preconditions.checkState(columnNum < dataAndPartColumnCount);
TypeInfo typeInfo = rowColumnTypeInfos[columnNum];
result.cols[columnNum] = VectorizedBatchUtil.createColumnVector(typeInfo);
}
- // Always create partition and virtual columns.
- final int partitionEndColumnNum = dataColumnCount + partitionColumnCount;
- for (int partitionColumnNum = dataColumnCount; partitionColumnNum < partitionEndColumnNum; partitionColumnNum++) {
+ // Always create partition columns.
+ final int endColumnNum = dataColumnCount + partitionColumnCount;
+ for (int partitionColumnNum = dataColumnCount; partitionColumnNum < endColumnNum; partitionColumnNum++) {
TypeInfo typeInfo = rowColumnTypeInfos[partitionColumnNum];
result.cols[partitionColumnNum] = VectorizedBatchUtil.createColumnVector(typeInfo);
}
- final int virtualEndColumnNum = partitionEndColumnNum + virtualColumnCount;
- for (int virtualColumnNum = partitionEndColumnNum; virtualColumnNum < virtualEndColumnNum; virtualColumnNum++) {
- TypeInfo typeInfo = rowColumnTypeInfos[virtualColumnNum];
- result.cols[virtualColumnNum] = VectorizedBatchUtil.createColumnVector(typeInfo);
- }
}
for (int i = 0; i < scratchColumnTypeNames.length; i++) {
String typeName = scratchColumnTypeNames[i];
- result.cols[nonScratchColumnCount + i] =
+ result.cols[rowColumnTypeInfos.length + i] =
VectorizedBatchUtil.createColumnVector(typeName);
}
- // UNDONE: Also remember virtualColumnCount...
result.setPartitionInfo(dataColumnCount, partitionColumnCount);
result.reset();
http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
index 0032305..2435bf1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
@@ -20,13 +20,10 @@ package org.apache.hadoop.hive.ql.metadata;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
-import java.util.Map;
import com.google.common.collect.ImmutableSet;
-import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import org.apache.hadoop.conf.Configuration;
@@ -67,17 +64,6 @@ public enum VirtualColumn {
ImmutableSet.of(FILENAME.getName(), BLOCKOFFSET.getName(), ROWOFFSET.getName(),
RAWDATASIZE.getName(), GROUPINGID.getName(), ROWID.getName());
- public static final ImmutableMap<String, VirtualColumn> VIRTUAL_COLUMN_NAME_MAP =
- new ImmutableMap.Builder<String, VirtualColumn>().putAll(getColumnNameMap()).build();
-
- private static Map<String, VirtualColumn> getColumnNameMap() {
- Map<String, VirtualColumn> map = new HashMap<String, VirtualColumn>();
- for (VirtualColumn virtualColumn : values()) {
- map.put(virtualColumn.name, virtualColumn);
- }
- return map;
- }
-
private final String name;
private final TypeInfo typeInfo;
private final boolean isHidden;
http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 0913f40..8183194 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -75,7 +75,6 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.HiveVectorAdaptorUsageMode;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
@@ -230,7 +229,6 @@ import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hive.common.util.AnnotationUtils;
import org.apache.hadoop.util.ReflectionUtils;
-import com.google.common.collect.ImmutableSet;
import com.google.common.base.Preconditions;
public class Vectorizer implements PhysicalPlanResolver {
@@ -276,10 +274,6 @@ public class Vectorizer implements PhysicalPlanResolver {
private Set<String> supportedAggregationUdfs = new HashSet<String>();
- // The set of virtual columns that vectorized readers *MAY* support.
- public static final ImmutableSet<VirtualColumn> vectorizableVirtualColumns =
- ImmutableSet.of(VirtualColumn.ROWID);
-
private HiveConf hiveConf;
private boolean useVectorizedInputFileFormat;
@@ -289,7 +283,6 @@ public class Vectorizer implements PhysicalPlanResolver {
private boolean isPtfVectorizationEnabled;
private boolean isVectorizationComplexTypesEnabled;
private boolean isVectorizationGroupByComplexTypesEnabled;
- private boolean isVectorizedRowIdentifierEnabled;
private boolean isSchemaEvolution;
@@ -323,9 +316,6 @@ public class Vectorizer implements PhysicalPlanResolver {
private long vectorizedVertexNum = -1;
- private Set<VirtualColumn> availableVectorizedVirtualColumnSet = null;
- private Set<VirtualColumn> neededVirtualColumnSet = null;
-
public Vectorizer() {
/*
@@ -463,8 +453,6 @@ public class Vectorizer implements PhysicalPlanResolver {
List<Integer> dataColumnNums;
int partitionColumnCount;
- List<VirtualColumn> availableVirtualColumnList;
- List<VirtualColumn> neededVirtualColumnList;
boolean useVectorizedInputFileFormat;
boolean groupByVectorOutput;
@@ -500,12 +488,6 @@ public class Vectorizer implements PhysicalPlanResolver {
public void setPartitionColumnCount(int partitionColumnCount) {
this.partitionColumnCount = partitionColumnCount;
}
- public void setAvailableVirtualColumnList(List<VirtualColumn> availableVirtualColumnList) {
- this.availableVirtualColumnList = availableVirtualColumnList;
- }
- public void setNeededVirtualColumnList(List<VirtualColumn> neededVirtualColumnList) {
- this.neededVirtualColumnList = neededVirtualColumnList;
- }
public void setScratchTypeNameArray(String[] scratchTypeNameArray) {
this.scratchTypeNameArray = scratchTypeNameArray;
}
@@ -540,16 +522,6 @@ public class Vectorizer implements PhysicalPlanResolver {
public void transferToBaseWork(BaseWork baseWork) {
- final int virtualColumnCount;
- VirtualColumn[] neededVirtualColumns;
- if (neededVirtualColumnList != null && neededVirtualColumnList.size() > 0) {
- virtualColumnCount = neededVirtualColumnList.size();
- neededVirtualColumns = neededVirtualColumnList.toArray(new VirtualColumn[0]);
- } else {
- virtualColumnCount = 0;
- neededVirtualColumns = new VirtualColumn[0];
- }
-
String[] allColumnNameArray = allColumnNames.toArray(new String[0]);
TypeInfo[] allTypeInfoArray = allTypeInfos.toArray(new TypeInfo[0]);
int[] dataColumnNumsArray;
@@ -565,7 +537,6 @@ public class Vectorizer implements PhysicalPlanResolver {
allTypeInfoArray,
dataColumnNumsArray,
partitionColumnCount,
- neededVirtualColumns,
scratchTypeNameArray);
baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx);
@@ -716,41 +687,20 @@ public class Vectorizer implements PhysicalPlanResolver {
}
private void getTableScanOperatorSchemaInfo(TableScanOperator tableScanOperator,
- List<String> logicalColumnNameList, List<TypeInfo> logicalTypeInfoList,
- List<VirtualColumn> availableVirtualColumnList) {
+ List<String> logicalColumnNameList, List<TypeInfo> logicalTypeInfoList) {
- // Add all columns to make a vectorization context for
+ // Add all non-virtual columns to make a vectorization context for
// the TableScan operator.
RowSchema rowSchema = tableScanOperator.getSchema();
for (ColumnInfo c : rowSchema.getSignature()) {
+ // Validation will later exclude vectorization of virtual columns usage (HIVE-5560).
+ if (!isVirtualColumn(c)) {
+ String columnName = c.getInternalName();
+ String typeName = c.getTypeName();
+ TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
- // Validation will later exclude vectorization of virtual columns usage if necessary.
- String columnName = c.getInternalName();
-
- // Turns out partition columns get marked as virtual in ColumnInfo, so we need to
- // check the VirtualColumn directly.
- VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName);
- if (virtualColumn == null) {
logicalColumnNameList.add(columnName);
- logicalTypeInfoList.add(TypeInfoUtils.getTypeInfoFromTypeString(c.getTypeName()));
- } else {
-
- // The planner gives us a subset virtual columns available for this table scan.
- // AND
- // We only support some virtual columns in vectorization.
- //
- // So, create the intersection. Note these are available vectorizable virtual columns.
- // Later we remember which virtual columns were *actually used* in the query so
- // just those will be included in the Map VectorizedRowBatchCtx that has the
- // information for creating the Map VectorizedRowBatch.
- //
- if (!vectorizableVirtualColumns.contains(virtualColumn)) {
- continue;
- }
- if (virtualColumn == VirtualColumn.ROWID && !isVectorizedRowIdentifierEnabled) {
- continue;
- }
- availableVirtualColumnList.add(virtualColumn);
+ logicalTypeInfoList.add(typeInfo);
}
}
}
@@ -943,19 +893,14 @@ public class Vectorizer implements PhysicalPlanResolver {
boolean isAcidTable = tableScanOperator.getConf().isAcidTable();
// These names/types are the data columns plus partition columns.
- final List<String> dataAndPartColumnNameList = new ArrayList<String>();
- final List<TypeInfo> dataAndPartTypeInfoList = new ArrayList<TypeInfo>();
-
- final List<VirtualColumn> availableVirtualColumnList = new ArrayList<VirtualColumn>();
+ final List<String> allColumnNameList = new ArrayList<String>();
+ final List<TypeInfo> allTypeInfoList = new ArrayList<TypeInfo>();
- getTableScanOperatorSchemaInfo(
- tableScanOperator,
- dataAndPartColumnNameList, dataAndPartTypeInfoList,
- availableVirtualColumnList);
+ getTableScanOperatorSchemaInfo(tableScanOperator, allColumnNameList, allTypeInfoList);
final List<Integer> dataColumnNums = new ArrayList<Integer>();
- final int dataAndPartColumnCount = dataAndPartColumnNameList.size();
+ final int allColumnCount = allColumnNameList.size();
/*
* Validate input formats of all the partitions can be vectorized.
@@ -1011,17 +956,17 @@ public class Vectorizer implements PhysicalPlanResolver {
LinkedHashMap<String, String> partSpec = partDesc.getPartSpec();
if (partSpec != null && partSpec.size() > 0) {
partitionColumnCount = partSpec.size();
- dataColumnCount = dataAndPartColumnCount - partitionColumnCount;
+ dataColumnCount = allColumnCount - partitionColumnCount;
} else {
partitionColumnCount = 0;
- dataColumnCount = dataAndPartColumnCount;
+ dataColumnCount = allColumnCount;
}
- determineDataColumnNums(tableScanOperator, dataAndPartColumnNameList, dataColumnCount,
+ determineDataColumnNums(tableScanOperator, allColumnNameList, dataColumnCount,
dataColumnNums);
- tableDataColumnList = dataAndPartColumnNameList.subList(0, dataColumnCount);
- tableDataTypeInfoList = dataAndPartTypeInfoList.subList(0, dataColumnCount);
+ tableDataColumnList = allColumnNameList.subList(0, dataColumnCount);
+ tableDataTypeInfoList = allTypeInfoList.subList(0, dataColumnCount);
isFirst = false;
}
@@ -1093,14 +1038,10 @@ public class Vectorizer implements PhysicalPlanResolver {
vectorPartDesc.setDataTypeInfos(nextDataTypeInfoList);
}
- // For now, we don't know which virtual columns are going to be included. We'll add them
- // later...
- vectorTaskColumnInfo.setAllColumnNames(dataAndPartColumnNameList);
- vectorTaskColumnInfo.setAllTypeInfos(dataAndPartTypeInfoList);
-
+ vectorTaskColumnInfo.setAllColumnNames(allColumnNameList);
+ vectorTaskColumnInfo.setAllTypeInfos(allTypeInfoList);
vectorTaskColumnInfo.setDataColumnNums(dataColumnNums);
vectorTaskColumnInfo.setPartitionColumnCount(partitionColumnCount);
- vectorTaskColumnInfo.setAvailableVirtualColumnList(availableVirtualColumnList);
vectorTaskColumnInfo.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat);
// Always set these so EXPLAIN can see.
@@ -1141,14 +1082,6 @@ public class Vectorizer implements PhysicalPlanResolver {
return false;
}
- // Set global member indicating which virtual columns are possible to be used by
- // the Map vertex.
- availableVectorizedVirtualColumnSet = new HashSet<VirtualColumn>();
- availableVectorizedVirtualColumnSet.addAll(vectorTaskColumnInfo.availableVirtualColumnList);
-
- // And, use set to remember which virtual columns were actually referenced.
- neededVirtualColumnSet = new HashSet<VirtualColumn>();
-
// Now we are enabled and any issues found from here on out are considered
// not vectorized issues.
mapWork.setVectorizationEnabled(true);
@@ -1171,21 +1104,6 @@ public class Vectorizer implements PhysicalPlanResolver {
}
}
}
-
- List<VirtualColumn> neededVirtualColumnList = new ArrayList<VirtualColumn>();
- if (!neededVirtualColumnSet.isEmpty()) {
-
- // Create needed in same order.
- for (VirtualColumn virtualColumn : vectorTaskColumnInfo.availableVirtualColumnList) {
- if (neededVirtualColumnSet.contains(virtualColumn)) {
- neededVirtualColumnList.add(virtualColumn);
- vectorTaskColumnInfo.allColumnNames.add(virtualColumn.getName());
- vectorTaskColumnInfo.allTypeInfos.add(virtualColumn.getTypeInfo());
- }
- }
- }
-
- vectorTaskColumnInfo.setNeededVirtualColumnList(neededVirtualColumnList);
vectorTaskColumnInfo.setNonVectorizedOps(vnp.getNonVectorizedOps());
return true;
}
@@ -1819,10 +1737,6 @@ public class Vectorizer implements PhysicalPlanResolver {
HiveConf.getBoolVar(hiveConf,
HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED);
- isVectorizedRowIdentifierEnabled =
- HiveConf.getBoolVar(hiveConf,
- HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED);
-
isSchemaEvolution =
HiveConf.getBoolVar(hiveConf,
HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION);
@@ -2414,24 +2328,10 @@ public class Vectorizer implements PhysicalPlanResolver {
VectorExpressionDescriptor.Mode mode, boolean allowComplex) {
if (desc instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
- String columnName = c.getColumn();
-
- if (availableVectorizedVirtualColumnSet != null) {
-
- // For Map, check for virtual columns.
- VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName);
- if (virtualColumn != null) {
-
- // We support some virtual columns in vectorization for this table scan.
-
- if (!availableVectorizedVirtualColumnSet.contains(virtualColumn)) {
- setExpressionIssue(expressionTitle, "Virtual column " + columnName + " is not supported");
- return false;
- }
-
- // Remember we used this one in the query.
- neededVirtualColumnSet.add(virtualColumn);
- }
+ // Currently, we do not support vectorized virtual columns (see HIVE-5570).
+ if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) {
+ setExpressionIssue(expressionTitle, "Virtual columns not supported (" + c.getColumn() + ")");
+ return false;
}
}
String typeName = desc.getTypeInfo().getTypeName();
@@ -4280,20 +4180,28 @@ public class Vectorizer implements PhysicalPlanResolver {
return vectorOp;
}
+ private boolean isVirtualColumn(ColumnInfo column) {
+
+ // Not using method column.getIsVirtualCol() because partitioning columns are also
+ // treated as virtual columns in ColumnInfo.
+ if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(column.getInternalName())) {
+ return true;
+ }
+ return false;
+ }
+
public void debugDisplayAllMaps(BaseWork work) {
VectorizedRowBatchCtx vectorizedRowBatchCtx = work.getVectorizedRowBatchCtx();
String[] allColumnNames = vectorizedRowBatchCtx.getRowColumnNames();
- TypeInfo[] columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos();
+ Object columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos();
int partitionColumnCount = vectorizedRowBatchCtx.getPartitionColumnCount();
- int virtualColumnCount = vectorizedRowBatchCtx.getVirtualColumnCount();
String[] scratchColumnTypeNames =vectorizedRowBatchCtx.getScratchColumnTypeNames();
- LOG.debug("debugDisplayAllMaps rowColumnNames " + Arrays.toString(allColumnNames));
- LOG.debug("debugDisplayAllMaps rowColumnTypeInfos " + Arrays.toString(columnTypeInfos));
+ LOG.debug("debugDisplayAllMaps allColumnNames " + Arrays.toString(allColumnNames));
+ LOG.debug("debugDisplayAllMaps columnTypeInfos " + Arrays.deepToString((Object[]) columnTypeInfos));
LOG.debug("debugDisplayAllMaps partitionColumnCount " + partitionColumnCount);
- LOG.debug("debugDisplayAllMaps virtualColumnCount " + virtualColumnCount);
LOG.debug("debugDisplayAllMaps scratchColumnTypeNames " + Arrays.toString(scratchColumnTypeNames));
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/test/queries/clientpositive/vector_row__id.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_row__id.q b/ql/src/test/queries/clientpositive/vector_row__id.q
deleted file mode 100644
index 11eda90..0000000
--- a/ql/src/test/queries/clientpositive/vector_row__id.q
+++ /dev/null
@@ -1,56 +0,0 @@
-set hive.support.concurrency=true;
-set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
-set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
-set hive.mapred.mode=nonstrict;
-set hive.explain.user=false;
-SET hive.vectorized.execution.enabled=true;
-set hive.fetch.task.conversion=none;
-
--- SORT_QUERY_RESULTS
-
-drop table if exists hello_acid;
-create table hello_acid (key int, value int)
-partitioned by (load_date date)
-clustered by(key) into 3 buckets
-stored as orc tblproperties ('transactional'='true');
-
-insert into hello_acid partition (load_date='2016-03-01') values (1, 1);
-insert into hello_acid partition (load_date='2016-03-02') values (2, 2);
-insert into hello_acid partition (load_date='2016-03-03') values (3, 3);
-
-set hive.vectorized.row.identifier.enabled=false;
-
-explain vectorization detail
-select row__id, key, value from hello_acid order by key;
-
-select row__id, key, value from hello_acid order by key;
-
-explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid;
-
-select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid;
-
-explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3;
-
-select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3;
-
-
-
-set hive.vectorized.row.identifier.enabled=true;
-
-explain vectorization detail
-select row__id, key, value from hello_acid order by key;
-
-select row__id, key, value from hello_acid order by key;
-
-explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid;
-
-select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid;
-
-explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3;
-
-select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3;
-
http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/test/results/clientpositive/llap/vector_row__id.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_row__id.q.out b/ql/src/test/results/clientpositive/llap/vector_row__id.q.out
deleted file mode 100644
index 850e3a4..0000000
--- a/ql/src/test/results/clientpositive/llap/vector_row__id.q.out
+++ /dev/null
@@ -1,605 +0,0 @@
-PREHOOK: query: drop table if exists hello_acid
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table if exists hello_acid
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table hello_acid (key int, value int)
-partitioned by (load_date date)
-clustered by(key) into 3 buckets
-stored as orc tblproperties ('transactional'='true')
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@hello_acid
-POSTHOOK: query: create table hello_acid (key int, value int)
-partitioned by (load_date date)
-clustered by(key) into 3 buckets
-stored as orc tblproperties ('transactional'='true')
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@hello_acid
-PREHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1)
-PREHOOK: type: QUERY
-PREHOOK: Output: default@hello_acid@load_date=2016-03-01
-POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1)
-POSTHOOK: type: QUERY
-POSTHOOK: Output: default@hello_acid@load_date=2016-03-01
-POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2)
-PREHOOK: type: QUERY
-PREHOOK: Output: default@hello_acid@load_date=2016-03-02
-POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2)
-POSTHOOK: type: QUERY
-POSTHOOK: Output: default@hello_acid@load_date=2016-03-02
-POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3)
-PREHOOK: type: QUERY
-PREHOOK: Output: default@hello_acid@load_date=2016-03-03
-POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3)
-POSTHOOK: type: QUERY
-POSTHOOK: Output: default@hello_acid@load_date=2016-03-03
-POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: explain vectorization detail
-select row__id, key, value from hello_acid order by key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select row__id, key, value from hello_acid order by key
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: hello_acid
- Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int)
- Execution mode: llap
- LLAP IO: may be used (ACID table)
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported
- vectorized: false
- Reducer 2
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- groupByVectorOutput: true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 3
- dataColumns: KEY.reducesinkkey0:int, VALUE._col0:struct<transactionid:bigint,bucketid:int,rowid:bigint>, VALUE._col1:int
- partitionColumnCount: 0
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int)
- outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [1, 0, 2]
- Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select row__id, key, value from hello_acid order by key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hello_acid
-PREHOOK: Input: default@hello_acid@load_date=2016-03-01
-PREHOOK: Input: default@hello_acid@load_date=2016-03-02
-PREHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-POSTHOOK: query: select row__id, key, value from hello_acid order by key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hello_acid
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-{"transactionid":3,"bucketid":536870912,"rowid":0} 1 1
-{"transactionid":4,"bucketid":536870912,"rowid":0} 2 2
-{"transactionid":5,"bucketid":536870912,"rowid":0} 3 3
-PREHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: hello_acid
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ROW__ID.transactionid (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: bigint)
- sort order: +
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- Execution mode: llap
- LLAP IO: may be used (ACID table)
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported
- vectorized: false
- Reducer 2
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- groupByVectorOutput: true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 1
- dataColumns: KEY.reducesinkkey0:bigint
- partitionColumnCount: 0
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: bigint)
- outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [0]
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hello_acid
-PREHOOK: Input: default@hello_acid@load_date=2016-03-01
-PREHOOK: Input: default@hello_acid@load_date=2016-03-02
-PREHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hello_acid
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-3
-4
-5
-PREHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: hello_acid
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (ROW__ID.transactionid = 3) (type: boolean)
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ROW__ID.transactionid (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: llap
- LLAP IO: may be used (ACID table)
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Predicate expression for FILTER operator: Virtual column ROW__ID is not supported
- vectorized: false
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hello_acid
-PREHOOK: Input: default@hello_acid@load_date=2016-03-01
-PREHOOK: Input: default@hello_acid@load_date=2016-03-02
-PREHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hello_acid
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-3
-PREHOOK: query: explain vectorization detail
-select row__id, key, value from hello_acid order by key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select row__id, key, value from hello_acid order by key
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: hello_acid
- Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedOutputColumns: [0, 1, 2, 3]
- Select Operator
- expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int)
- outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [3, 0, 1]
- Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumns: [0]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumns: [3, 1]
- Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int)
- Execution mode: vectorized, llap
- LLAP IO: may be used (ACID table)
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: key:int, value:int
- partitionColumnCount: 1
- partitionColumns: load_date:date
- Reducer 2
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- groupByVectorOutput: true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 3
- dataColumns: KEY.reducesinkkey0:int, VALUE._col0:struct<transactionid:bigint,bucketid:int,rowid:bigint>, VALUE._col1:int
- partitionColumnCount: 0
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int)
- outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [1, 0, 2]
- Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select row__id, key, value from hello_acid order by key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hello_acid
-PREHOOK: Input: default@hello_acid@load_date=2016-03-01
-PREHOOK: Input: default@hello_acid@load_date=2016-03-02
-PREHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-POSTHOOK: query: select row__id, key, value from hello_acid order by key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hello_acid
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-NULL 1 1
-NULL 2 2
-NULL 3 3
-PREHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: hello_acid
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ROW__ID.transactionid (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: bigint)
- sort order: +
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- Execution mode: llap
- LLAP IO: may be used (ACID table)
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): Column[ROW__ID].transactionid
- vectorized: false
- Reducer 2
- Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- groupByVectorOutput: true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 1
- dataColumns: KEY.reducesinkkey0:bigint
- partitionColumnCount: 0
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: bigint)
- outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [0]
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hello_acid
-PREHOOK: Input: default@hello_acid@load_date=2016-03-01
-PREHOOK: Input: default@hello_acid@load_date=2016-03-02
-PREHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hello_acid
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-3
-4
-5
-PREHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: hello_acid
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (ROW__ID.transactionid = 3) (type: boolean)
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ROW__ID.transactionid (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: llap
- LLAP IO: may be used (ACID table)
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Predicate expression for FILTER operator: Cannot handle expression type: ExprNodeFieldDesc
- vectorized: false
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hello_acid
-PREHOOK: Input: default@hello_acid@load_date=2016-03-01
-PREHOOK: Input: default@hello_acid@load_date=2016-03-02
-PREHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hello_acid
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-3
http://git-wip-us.apache.org/repos/asf/hive/blob/aa5e9bfa/ql/src/test/results/clientpositive/vector_row__id.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_row__id.q.out b/ql/src/test/results/clientpositive/vector_row__id.q.out
deleted file mode 100644
index d48902b..0000000
--- a/ql/src/test/results/clientpositive/vector_row__id.q.out
+++ /dev/null
@@ -1,491 +0,0 @@
-PREHOOK: query: drop table if exists hello_acid
-PREHOOK: type: DROPTABLE
-POSTHOOK: query: drop table if exists hello_acid
-POSTHOOK: type: DROPTABLE
-PREHOOK: query: create table hello_acid (key int, value int)
-partitioned by (load_date date)
-clustered by(key) into 3 buckets
-stored as orc tblproperties ('transactional'='true')
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@hello_acid
-POSTHOOK: query: create table hello_acid (key int, value int)
-partitioned by (load_date date)
-clustered by(key) into 3 buckets
-stored as orc tblproperties ('transactional'='true')
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@hello_acid
-PREHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1)
-PREHOOK: type: QUERY
-PREHOOK: Output: default@hello_acid@load_date=2016-03-01
-POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1)
-POSTHOOK: type: QUERY
-POSTHOOK: Output: default@hello_acid@load_date=2016-03-01
-POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2)
-PREHOOK: type: QUERY
-PREHOOK: Output: default@hello_acid@load_date=2016-03-02
-POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2)
-POSTHOOK: type: QUERY
-POSTHOOK: Output: default@hello_acid@load_date=2016-03-02
-POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3)
-PREHOOK: type: QUERY
-PREHOOK: Output: default@hello_acid@load_date=2016-03-03
-POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3)
-POSTHOOK: type: QUERY
-POSTHOOK: Output: default@hello_acid@load_date=2016-03-03
-POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
-PREHOOK: query: explain vectorization detail
-select row__id, key, value from hello_acid order by key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select row__id, key, value from hello_acid order by key
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hello_acid
- Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int)
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported
- vectorized: false
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select row__id, key, value from hello_acid order by key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hello_acid
-PREHOOK: Input: default@hello_acid@load_date=2016-03-01
-PREHOOK: Input: default@hello_acid@load_date=2016-03-02
-PREHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-POSTHOOK: query: select row__id, key, value from hello_acid order by key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hello_acid
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-{"transactionid":3,"bucketid":536936448,"rowid":0} 1 1
-{"transactionid":4,"bucketid":537001984,"rowid":0} 2 2
-{"transactionid":5,"bucketid":536870912,"rowid":0} 3 3
-PREHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hello_acid
- Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: ROW__ID.transactionid (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: bigint)
- sort order: +
- Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported
- vectorized: false
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hello_acid
-PREHOOK: Input: default@hello_acid@load_date=2016-03-01
-PREHOOK: Input: default@hello_acid@load_date=2016-03-02
-PREHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hello_acid
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-3
-4
-5
-PREHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hello_acid
- Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: (ROW__ID.transactionid = 3) (type: boolean)
- Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ROW__ID.transactionid (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Predicate expression for FILTER operator: Virtual column ROW__ID is not supported
- vectorized: false
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hello_acid
-PREHOOK: Input: default@hello_acid@load_date=2016-03-01
-PREHOOK: Input: default@hello_acid@load_date=2016-03-02
-PREHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hello_acid
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-3
-PREHOOK: query: explain vectorization detail
-select row__id, key, value from hello_acid order by key
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select row__id, key, value from hello_acid order by key
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hello_acid
- Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedOutputColumns: [0, 1, 2, 3]
- Select Operator
- expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int)
- outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [3, 0, 1]
- Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int)
- Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: key:int, value:int
- partitionColumnCount: 1
- partitionColumns: load_date:date
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select row__id, key, value from hello_acid order by key
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hello_acid
-PREHOOK: Input: default@hello_acid@load_date=2016-03-01
-PREHOOK: Input: default@hello_acid@load_date=2016-03-02
-PREHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-POSTHOOK: query: select row__id, key, value from hello_acid order by key
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hello_acid
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-NULL 1 1
-NULL 2 2
-NULL 3 3
-PREHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hello_acid
- Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
- Select Operator
- expressions: ROW__ID.transactionid (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: bigint)
- sort order: +
- Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): Column[ROW__ID].transactionid
- vectorized: false
- Reduce Vectorization:
- enabled: false
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
- enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hello_acid
-PREHOOK: Input: default@hello_acid@load_date=2016-03-01
-PREHOOK: Input: default@hello_acid@load_date=2016-03-02
-PREHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hello_acid
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-3
-4
-5
-PREHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization detail
-select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
- Stage: Stage-1
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: hello_acid
- Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
- Filter Operator
- predicate: (ROW__ID.transactionid = 3) (type: boolean)
- Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: ROW__ID.transactionid (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Predicate expression for FILTER operator: Cannot handle expression type: ExprNodeFieldDesc
- vectorized: false
-
- Stage: Stage-0
- Fetch Operator
- limit: -1
- Processor Tree:
- ListSink
-
-PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-PREHOOK: type: QUERY
-PREHOOK: Input: default@hello_acid
-PREHOOK: Input: default@hello_acid@load_date=2016-03-01
-PREHOOK: Input: default@hello_acid@load_date=2016-03-02
-PREHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@hello_acid
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
-POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
-#### A masked pattern was here ####
-3