Posted to commits@hive.apache.org by mm...@apache.org on 2017/07/21 01:55:31 UTC
hive git commit: HIVE-17116: Vectorization: Add infrastructure for vectorization of ROW__ID struct (Matt McCline, reviewed by Teddy Choi)
Repository: hive
Updated Branches:
refs/heads/master b9fc5fc10 -> 996fa0704
HIVE-17116: Vectorization: Add infrastructure for vectorization of ROW__ID struct (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/996fa070
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/996fa070
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/996fa070
Branch: refs/heads/master
Commit: 996fa070410b673ebd47511b33c78da4c4757723
Parents: b9fc5fc
Author: Matt McCline <mm...@hortonworks.com>
Authored: Thu Jul 20 20:55:24 2017 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Thu Jul 20 20:55:24 2017 -0500
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 4 +
.../test/resources/testconfiguration.properties | 1 +
.../hive/llap/io/api/impl/LlapInputFormat.java | 4 +-
.../hive/ql/exec/vector/VectorMapOperator.java | 41 +-
.../ql/exec/vector/VectorizationContext.java | 2 +
.../ql/exec/vector/VectorizedRowBatchCtx.java | 43 +-
.../hadoop/hive/ql/metadata/VirtualColumn.java | 14 +
.../hive/ql/optimizer/physical/Vectorizer.java | 164 +++--
.../queries/clientpositive/vector_row__id.q | 56 ++
.../clientpositive/llap/vector_row__id.q.out | 605 +++++++++++++++++++
.../results/clientpositive/vector_row__id.q.out | 491 +++++++++++++++
11 files changed, 1377 insertions(+), 48 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f360dfa..df45f2c 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2856,6 +2856,10 @@ public class HiveConf extends Configuration {
"of aggregations that use complex types.\n",
"For example, AVG uses a complex type (STRUCT) for partial aggregation results" +
"The default value is true."),
+ HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED("hive.vectorized.row.identifier.enabled", false,
+ "This flag should be set to true to enable vectorization\n" +
+ "of ROW__ID.\n" +
+ "The default value is false."),
HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control "
+ "whether to check, convert, and normalize partition value to conform to its column type in "
http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index cffe245..f66e19b 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -356,6 +356,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
vector_reduce2.q,\
vector_reduce3.q,\
vector_reduce_groupby_decimal.q,\
+ vector_row__id.q,\
vector_string_concat.q,\
vector_struct_in.q,\
vector_udf_character_length.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
index 22ca025..79ec4ed 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java
@@ -190,8 +190,10 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB
}
}
}
+ // UNDONE: Virtual column support?
return new VectorizedRowBatchCtx(colNames.toArray(new String[colNames.size()]),
- colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount, new String[0]);
+ colTypes.toArray(new TypeInfo[colTypes.size()]), null, partitionColumnCount,
+ new VirtualColumn[0], new String[0]);
}
static TableScanOperator findTsOp(MapWork mapWork) throws HiveException {
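The VectorizedRowBatchCtx constructor gains a VirtualColumn[] parameter, so every call site must be updated; LLAP, which has no virtual column support yet (see the UNDONE above), passes an empty array. A minimal sketch of a caller under that same assumption (the column names and TypeInfos are illustrative placeholders, using TypeInfoFactory from org.apache.hadoop.hive.serde2.typeinfo):

    // Sketch: constructing a batch context with no virtual columns, so
    // dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount - 0.
    VectorizedRowBatchCtx ctx = new VectorizedRowBatchCtx(
        new String[] { "key", "value" },              // rowColumnNames (placeholder)
        new TypeInfo[] { TypeInfoFactory.intTypeInfo,
            TypeInfoFactory.intTypeInfo },            // rowColumnTypeInfos (placeholder)
        null,                  // dataColumnNums: null means all data columns
        0,                     // partitionColumnCount
        new VirtualColumn[0],  // neededVirtualColumns: none
        new String[0]);        // scratchColumnTypeNames: none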
http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
index ed50df2..1ac8914 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -125,6 +126,9 @@ public class VectorMapOperator extends AbstractMapOperator {
private transient int dataColumnCount;
private transient int partitionColumnCount;
private transient Object[] partitionValues;
+ private transient int virtualColumnCount;
+ private transient boolean hasRowIdentifier;
+ private transient int rowIdentifierColumnNum;
private transient boolean[] dataColumnsToIncludeTruncated;
@@ -504,6 +508,19 @@ public class VectorMapOperator extends AbstractMapOperator {
dataColumnCount = batchContext.getDataColumnCount();
partitionColumnCount = batchContext.getPartitionColumnCount();
partitionValues = new Object[partitionColumnCount];
+ virtualColumnCount = batchContext.getVirtualColumnCount();
+ rowIdentifierColumnNum = -1;
+ if (virtualColumnCount > 0) {
+ final int firstVirtualColumnNum = dataColumnCount + partitionColumnCount;
+ VirtualColumn[] neededVirtualColumns = batchContext.getNeededVirtualColumns();
+ hasRowIdentifier = (neededVirtualColumns[0] == VirtualColumn.ROWID);
+ if (hasRowIdentifier) {
+ rowIdentifierColumnNum = firstVirtualColumnNum;
+ }
+ } else {
+ hasRowIdentifier = false;
+ }
+
dataColumnNums = batchContext.getDataColumnNums();
Preconditions.checkState(dataColumnNums != null);
@@ -601,6 +618,13 @@ public class VectorMapOperator extends AbstractMapOperator {
currentVectorPartContext.partName);
}
+ private void setRowIdentifierToNull(VectorizedRowBatch batch) {
+ ColumnVector rowIdentifierColVector = batch.cols[rowIdentifierColumnNum];
+ rowIdentifierColVector.isNull[0] = true;
+ rowIdentifierColVector.noNulls = false;
+ rowIdentifierColVector.isRepeating = true;
+ }
+
/*
* Setup the context for reading from the next partition file.
*/
@@ -695,6 +719,12 @@ public class VectorMapOperator extends AbstractMapOperator {
batchContext.addPartitionColsToBatch(deserializerBatch, partitionValues);
}
+ if (hasRowIdentifier) {
+
+ // No ACID in code path -- set ROW__ID to NULL.
+ setRowIdentifierToNull(deserializerBatch);
+ }
+
/*
* Set or clear the rest of the reading variables based on {vector|row} deserialization.
*/
@@ -778,7 +808,16 @@ public class VectorMapOperator extends AbstractMapOperator {
*/
batchCounter++;
if (value != null) {
- numRows += ((VectorizedRowBatch) value).size;
+ VectorizedRowBatch batch = (VectorizedRowBatch) value;
+ numRows += batch.size;
+ if (hasRowIdentifier) {
+
+ // UNDONE: Pass ROW__ID STRUCT column through IO Context to get filled in by ACID reader
+ // UNDONE: Or, perhaps tell it to do it before calling us, ...
+ // UNDONE: For now, set column to NULL.
+
+ setRowIdentifierToNull(batch);
+ }
}
oneRootOperator.process(value, 0);
if (oneRootOperator.getDone()) {
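setRowIdentifierToNull uses the standard VectorizedRowBatch idiom for NULLing out an entire column: once isRepeating is set, consumers consult only slot 0, so three flag writes cover every row in the batch. A minimal sketch of the idiom in isolation, assuming a batch and a column index are in scope:

    // Sketch: the repeating-NULL idiom on a ColumnVector.
    ColumnVector col = batch.cols[colNum];
    col.isRepeating = true; // slot 0 now stands for all rows in the batch
    col.noNulls = false;    // the column may contain NULLs...
    col.isNull[0] = true;   // ...and the repeated slot is NULL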
http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 9e026f0..fcebb6f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -124,6 +124,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -221,6 +222,7 @@ public class VectorizationContext {
projectedColumns.add(i);
projectionColumnMap.put(projectionColumnNames.get(i), i);
}
+
int firstOutputColumnIndex = projectedColumns.size();
this.ocm = new OutputColumnManager(firstOutputColumnIndex);
this.firstOutputColumnIndex = firstOutputColumnIndex;
http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index 3c12e04..90d1372 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.IOPrepareCache;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.plan.Explain;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -78,6 +79,8 @@ public class VectorizedRowBatchCtx {
private int[] dataColumnNums;
private int dataColumnCount;
private int partitionColumnCount;
+ private int virtualColumnCount;
+ private VirtualColumn[] neededVirtualColumns;
private String[] scratchColumnTypeNames;
@@ -88,14 +91,17 @@ public class VectorizedRowBatchCtx {
}
public VectorizedRowBatchCtx(String[] rowColumnNames, TypeInfo[] rowColumnTypeInfos,
- int[] dataColumnNums, int partitionColumnCount, String[] scratchColumnTypeNames) {
+ int[] dataColumnNums, int partitionColumnCount, VirtualColumn[] neededVirtualColumns,
+ String[] scratchColumnTypeNames) {
this.rowColumnNames = rowColumnNames;
this.rowColumnTypeInfos = rowColumnTypeInfos;
this.dataColumnNums = dataColumnNums;
this.partitionColumnCount = partitionColumnCount;
+ this.neededVirtualColumns = neededVirtualColumns;
+ this.virtualColumnCount = neededVirtualColumns.length;
this.scratchColumnTypeNames = scratchColumnTypeNames;
- dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount;
+ dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount - virtualColumnCount;
}
public String[] getRowColumnNames() {
@@ -118,6 +124,14 @@ public class VectorizedRowBatchCtx {
return partitionColumnCount;
}
+ public int getVirtualColumnCount() {
+ return virtualColumnCount;
+ }
+
+ public VirtualColumn[] getNeededVirtualColumns() {
+ return neededVirtualColumns;
+ }
+
public String[] getScratchColumnTypeNames() {
return scratchColumnTypeNames;
}
@@ -138,6 +152,8 @@ public class VectorizedRowBatchCtx {
rowColumnTypeInfos = VectorizedBatchUtil.typeInfosFromStructObjectInspector(structObjectInspector);
dataColumnNums = null;
partitionColumnCount = 0;
+ virtualColumnCount = 0;
+ neededVirtualColumns = new VirtualColumn[0];
dataColumnCount = rowColumnTypeInfos.length;
// Scratch column information.
@@ -204,13 +220,14 @@ public class VectorizedRowBatchCtx {
*/
public VectorizedRowBatch createVectorizedRowBatch()
{
- final int dataAndPartColumnCount = rowColumnTypeInfos.length;
- final int totalColumnCount = dataAndPartColumnCount + scratchColumnTypeNames.length;
+ final int nonScratchColumnCount = rowColumnTypeInfos.length;
+ final int totalColumnCount =
+ nonScratchColumnCount + scratchColumnTypeNames.length;
VectorizedRowBatch result = new VectorizedRowBatch(totalColumnCount);
if (dataColumnNums == null) {
// All data and partition columns.
- for (int i = 0; i < dataAndPartColumnCount; i++) {
+ for (int i = 0; i < nonScratchColumnCount; i++) {
TypeInfo typeInfo = rowColumnTypeInfos[i];
result.cols[i] = VectorizedBatchUtil.createColumnVector(typeInfo);
}
@@ -218,24 +235,30 @@ public class VectorizedRowBatchCtx {
// Create only needed/included columns data columns.
for (int i = 0; i < dataColumnNums.length; i++) {
int columnNum = dataColumnNums[i];
- Preconditions.checkState(columnNum < dataAndPartColumnCount);
+ Preconditions.checkState(columnNum < nonScratchColumnCount);
TypeInfo typeInfo = rowColumnTypeInfos[columnNum];
result.cols[columnNum] = VectorizedBatchUtil.createColumnVector(typeInfo);
}
- // Always create partition columns.
- final int endColumnNum = dataColumnCount + partitionColumnCount;
- for (int partitionColumnNum = dataColumnCount; partitionColumnNum < endColumnNum; partitionColumnNum++) {
+ // Always create partition and virtual columns.
+ final int partitionEndColumnNum = dataColumnCount + partitionColumnCount;
+ for (int partitionColumnNum = dataColumnCount; partitionColumnNum < partitionEndColumnNum; partitionColumnNum++) {
TypeInfo typeInfo = rowColumnTypeInfos[partitionColumnNum];
result.cols[partitionColumnNum] = VectorizedBatchUtil.createColumnVector(typeInfo);
}
+ final int virtualEndColumnNum = partitionEndColumnNum + virtualColumnCount;
+ for (int virtualColumnNum = partitionEndColumnNum; virtualColumnNum < virtualEndColumnNum; virtualColumnNum++) {
+ TypeInfo typeInfo = rowColumnTypeInfos[virtualColumnNum];
+ result.cols[virtualColumnNum] = VectorizedBatchUtil.createColumnVector(typeInfo);
+ }
}
for (int i = 0; i < scratchColumnTypeNames.length; i++) {
String typeName = scratchColumnTypeNames[i];
- result.cols[rowColumnTypeInfos.length + i] =
+ result.cols[nonScratchColumnCount + i] =
VectorizedBatchUtil.createColumnVector(typeName);
}
+ // UNDONE: Also remember virtualColumnCount...
result.setPartitionInfo(dataColumnCount, partitionColumnCount);
result.reset();
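The loops above imply a fixed column layout in the batch: data columns first, then partition columns, then virtual columns, then scratch columns. A sketch of the index arithmetic, using the fields of this class:

    // Sketch: column-number ranges after this patch.
    int dataStart      = 0;
    int partitionStart = dataColumnCount;
    int virtualStart   = dataColumnCount + partitionColumnCount; // partitionEndColumnNum
    int scratchStart   = virtualStart + virtualColumnCount;      // rowColumnTypeInfos.length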
http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
index 2435bf1..0032305 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java
@@ -20,10 +20,13 @@ package org.apache.hadoop.hive.ql.metadata;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
+import java.util.Map;
import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import org.apache.hadoop.conf.Configuration;
@@ -64,6 +67,17 @@ public enum VirtualColumn {
ImmutableSet.of(FILENAME.getName(), BLOCKOFFSET.getName(), ROWOFFSET.getName(),
RAWDATASIZE.getName(), GROUPINGID.getName(), ROWID.getName());
+ public static final ImmutableMap<String, VirtualColumn> VIRTUAL_COLUMN_NAME_MAP =
+ new ImmutableMap.Builder<String, VirtualColumn>().putAll(getColumnNameMap()).build();
+
+ private static Map<String, VirtualColumn> getColumnNameMap() {
+ Map<String, VirtualColumn> map = new HashMap<String, VirtualColumn>();
+ for (VirtualColumn virtualColumn : values()) {
+ map.put(virtualColumn.name, virtualColumn);
+ }
+ return map;
+ }
+
private final String name;
private final TypeInfo typeInfo;
private final boolean isHidden;
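The new VIRTUAL_COLUMN_NAME_MAP resolves a column name directly to its enum constant, rather than only answering membership like VIRTUAL_COLUMN_NAMES; the Vectorizer hunk below uses it exactly this way. A minimal usage sketch:

    // Sketch: name -> enum lookup; returns null for ordinary columns.
    VirtualColumn vc = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get("ROW__ID");
    if (vc == VirtualColumn.ROWID) {
      // the column is the ACID row identifier struct
    }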
http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 8183194..0913f40 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -75,6 +75,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.HiveVectorAdaptorUsageMode;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType;
import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
@@ -229,6 +230,7 @@ import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hive.common.util.AnnotationUtils;
import org.apache.hadoop.util.ReflectionUtils;
+import com.google.common.collect.ImmutableSet;
import com.google.common.base.Preconditions;
public class Vectorizer implements PhysicalPlanResolver {
@@ -274,6 +276,10 @@ public class Vectorizer implements PhysicalPlanResolver {
private Set<String> supportedAggregationUdfs = new HashSet<String>();
+ // The set of virtual columns that vectorized readers *MAY* support.
+ public static final ImmutableSet<VirtualColumn> vectorizableVirtualColumns =
+ ImmutableSet.of(VirtualColumn.ROWID);
+
private HiveConf hiveConf;
private boolean useVectorizedInputFileFormat;
@@ -283,6 +289,7 @@ public class Vectorizer implements PhysicalPlanResolver {
private boolean isPtfVectorizationEnabled;
private boolean isVectorizationComplexTypesEnabled;
private boolean isVectorizationGroupByComplexTypesEnabled;
+ private boolean isVectorizedRowIdentifierEnabled;
private boolean isSchemaEvolution;
@@ -316,6 +323,9 @@ public class Vectorizer implements PhysicalPlanResolver {
private long vectorizedVertexNum = -1;
+ private Set<VirtualColumn> availableVectorizedVirtualColumnSet = null;
+ private Set<VirtualColumn> neededVirtualColumnSet = null;
+
public Vectorizer() {
/*
@@ -453,6 +463,8 @@ public class Vectorizer implements PhysicalPlanResolver {
List<Integer> dataColumnNums;
int partitionColumnCount;
+ List<VirtualColumn> availableVirtualColumnList;
+ List<VirtualColumn> neededVirtualColumnList;
boolean useVectorizedInputFileFormat;
boolean groupByVectorOutput;
@@ -488,6 +500,12 @@ public class Vectorizer implements PhysicalPlanResolver {
public void setPartitionColumnCount(int partitionColumnCount) {
this.partitionColumnCount = partitionColumnCount;
}
+ public void setAvailableVirtualColumnList(List<VirtualColumn> availableVirtualColumnList) {
+ this.availableVirtualColumnList = availableVirtualColumnList;
+ }
+ public void setNeededVirtualColumnList(List<VirtualColumn> neededVirtualColumnList) {
+ this.neededVirtualColumnList = neededVirtualColumnList;
+ }
public void setScratchTypeNameArray(String[] scratchTypeNameArray) {
this.scratchTypeNameArray = scratchTypeNameArray;
}
@@ -522,6 +540,16 @@ public class Vectorizer implements PhysicalPlanResolver {
public void transferToBaseWork(BaseWork baseWork) {
+ final int virtualColumnCount;
+ VirtualColumn[] neededVirtualColumns;
+ if (neededVirtualColumnList != null && neededVirtualColumnList.size() > 0) {
+ virtualColumnCount = neededVirtualColumnList.size();
+ neededVirtualColumns = neededVirtualColumnList.toArray(new VirtualColumn[0]);
+ } else {
+ virtualColumnCount = 0;
+ neededVirtualColumns = new VirtualColumn[0];
+ }
+
String[] allColumnNameArray = allColumnNames.toArray(new String[0]);
TypeInfo[] allTypeInfoArray = allTypeInfos.toArray(new TypeInfo[0]);
int[] dataColumnNumsArray;
@@ -537,6 +565,7 @@ public class Vectorizer implements PhysicalPlanResolver {
allTypeInfoArray,
dataColumnNumsArray,
partitionColumnCount,
+ neededVirtualColumns,
scratchTypeNameArray);
baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx);
@@ -687,20 +716,41 @@ public class Vectorizer implements PhysicalPlanResolver {
}
private void getTableScanOperatorSchemaInfo(TableScanOperator tableScanOperator,
- List<String> logicalColumnNameList, List<TypeInfo> logicalTypeInfoList) {
+ List<String> logicalColumnNameList, List<TypeInfo> logicalTypeInfoList,
+ List<VirtualColumn> availableVirtualColumnList) {
- // Add all non-virtual columns to make a vectorization context for
+ // Add all columns to make a vectorization context for
// the TableScan operator.
RowSchema rowSchema = tableScanOperator.getSchema();
for (ColumnInfo c : rowSchema.getSignature()) {
- // Validation will later exclude vectorization of virtual columns usage (HIVE-5560).
- if (!isVirtualColumn(c)) {
- String columnName = c.getInternalName();
- String typeName = c.getTypeName();
- TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+ // Validation will later exclude vectorization of virtual columns usage if necessary.
+ String columnName = c.getInternalName();
+
+ // Turns out partition columns get marked as virtual in ColumnInfo, so we need to
+ // check the VirtualColumn directly.
+ VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName);
+ if (virtualColumn == null) {
logicalColumnNameList.add(columnName);
- logicalTypeInfoList.add(typeInfo);
+ logicalTypeInfoList.add(TypeInfoUtils.getTypeInfoFromTypeString(c.getTypeName()));
+ } else {
+
+ // The planner gives us a subset of the virtual columns available for this table scan.
+ // AND
+ // We only support some virtual columns in vectorization.
+ //
+ // So, create the intersection. Note these are available vectorizable virtual columns.
+ // Later we remember which virtual columns were *actually used* in the query so
+ // just those will be included in the Map VectorizedRowBatchCtx that has the
+ // information for creating the Map VectorizedRowBatch.
+ //
+ if (!vectorizableVirtualColumns.contains(virtualColumn)) {
+ continue;
+ }
+ if (virtualColumn == VirtualColumn.ROWID && !isVectorizedRowIdentifierEnabled) {
+ continue;
+ }
+ availableVirtualColumnList.add(virtualColumn);
}
}
}
@@ -893,14 +943,19 @@ public class Vectorizer implements PhysicalPlanResolver {
boolean isAcidTable = tableScanOperator.getConf().isAcidTable();
// These names/types are the data columns plus partition columns.
- final List<String> allColumnNameList = new ArrayList<String>();
- final List<TypeInfo> allTypeInfoList = new ArrayList<TypeInfo>();
+ final List<String> dataAndPartColumnNameList = new ArrayList<String>();
+ final List<TypeInfo> dataAndPartTypeInfoList = new ArrayList<TypeInfo>();
+
+ final List<VirtualColumn> availableVirtualColumnList = new ArrayList<VirtualColumn>();
- getTableScanOperatorSchemaInfo(tableScanOperator, allColumnNameList, allTypeInfoList);
+ getTableScanOperatorSchemaInfo(
+ tableScanOperator,
+ dataAndPartColumnNameList, dataAndPartTypeInfoList,
+ availableVirtualColumnList);
final List<Integer> dataColumnNums = new ArrayList<Integer>();
- final int allColumnCount = allColumnNameList.size();
+ final int dataAndPartColumnCount = dataAndPartColumnNameList.size();
/*
* Validate input formats of all the partitions can be vectorized.
@@ -956,17 +1011,17 @@ public class Vectorizer implements PhysicalPlanResolver {
LinkedHashMap<String, String> partSpec = partDesc.getPartSpec();
if (partSpec != null && partSpec.size() > 0) {
partitionColumnCount = partSpec.size();
- dataColumnCount = allColumnCount - partitionColumnCount;
+ dataColumnCount = dataAndPartColumnCount - partitionColumnCount;
} else {
partitionColumnCount = 0;
- dataColumnCount = allColumnCount;
+ dataColumnCount = dataAndPartColumnCount;
}
- determineDataColumnNums(tableScanOperator, allColumnNameList, dataColumnCount,
+ determineDataColumnNums(tableScanOperator, dataAndPartColumnNameList, dataColumnCount,
dataColumnNums);
- tableDataColumnList = allColumnNameList.subList(0, dataColumnCount);
- tableDataTypeInfoList = allTypeInfoList.subList(0, dataColumnCount);
+ tableDataColumnList = dataAndPartColumnNameList.subList(0, dataColumnCount);
+ tableDataTypeInfoList = dataAndPartTypeInfoList.subList(0, dataColumnCount);
isFirst = false;
}
@@ -1038,10 +1093,14 @@ public class Vectorizer implements PhysicalPlanResolver {
vectorPartDesc.setDataTypeInfos(nextDataTypeInfoList);
}
- vectorTaskColumnInfo.setAllColumnNames(allColumnNameList);
- vectorTaskColumnInfo.setAllTypeInfos(allTypeInfoList);
+ // For now, we don't know which virtual columns are going to be included. We'll add them
+ // later...
+ vectorTaskColumnInfo.setAllColumnNames(dataAndPartColumnNameList);
+ vectorTaskColumnInfo.setAllTypeInfos(dataAndPartTypeInfoList);
+
vectorTaskColumnInfo.setDataColumnNums(dataColumnNums);
vectorTaskColumnInfo.setPartitionColumnCount(partitionColumnCount);
+ vectorTaskColumnInfo.setAvailableVirtualColumnList(availableVirtualColumnList);
vectorTaskColumnInfo.setUseVectorizedInputFileFormat(useVectorizedInputFileFormat);
// Always set these so EXPLAIN can see.
@@ -1082,6 +1141,14 @@ public class Vectorizer implements PhysicalPlanResolver {
return false;
}
+ // Set global member indicating which virtual columns are possible to be used by
+ // the Map vertex.
+ availableVectorizedVirtualColumnSet = new HashSet<VirtualColumn>();
+ availableVectorizedVirtualColumnSet.addAll(vectorTaskColumnInfo.availableVirtualColumnList);
+
+ // And, use set to remember which virtual columns were actually referenced.
+ neededVirtualColumnSet = new HashSet<VirtualColumn>();
+
// Now we are enabled and any issues found from here on out are considered
// not vectorized issues.
mapWork.setVectorizationEnabled(true);
@@ -1104,6 +1171,21 @@ public class Vectorizer implements PhysicalPlanResolver {
}
}
}
+
+ List<VirtualColumn> neededVirtualColumnList = new ArrayList<VirtualColumn>();
+ if (!neededVirtualColumnSet.isEmpty()) {
+
+ // Create the needed list in the same order as the available list.
+ for (VirtualColumn virtualColumn : vectorTaskColumnInfo.availableVirtualColumnList) {
+ if (neededVirtualColumnSet.contains(virtualColumn)) {
+ neededVirtualColumnList.add(virtualColumn);
+ vectorTaskColumnInfo.allColumnNames.add(virtualColumn.getName());
+ vectorTaskColumnInfo.allTypeInfos.add(virtualColumn.getTypeInfo());
+ }
+ }
+ }
+
+ vectorTaskColumnInfo.setNeededVirtualColumnList(neededVirtualColumnList);
vectorTaskColumnInfo.setNonVectorizedOps(vnp.getNonVectorizedOps());
return true;
}
@@ -1737,6 +1819,10 @@ public class Vectorizer implements PhysicalPlanResolver {
HiveConf.getBoolVar(hiveConf,
HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED);
+ isVectorizedRowIdentifierEnabled =
+ HiveConf.getBoolVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTORIZATION_ROW_IDENTIFIER_ENABLED);
+
isSchemaEvolution =
HiveConf.getBoolVar(hiveConf,
HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION);
@@ -2328,10 +2414,24 @@ public class Vectorizer implements PhysicalPlanResolver {
VectorExpressionDescriptor.Mode mode, boolean allowComplex) {
if (desc instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
- // Currently, we do not support vectorized virtual columns (see HIVE-5570).
- if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) {
- setExpressionIssue(expressionTitle, "Virtual columns not supported (" + c.getColumn() + ")");
- return false;
+ String columnName = c.getColumn();
+
+ if (availableVectorizedVirtualColumnSet != null) {
+
+ // For Map, check for virtual columns.
+ VirtualColumn virtualColumn = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName);
+ if (virtualColumn != null) {
+
+ // We support some virtual columns in vectorization for this table scan.
+
+ if (!availableVectorizedVirtualColumnSet.contains(virtualColumn)) {
+ setExpressionIssue(expressionTitle, "Virtual column " + columnName + " is not supported");
+ return false;
+ }
+
+ // Remember we used this one in the query.
+ neededVirtualColumnSet.add(virtualColumn);
+ }
}
}
String typeName = desc.getTypeInfo().getTypeName();
@@ -4180,28 +4280,20 @@ public class Vectorizer implements PhysicalPlanResolver {
return vectorOp;
}
- private boolean isVirtualColumn(ColumnInfo column) {
-
- // Not using method column.getIsVirtualCol() because partitioning columns are also
- // treated as virtual columns in ColumnInfo.
- if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(column.getInternalName())) {
- return true;
- }
- return false;
- }
-
public void debugDisplayAllMaps(BaseWork work) {
VectorizedRowBatchCtx vectorizedRowBatchCtx = work.getVectorizedRowBatchCtx();
String[] allColumnNames = vectorizedRowBatchCtx.getRowColumnNames();
- Object columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos();
+ TypeInfo[] columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos();
int partitionColumnCount = vectorizedRowBatchCtx.getPartitionColumnCount();
+ int virtualColumnCount = vectorizedRowBatchCtx.getVirtualColumnCount();
String[] scratchColumnTypeNames =vectorizedRowBatchCtx.getScratchColumnTypeNames();
- LOG.debug("debugDisplayAllMaps allColumnNames " + Arrays.toString(allColumnNames));
- LOG.debug("debugDisplayAllMaps columnTypeInfos " + Arrays.deepToString((Object[]) columnTypeInfos));
+ LOG.debug("debugDisplayAllMaps rowColumnNames " + Arrays.toString(allColumnNames));
+ LOG.debug("debugDisplayAllMaps rowColumnTypeInfos " + Arrays.toString(columnTypeInfos));
LOG.debug("debugDisplayAllMaps partitionColumnCount " + partitionColumnCount);
+ LOG.debug("debugDisplayAllMaps virtualColumnCount " + virtualColumnCount);
LOG.debug("debugDisplayAllMaps scratchColumnTypeNames " + Arrays.toString(scratchColumnTypeNames));
}
}
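Putting the Vectorizer changes together: at table-scan time the available set is the intersection of the scan's virtual columns with vectorizableVirtualColumns (ROW__ID additionally gated by the new flag); during expression validation, any supported virtual column actually referenced is added to the needed set; only the needed ones are appended to the batch context. A condensed sketch of the validation step, written as a hypothetical helper (not a method in this patch):

    // Sketch: mirrors the new virtual-column check in expression validation.
    private boolean checkVirtualColumn(String columnName,
        Set<VirtualColumn> available, Set<VirtualColumn> needed) {
      VirtualColumn vc = VirtualColumn.VIRTUAL_COLUMN_NAME_MAP.get(columnName);
      if (vc == null) {
        return true;  // ordinary column: nothing to check
      }
      if (!available.contains(vc)) {
        return false; // not vectorizable here -> vertex falls back to row mode
      }
      needed.add(vc); // remember it was actually used in the query
      return true;
    }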
http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/test/queries/clientpositive/vector_row__id.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_row__id.q b/ql/src/test/queries/clientpositive/vector_row__id.q
new file mode 100644
index 0000000..11eda90
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_row__id.q
@@ -0,0 +1,56 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+
+-- SORT_QUERY_RESULTS
+
+drop table if exists hello_acid;
+create table hello_acid (key int, value int)
+partitioned by (load_date date)
+clustered by(key) into 3 buckets
+stored as orc tblproperties ('transactional'='true');
+
+insert into hello_acid partition (load_date='2016-03-01') values (1, 1);
+insert into hello_acid partition (load_date='2016-03-02') values (2, 2);
+insert into hello_acid partition (load_date='2016-03-03') values (3, 3);
+
+set hive.vectorized.row.identifier.enabled=false;
+
+explain vectorization detail
+select row__id, key, value from hello_acid order by key;
+
+select row__id, key, value from hello_acid order by key;
+
+explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid;
+
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid;
+
+explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3;
+
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3;
+
+
+
+set hive.vectorized.row.identifier.enabled=true;
+
+explain vectorization detail
+select row__id, key, value from hello_acid order by key;
+
+select row__id, key, value from hello_acid order by key;
+
+explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid;
+
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid;
+
+explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3;
+
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/test/results/clientpositive/llap/vector_row__id.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_row__id.q.out b/ql/src/test/results/clientpositive/llap/vector_row__id.q.out
new file mode 100644
index 0000000..850e3a4
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_row__id.q.out
@@ -0,0 +1,605 @@
+PREHOOK: query: drop table if exists hello_acid
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists hello_acid
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table hello_acid (key int, value int)
+partitioned by (load_date date)
+clustered by(key) into 3 buckets
+stored as orc tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hello_acid
+POSTHOOK: query: create table hello_acid (key int, value int)
+partitioned by (load_date date)
+clustered by(key) into 3 buckets
+stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hello_acid
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-01
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-02
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-03
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-03
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: hello_acid
+ Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int)
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported
+ vectorized: false
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:int, VALUE._col0:struct<transactionid:bigint,bucketid:int,rowid:bigint>, VALUE._col1:int
+ partitionColumnCount: 0
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 0, 2]
+ Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+{"transactionid":3,"bucketid":536870912,"rowid":0} 1 1
+{"transactionid":4,"bucketid":536870912,"rowid":0} 2 2
+{"transactionid":5,"bucketid":536870912,"rowid":0} 3 3
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: hello_acid
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ROW__ID.transactionid (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported
+ vectorized: false
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: KEY.reducesinkkey0:bigint
+ partitionColumnCount: 0
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+4
+5
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: hello_acid
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ROW__ID.transactionid = 3) (type: boolean)
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ROW__ID.transactionid (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Predicate expression for FILTER operator: Virtual column ROW__ID is not supported
+ vectorized: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: hello_acid
+ Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
+ Select Operator
+ expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [3, 0, 1]
+ Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: [3, 1]
+ Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:int, value:int
+ partitionColumnCount: 1
+ partitionColumns: load_date:date
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:int, VALUE._col0:struct<transactionid:bigint,bucketid:int,rowid:bigint>, VALUE._col1:int
+ partitionColumnCount: 0
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 0, 2]
+ Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 231 Data size: 3696 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+NULL 1 1
+NULL 2 2
+NULL 3 3
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: hello_acid
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ROW__ID.transactionid (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): Column[ROW__ID].transactionid
+ vectorized: false
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: KEY.reducesinkkey0:bigint
+ partitionColumnCount: 0
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+4
+5
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: hello_acid
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (ROW__ID.transactionid = 3) (type: boolean)
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ROW__ID.transactionid (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1856 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Predicate expression for FILTER operator: Cannot handle expression type: ExprNodeFieldDesc
+ vectorized: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
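
The LLAP q.out above and the MR q.out that follows come from the same
vector_row__id.q test. A minimal session sketch for reproducing it, assuming
ACID transactions are configured; hive.vectorized.row.identifier.enabled is
the flag introduced by this patch, while the other properties are
long-standing Hive configs and an assumption about the test harness, not
shown in this diff:

  -- Standard ACID prerequisites (assumed; not part of this patch).
  set hive.support.concurrency=true;
  set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
  -- Vectorization switches exercised by vector_row__id.q.
  set hive.vectorized.execution.enabled=true;
  set hive.vectorized.row.identifier.enabled=true;

  -- The projections under test: the full ROW__ID struct and one field.
  select row__id, key, value from hello_acid order by key;
  select row__id.transactionid as tid from hello_acid order by tid;
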
http://git-wip-us.apache.org/repos/asf/hive/blob/996fa070/ql/src/test/results/clientpositive/vector_row__id.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_row__id.q.out b/ql/src/test/results/clientpositive/vector_row__id.q.out
new file mode 100644
index 0000000..d48902b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_row__id.q.out
@@ -0,0 +1,491 @@
+PREHOOK: query: drop table if exists hello_acid
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists hello_acid
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table hello_acid (key int, value int)
+partitioned by (load_date date)
+clustered by(key) into 3 buckets
+stored as orc tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@hello_acid
+POSTHOOK: query: create table hello_acid (key int, value int)
+partitioned by (load_date date)
+clustered by(key) into 3 buckets
+stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@hello_acid
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-01
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-01') values (1, 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-01).value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-02
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-02') values (2, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-02).value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@hello_acid@load_date=2016-03-03
+POSTHOOK: query: insert into hello_acid partition (load_date='2016-03-03') values (3, 3)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@hello_acid@load_date=2016-03-03
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: hello_acid PARTITION(load_date=2016-03-03).value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: hello_acid
+ Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int)
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported
+ vectorized: false
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+{"transactionid":3,"bucketid":536936448,"rowid":0} 1 1
+{"transactionid":4,"bucketid":537001984,"rowid":0} 2 2
+{"transactionid":5,"bucketid":536870912,"rowid":0} 3 3
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: hello_acid
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: ROW__ID.transactionid (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Select expression for SELECT operator: Virtual column ROW__ID is not supported
+ vectorized: false
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+4
+5
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: hello_acid
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (ROW__ID.transactionid = 3) (type: boolean)
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ROW__ID.transactionid (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Predicate expression for FILTER operator: Virtual column ROW__ID is not supported
+ vectorized: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: hello_acid
+ Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
+ Select Operator
+ expressions: ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), key (type: int), value (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [3, 0, 1]
+ Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: int)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), _col2 (type: int)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:int, value:int
+ partitionColumnCount: 1
+ partitionColumns: load_date:date
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 380 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select row__id, key, value from hello_acid order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select row__id, key, value from hello_acid order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+NULL 1 1
+NULL 2 2
+NULL 3 3
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: hello_acid
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: ROW__ID.transactionid (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): Column[ROW__ID].transactionid
+ vectorized: false
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub order by tid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
+4
+5
+PREHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: hello_acid
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (ROW__ID.transactionid = 3) (type: boolean)
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ROW__ID.transactionid (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 3054 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Predicate expression for FILTER operator: Cannot handle expression type: ExprNodeFieldDesc
+ vectorized: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@hello_acid
+PREHOOK: Input: default@hello_acid@load_date=2016-03-01
+PREHOOK: Input: default@hello_acid@load_date=2016-03-02
+PREHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+POSTHOOK: query: select tid from (select row__id.transactionid as tid from hello_acid) sub where tid = 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@hello_acid
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-01
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-02
+POSTHOOK: Input: default@hello_acid@load_date=2016-03-03
+#### A masked pattern was here ####
+3
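
Reading the second (MR) q.out: once the scan vectorizes (Execution mode:
vectorized), the full ROW__ID struct projects as NULL, which is consistent
with this change adding infrastructure rather than populating ROW__ID on the
non-LLAP path; field access still falls back to row mode
(notVectorizedReason: Cannot handle expression type: ExprNodeFieldDesc). A
hedged sketch of the field-access pattern, matching the
struct<transactionid:bigint,bucketid:int,rowid:bigint> type shown in the
plans above; the literal values are specific to this run:

  -- ROW__ID is a virtual column on ACID tables; each struct field can be
  -- dereferenced directly. Values (e.g. transactionid 3..5) vary per run.
  select row__id.transactionid, row__id.bucketid, row__id.rowid
  from hello_acid
  where key = 1;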