You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ga...@apache.org on 2017/12/18 23:25:54 UTC
[11/50] [abbrv] hive git commit: HIVE-18258: Vectorization:
Reduce-Side GROUP BY MERGEPARTIAL with duplicate columns is broken (Matt
McCline, reviewed by Teddy Choi)
HIVE-18258: Vectorization: Reduce-Side GROUP BY MERGEPARTIAL with duplicate columns is broken (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f52e8b4b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f52e8b4b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f52e8b4b
Branch: refs/heads/standalone-metastore
Commit: f52e8b4ba38f2a1141650d99efb12c923cee7cd0
Parents: 856d88d
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri Dec 15 11:14:20 2017 -0600
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri Dec 15 11:14:20 2017 -0600
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../ql/exec/vector/VectorGroupKeyHelper.java | 54 +++--
.../vector_reduce_groupby_duplicate_cols.q | 29 +++
.../vector_reduce_groupby_duplicate_cols.q.out | 211 +++++++++++++++++++
.../vector_reduce_groupby_duplicate_cols.q.out | 180 ++++++++++++++++
5 files changed, 454 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f52e8b4b/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 37079b7..2bf64dc 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -381,6 +381,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
vector_reduce2.q,\
vector_reduce3.q,\
vector_reduce_groupby_decimal.q,\
+ vector_reduce_groupby_duplicate_cols.q,\
vector_row__id.q,\
vector_string_concat.q,\
vector_struct_in.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/f52e8b4b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java
index 13a929b..02b0e5c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java
@@ -32,7 +32,7 @@ import org.apache.hadoop.io.DataOutputBuffer;
*/
public class VectorGroupKeyHelper extends VectorColumnSetInfo {
- private int[] outputColumnNums;
+ private int[] inputColumnNums;
public VectorGroupKeyHelper(int keyCount) {
super(keyCount);
@@ -44,13 +44,18 @@ public class VectorGroupKeyHelper extends VectorColumnSetInfo {
// case, we use the keyCount passed to the constructor and not keyExpressions.length.
// Inspect the output type of each key expression. And, remember the output columns.
- outputColumnNums = new int[keyCount];
+ inputColumnNums = new int[keyCount];
for(int i = 0; i < keyCount; ++i) {
VectorExpression keyExpression = keyExpressions[i];
+
TypeInfo typeInfo = keyExpression.getOutputTypeInfo();
Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
addKey(columnVectorType);
- outputColumnNums[i] = keyExpression.getOutputColumnNum();
+
+ // The output of the key expression is the input column.
+ final int inputColumnNum = keyExpression.getOutputColumnNum();
+
+ inputColumnNums[i] = inputColumnNum;
}
finishAdding();
}
@@ -64,10 +69,12 @@ public class VectorGroupKeyHelper extends VectorColumnSetInfo {
*/
public void copyGroupKey(VectorizedRowBatch inputBatch, VectorizedRowBatch outputBatch,
DataOutputBuffer buffer) throws HiveException {
+
for(int i = 0; i< longIndices.length; ++i) {
- final int columnIndex = outputColumnNums[longIndices[i]];
- LongColumnVector inputColumnVector = (LongColumnVector) inputBatch.cols[columnIndex];
- LongColumnVector outputColumnVector = (LongColumnVector) outputBatch.cols[columnIndex];
+ final int outputColumnNum = longIndices[i];
+ final int inputColumnNum = inputColumnNums[outputColumnNum];
+ LongColumnVector inputColumnVector = (LongColumnVector) inputBatch.cols[inputColumnNum];
+ LongColumnVector outputColumnVector = (LongColumnVector) outputBatch.cols[outputColumnNum];
// This vectorized code pattern says:
// If the input batch has no nulls at all (noNulls is true) OR
@@ -91,9 +98,10 @@ public class VectorGroupKeyHelper extends VectorColumnSetInfo {
}
}
for(int i=0;i<doubleIndices.length; ++i) {
- final int columnIndex = outputColumnNums[doubleIndices[i]];
- DoubleColumnVector inputColumnVector = (DoubleColumnVector) inputBatch.cols[columnIndex];
- DoubleColumnVector outputColumnVector = (DoubleColumnVector) outputBatch.cols[columnIndex];
+ final int outputColumnNum = doubleIndices[i];
+ final int inputColumnNum = inputColumnNums[outputColumnNum];
+ DoubleColumnVector inputColumnVector = (DoubleColumnVector) inputBatch.cols[inputColumnNum];
+ DoubleColumnVector outputColumnVector = (DoubleColumnVector) outputBatch.cols[outputColumnNum];
if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) {
outputColumnVector.vector[outputBatch.size] = inputColumnVector.vector[0];
} else {
@@ -102,9 +110,10 @@ public class VectorGroupKeyHelper extends VectorColumnSetInfo {
}
}
for(int i=0;i<stringIndices.length; ++i) {
- final int columnIndex = outputColumnNums[stringIndices[i]];
- BytesColumnVector inputColumnVector = (BytesColumnVector) inputBatch.cols[columnIndex];
- BytesColumnVector outputColumnVector = (BytesColumnVector) outputBatch.cols[columnIndex];
+ final int outputColumnNum = stringIndices[i];
+ final int inputColumnNum = inputColumnNums[outputColumnNum];
+ BytesColumnVector inputColumnVector = (BytesColumnVector) inputBatch.cols[inputColumnNum];
+ BytesColumnVector outputColumnVector = (BytesColumnVector) outputBatch.cols[outputColumnNum];
if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) {
// Copy bytes into scratch buffer.
int start = buffer.getLength();
@@ -121,9 +130,10 @@ public class VectorGroupKeyHelper extends VectorColumnSetInfo {
}
}
for(int i=0;i<decimalIndices.length; ++i) {
- final int columnIndex = outputColumnNums[decimalIndices[i]];
- DecimalColumnVector inputColumnVector = (DecimalColumnVector) inputBatch.cols[columnIndex];
- DecimalColumnVector outputColumnVector = (DecimalColumnVector) outputBatch.cols[columnIndex];
+ final int outputColumnNum = decimalIndices[i];
+ final int inputColumnNum = inputColumnNums[outputColumnNum];
+ DecimalColumnVector inputColumnVector = (DecimalColumnVector) inputBatch.cols[inputColumnNum];
+ DecimalColumnVector outputColumnVector = (DecimalColumnVector) outputBatch.cols[outputColumnNum];
if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) {
// Since we store references to HiveDecimalWritable instances, we must use the update method instead
@@ -135,9 +145,10 @@ public class VectorGroupKeyHelper extends VectorColumnSetInfo {
}
}
for(int i=0;i<timestampIndices.length; ++i) {
- final int columnIndex = outputColumnNums[timestampIndices[i]];
- TimestampColumnVector inputColumnVector = (TimestampColumnVector) inputBatch.cols[columnIndex];
- TimestampColumnVector outputColumnVector = (TimestampColumnVector) outputBatch.cols[columnIndex];
+ final int outputColumnNum = timestampIndices[i];
+ final int inputColumnNum = inputColumnNums[outputColumnNum];
+ TimestampColumnVector inputColumnVector = (TimestampColumnVector) inputBatch.cols[inputColumnNum];
+ TimestampColumnVector outputColumnVector = (TimestampColumnVector) outputBatch.cols[outputColumnNum];
if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) {
outputColumnVector.setElement(outputBatch.size, 0, inputColumnVector);
@@ -147,9 +158,10 @@ public class VectorGroupKeyHelper extends VectorColumnSetInfo {
}
}
for(int i=0;i<intervalDayTimeIndices.length; ++i) {
- final int columnIndex = outputColumnNums[intervalDayTimeIndices[i]];
- IntervalDayTimeColumnVector inputColumnVector = (IntervalDayTimeColumnVector) inputBatch.cols[columnIndex];
- IntervalDayTimeColumnVector outputColumnVector = (IntervalDayTimeColumnVector) outputBatch.cols[columnIndex];
+ final int outputColumnNum = intervalDayTimeIndices[i];
+ final int inputColumnNum = inputColumnNums[outputColumnNum];
+ IntervalDayTimeColumnVector inputColumnVector = (IntervalDayTimeColumnVector) inputBatch.cols[inputColumnNum];
+ IntervalDayTimeColumnVector outputColumnVector = (IntervalDayTimeColumnVector) outputBatch.cols[outputColumnNum];
if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) {
outputColumnVector.setElement(outputBatch.size, 0, inputColumnVector);
http://git-wip-us.apache.org/repos/asf/hive/blob/f52e8b4b/ql/src/test/queries/clientpositive/vector_reduce_groupby_duplicate_cols.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_reduce_groupby_duplicate_cols.q b/ql/src/test/queries/clientpositive/vector_reduce_groupby_duplicate_cols.q
new file mode 100644
index 0000000..c82c960
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_reduce_groupby_duplicate_cols.q
@@ -0,0 +1,29 @@
+set hive.cli.print.header=true;
+set hive.explain.user=false;
+set hive.vectorized.execution.enabled=true;
+set hive.vectorized.execution.reduce.enabled=true;
+set hive.vectorized.execution.reducesink.new.enabled=false;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.fetch.task.conversion=none;
+set hive.strict.checks.cartesian.product=false;
+set hive.cbo.enable=false;
+
+-- HIVE-18258
+
+create table demo (one int, two int);
+insert into table demo values (1, 2);
+
+explain vectorization detail
+select one as one_0, two, one as one_1
+from demo a
+join (select 1 as one, 2 as two) b
+on a.one = b.one and a.two = b.two
+group by a.one, a.two, a.one;
+
+select one as one_0, two, one as one_1
+from demo a
+join (select 1 as one, 2 as two) b
+on a.one = b.one and a.two = b.two
+group by a.one, a.two, a.one;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/f52e8b4b/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out b/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out
new file mode 100644
index 0000000..afca3df
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out
@@ -0,0 +1,211 @@
+PREHOOK: query: create table demo (one int, two int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@demo
+POSTHOOK: query: create table demo (one int, two int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@demo
+PREHOOK: query: insert into table demo values (1, 2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@demo
+POSTHOOK: query: insert into table demo values (1, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@demo
+POSTHOOK: Lineage: demo.one EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: demo.two EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: explain vectorization detail
+select one as one_0, two, one as one_1
+from demo a
+join (select 1 as one, 2 as two) b
+on a.one = b.one and a.two = b.two
+group by a.one, a.two, a.one
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select one as one_0, two, one as one_1
+from demo a
+join (select 1 as one, 2 as two) b
+on a.one = b.one and a.two = b.two
+group by a.one, a.two, a.one
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 2 <- Map 1 (BROADCAST_EDGE)
+ Reducer 3 <- Map 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: 1 (type: int), 2 (type: int)
+ sort order: ++
+ Map-reduce partition columns: 1 (type: int), 2 (type: int)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+#### A masked pattern was here ####
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:one:int, 1:two:int, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:int))
+ predicate: (one is not null and two is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 one (type: int), two (type: int)
+ 1 1 (type: int), 2 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumnNums: [0, 1]
+ bigTableRetainedColumnNums: [0, 1]
+ bigTableValueColumnNums: [0, 1]
+ className: VectorMapJoinInnerBigOnlyMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+ projectedOutputColumnNums: [0, 1]
+ outputColumnNames: _col0, _col1
+ input vertices:
+ 1 Map 1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:int, col 1:int
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: one:int, two:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY._col0:int, KEY._col1:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int, col 1:int, col 0:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: []
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [2, 1, 2]
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select one as one_0, two, one as one_1
+from demo a
+join (select 1 as one, 2 as two) b
+on a.one = b.one and a.two = b.two
+group by a.one, a.two, a.one
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@demo
+#### A masked pattern was here ####
+POSTHOOK: query: select one as one_0, two, one as one_1
+from demo a
+join (select 1 as one, 2 as two) b
+on a.one = b.one and a.two = b.two
+group by a.one, a.two, a.one
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@demo
+#### A masked pattern was here ####
+one_0 two one_1
+1 2 1
http://git-wip-us.apache.org/repos/asf/hive/blob/f52e8b4b/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out b/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out
new file mode 100644
index 0000000..eaa4031
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_reduce_groupby_duplicate_cols.q.out
@@ -0,0 +1,180 @@
+PREHOOK: query: create table demo (one int, two int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@demo
+POSTHOOK: query: create table demo (one int, two int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@demo
+PREHOOK: query: insert into table demo values (1, 2)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@demo
+POSTHOOK: query: insert into table demo values (1, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@demo
+POSTHOOK: Lineage: demo.one EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: demo.two EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: explain vectorization detail
+select one as one_0, two, one as one_1
+from demo a
+join (select 1 as one, 2 as two) b
+on a.one = b.one and a.two = b.two
+group by a.one, a.two, a.one
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select one as one_0, two, one as one_1
+from demo a
+join (select 1 as one, 2 as two) b
+on a.one = b.one and a.two = b.two
+group by a.one, a.two, a.one
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-2 depends on stages: Stage-5
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ b:_dummy_table
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ b:_dummy_table
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ HashTable Sink Operator
+ keys:
+ 0 one (type: int), two (type: int)
+ 1 1 (type: int), 2 (type: int)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:one:int, 1:two:int, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:int))
+ predicate: (one is not null and two is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 one (type: int), two (type: int)
+ 1 1 (type: int), 2 (type: int)
+ Map Join Vectorization:
+ bigTableKeyExpressions: col 0:int, col 1:int
+ bigTableValueExpressions: col 0:int, col 1:int
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:int, col 1:int
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: _col0 (type: int), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.vectorized.execution.reducesink.new.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: one:int, two:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Local Work:
+ Map Reduce Local Work
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col2 (type: int), _col1 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select one as one_0, two, one as one_1
+from demo a
+join (select 1 as one, 2 as two) b
+on a.one = b.one and a.two = b.two
+group by a.one, a.two, a.one
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@demo
+#### A masked pattern was here ####
+POSTHOOK: query: select one as one_0, two, one as one_1
+from demo a
+join (select 1 as one, 2 as two) b
+on a.one = b.one and a.two = b.two
+group by a.one, a.two, a.one
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@demo
+#### A masked pattern was here ####
+one_0 two one_1
+1 2 1