Posted to commits@hive.apache.org by mm...@apache.org on 2016/10/13 10:50:25 UTC
[06/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)
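The updated .q.out golden files below reflect the new EXPLAIN VECTORIZATION output added by this change. As a rough, hedged sketch of how these plans are produced (the settings are inferred from the enabledConditionsMet / nativeConditionsMet entries printed in the plans themselves; the exact qtest configuration is not reproduced here):

    -- settings named in the plan output below (Spark qfile runs)
    SET hive.vectorized.execution.enabled = true;          -- gates "PLAN VECTORIZATION: enabled"
    SET hive.vectorized.execution.reduce.enabled = true;   -- gates "Reduce Vectorization: enabled"
    SET hive.execution.engine = spark;                      -- the engine checked by the native conditions

    EXPLAIN VECTORIZATION EXPRESSION
    SELECT cdate FROM decimal_date_test
    WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE))
    ORDER BY cdate;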
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
index fbb43c4..7d722d0 100644
--- a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
@@ -12,10 +12,14 @@ POSTHOOK: Lineage: decimal_date_test.cdate EXPRESSION [(alltypesorc)alltypesorc.
POSTHOOK: Lineage: decimal_date_test.cdecimal1 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
POSTHOOK: Lineage: decimal_date_test.cdecimal2 EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
POSTHOOK: Lineage: decimal_date_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) ORDER BY cdate
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -32,27 +36,65 @@ STAGE PLANS:
TableScan
alias: decimal_date_test
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColumnInList(col 3, values [-67, -171]) -> boolean
predicate: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean)
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdate (type: date)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [3]
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: date)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -65,10 +107,14 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE))
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE))
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE))
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdate NOT IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE), CAST("1970-01-21" AS DATE))
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -85,31 +131,77 @@ STAGE PLANS:
TableScan
alias: decimal_date_test
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsFalse(col 4)(children: LongColumnInList(col 3, values [-67, -171, 20]) -> 4:boolean) -> boolean
predicate: (not (cdate) IN (1969-10-26, 1969-07-14, 1970-01-21)) (type: boolean)
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Select Operator
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: []
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0]
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 0) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0]
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -122,10 +214,14 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) ORDER BY cdecimal1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -142,27 +238,65 @@ STAGE PLANS:
TableScan
alias: decimal_date_test
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterDecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> boolean
predicate: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean)
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdecimal1 (type: decimal(20,10))
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1]
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: decimal(20,10))
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: decimal(20,10))
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -175,10 +309,14 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568)
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568)
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568)
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT IN (2365.8945945946, 881.0135135135, -3367.6517567568)
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -195,31 +333,77 @@ STAGE PLANS:
TableScan
alias: decimal_date_test
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsFalse(col 4)(children: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean) -> boolean
predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean)
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Select Operator
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: []
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0]
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 0) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0]
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -232,10 +416,14 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) ORDER BY cdate
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -252,27 +440,65 @@ STAGE PLANS:
TableScan
alias: decimal_date_test
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColumnBetween(col 3, left -2, right 1) -> boolean
predicate: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean)
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdate (type: date)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [3]
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: date)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -285,10 +511,14 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate FROM decimal_date_test WHERE cdate NOT BETWEEN CAST("1968-05-01" AS DATE) AND CAST("1971-09-01" AS DATE) ORDER BY cdate
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -305,27 +535,65 @@ STAGE PLANS:
TableScan
alias: decimal_date_test
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColumnNotBetween(col 3, left -610, right 608) -> boolean
predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 (type: boolean)
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdate (type: date)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [3]
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: date)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -338,10 +606,14 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdecimal1 FROM decimal_date_test WHERE cdecimal1 BETWEEN -20 AND 45.9918918919 ORDER BY cdecimal1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -358,27 +630,65 @@ STAGE PLANS:
TableScan
alias: decimal_date_test
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterDecimalColumnBetween(col 1, left -20, right 45.9918918919) -> boolean
predicate: cdecimal1 BETWEEN -20 AND 45.9918918919 (type: boolean)
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdecimal1 (type: decimal(20,10))
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1]
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: decimal(20,10))
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: decimal(20,10))
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -391,10 +701,14 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT COUNT(*) FROM decimal_date_test WHERE cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -411,31 +725,77 @@ STAGE PLANS:
TableScan
alias: decimal_date_test
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterDecimalColumnNotBetween(col 1, left -2000, right 4390.1351351351) -> boolean
predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean)
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Select Operator
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: []
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0]
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 0) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0]
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -695,12 +1055,16 @@ POSTHOOK: Input: default@decimal_date_test
6172
PREHOOK: query: -- projections
-EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0
+EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0
PREHOOK: type: QUERY
POSTHOOK: query: -- projections
-EXPLAIN SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0
+EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate IN (CAST("1969-10-26" AS DATE), CAST("1969-07-14" AS DATE)) as c0 FROM decimal_date_test) tab GROUP BY c0
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -717,12 +1081,27 @@ STAGE PLANS:
TableScan
alias: decimal_date_test
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: (cdate) IN (1969-10-26, 1969-07-14) (type: boolean)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [4]
+ selectExpressions: LongColumnInList(col 3, values [-67, -171]) -> 4:boolean
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 4
+ native: false
+ projectedOutputColumns: [0]
keys: _col0 (type: boolean)
mode: hash
outputColumnNames: _col0, _col1
@@ -731,20 +1110,50 @@ STAGE PLANS:
key expressions: _col0 (type: boolean)
sort order: +
Map-reduce partition columns: _col0 (type: boolean)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: [0]
keys: KEY._col0 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -757,10 +1166,14 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 IN (2365.8945945946, 881.0135135135, -3367.6517567568) as c0 FROM decimal_date_test) tab GROUP BY c0
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -777,12 +1190,27 @@ STAGE PLANS:
TableScan
alias: decimal_date_test
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [4]
+ selectExpressions: DecimalColumnInList(col 1, values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 4:boolean
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 4
+ native: false
+ projectedOutputColumns: [0]
keys: _col0 (type: boolean)
mode: hash
outputColumnNames: _col0, _col1
@@ -791,20 +1219,50 @@ STAGE PLANS:
key expressions: _col0 (type: boolean)
sort order: +
Map-reduce partition columns: _col0 (type: boolean)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: [0]
keys: KEY._col0 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -817,10 +1275,14 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdate BETWEEN CAST("1969-12-30" AS DATE) AND CAST("1970-01-02" AS DATE) as c0 FROM decimal_date_test) tab GROUP BY c0
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -837,12 +1299,27 @@ STAGE PLANS:
TableScan
alias: decimal_date_test
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: cdate BETWEEN 1969-12-30 AND 1970-01-02 (type: boolean)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [4]
+ selectExpressions: VectorUDFAdaptor(cdate BETWEEN 1969-12-30 AND 1970-01-02) -> 4:Long
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 4
+ native: false
+ projectedOutputColumns: [0]
keys: _col0 (type: boolean)
mode: hash
outputColumnNames: _col0, _col1
@@ -851,20 +1328,50 @@ STAGE PLANS:
key expressions: _col0 (type: boolean)
sort order: +
Map-reduce partition columns: _col0 (type: boolean)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: [0]
keys: KEY._col0 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -877,10 +1384,14 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT c0, count(1) from (SELECT cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 as c0 FROM decimal_date_test) tab GROUP BY c0
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -897,12 +1408,27 @@ STAGE PLANS:
TableScan
alias: decimal_date_test
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 (type: boolean)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [4]
+ selectExpressions: VectorUDFAdaptor(cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351) -> 4:Long
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 4
+ native: false
+ projectedOutputColumns: [0]
keys: _col0 (type: boolean)
mode: hash
outputColumnNames: _col0, _col1
@@ -911,20 +1437,50 @@ STAGE PLANS:
key expressions: _col0 (type: boolean)
sort order: +
Map-reduce partition columns: _col0 (type: boolean)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 12288 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: [0]
keys: KEY._col0 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 6144 Data size: 1233808 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
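A note on the vector_between_in.q.out plans above: the DATE literals in the vectorized predicates are compiled to days-since-epoch long constants, e.g. FilterLongColumnInList(col 3, values [-67, -171]) for 1969-10-26 and 1969-07-14, and FilterLongColumnBetween(col 3, left -2, right 1) for the 1969-12-30 .. 1970-01-02 range. A small hedged check of those constants (DATEDIFF returns end date minus start date in days):

    SELECT DATEDIFF('1969-10-26', '1970-01-01') AS d1,   -- -67
           DATEDIFF('1969-07-14', '1970-01-01') AS d2,   -- -171
           DATEDIFF('1970-01-21', '1970-01-01') AS d3,   --  20
           DATEDIFF('1969-12-30', '1970-01-01') AS d4,   --  -2
           DATEDIFF('1970-01-02', '1970-01-01') AS d5;   --   1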
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out
index 424a2c9..bf6c494 100644
--- a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out
@@ -97,20 +97,24 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s
POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
i,
AVG(CAST(50 AS INT)) AS `avg_int_ok`,
AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
FROM over1korc GROUP BY i ORDER BY i LIMIT 10
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
i,
AVG(CAST(50 AS INT)) AS `avg_int_ok`,
AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
FROM over1korc GROUP BY i ORDER BY i LIMIT 10
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -128,12 +132,27 @@ STAGE PLANS:
TableScan
alias: over1korc
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Select Operator
expressions: i (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2]
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: avg(50), avg(50.0), avg(50)
+ Group By Vectorization:
+ aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct<count:bigint,sum:double>, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct<count:bigint,sum:double>, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct<count:bigint,sum:decimal(20,0)>
+ className: VectorGroupByOperator
+ vectorOutput: false
+ keyExpressions: col 2
+ native: false
+ projectedOutputColumns: [0, 1, 2]
+ vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct<count:bigint,sum:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct<count:bigint,sum:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct<count:bigint,sum:decimal(20,0)> output type STRUCT requires PRIMITIVE IS false
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -146,7 +165,20 @@ STAGE PLANS:
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: struct<count:bigint,sum:decimal(12,0),input:decimal(10,0)>)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,input:int> of Column[VALUE._col0] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2)
@@ -162,16 +194,33 @@ STAGE PLANS:
value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
Reducer 3
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4))
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
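In the vector_cast_constant.q.out plan above, the map-side Group By reports vectorOutput: false because the AVG aggregators emit struct-typed partials (count plus sum), and Reducer 2 is left unvectorized with the notVectorizedReason shown for the struct<count:bigint,sum:double,input:int> value. Purely as an illustration of why those partials are structs, and not a claim about how the optimizer would plan it, the same values can be computed with primitive-typed aggregates:

    -- hedged rewrite sketch: AVG(x) expressed as SUM(x) / COUNT(x), whose partials are primitive
    SELECT i,
           SUM(CAST(50 AS INT))     / COUNT(CAST(50 AS INT))     AS avg_int_ok,
           SUM(CAST(50 AS DOUBLE))  / COUNT(CAST(50 AS DOUBLE))  AS avg_double_ok,
           SUM(CAST(50 AS DECIMAL)) / COUNT(CAST(50 AS DECIMAL)) AS avg_decimal_ok
    FROM over1korc GROUP BY i ORDER BY i LIMIT 10;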
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_char_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_char_4.q.out b/ql/src/test/results/clientpositive/spark/vector_char_4.q.out
index 3e551bb..943a4b1 100644
--- a/ql/src/test/results/clientpositive/spark/vector_char_4.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_char_4.q.out
@@ -121,12 +121,16 @@ POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@char_lazy_binary_columnar
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -142,12 +146,23 @@ STAGE PLANS:
TableScan
alias: vectortab2korc
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Select Operator
expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19]
+ selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -155,6 +170,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe
name: default.char_lazy_binary_columnar
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
Stage: Stage-0
Move Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
index 73272fb..4322dc7 100644
--- a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
@@ -1231,14 +1231,18 @@ POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_web_page_sk SIMPLE [
POSTHOOK: Lineage: web_sales PARTITION(ws_web_site_sk=9).ws_wholesale_cost SIMPLE [(web_sales_txt)web_sales_txt.FieldSchema(name:ws_wholesale_cost, type:decimal(7,2), comment:null), ]
PREHOOK: query: ------------------------------------------------------------------------------------------
-explain
+explain vectorization expression
select count(distinct ws_order_number) from web_sales
PREHOOK: type: QUERY
POSTHOOK: query: ------------------------------------------------------------------------------------------
-explain
+explain vectorization expression
select count(distinct ws_order_number) from web_sales
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1256,11 +1260,24 @@ STAGE PLANS:
TableScan
alias: web_sales
Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
Select Operator
expressions: ws_order_number (type: int)
outputColumnNames: ws_order_number
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [16]
Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 16
+ native: false
+ projectedOutputColumns: []
keys: ws_order_number (type: int)
mode: hash
outputColumnNames: _col0
@@ -1269,35 +1286,88 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1000 Data size: 1752000 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(col 0) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0]
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
Reducer 3
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 0) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0]
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
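The vector_count_distinct.q.out plan above shows the usual two-stage execution of count(distinct): the map side and Reducer 2 first de-duplicate ws_order_number with a keyed Group By, a second Group By then counts the surviving keys, and Reducer 3 merges the partial counts. As a hedged sketch, that plan corresponds to a rewrite along these lines:

    -- de-duplicate first, then count the non-null keys; matches count(distinct ws_order_number)
    SELECT COUNT(ws_order_number)
    FROM (SELECT ws_order_number FROM web_sales GROUP BY ws_order_number) dedup;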