You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/10/13 10:50:49 UTC
[30/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN
display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
index 882e83d..5d28d22 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out
@@ -35,7 +35,7 @@ PREHOOK: query: -- EXPLAIN
-- round(1.0/0.0, 0), round(power(-1.0,0.5), 0)
-- FROM decimal_tbl_1_orc ORDER BY dec;
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT
round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3),
round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4),
@@ -51,13 +51,17 @@ POSTHOOK: query: -- EXPLAIN
-- round(1.0/0.0, 0), round(power(-1.0,0.5), 0)
-- FROM decimal_tbl_1_orc ORDER BY dec;
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT
round(dec) as d, round(dec, 0), round(dec, 1), round(dec, 2), round(dec, 3),
round(dec, -1), round(dec, -2), round(dec, -3), round(dec, -4),
round(dec, -5), round(dec, -6), round(dec, -7), round(dec, -8)
FROM decimal_tbl_1_orc ORDER BY d
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -75,26 +79,61 @@ STAGE PLANS:
TableScan
alias: decimal_tbl_1_orc
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0]
Select Operator
expressions: round(dec) (type: decimal(21,0)), round(dec, 0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) (type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -1) (type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, -3) (type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -5) (type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -7) (type: decimal(21,0)), round(dec, -8) (type: decimal(21,0))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
+ selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 3:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 4:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 5:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 11:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7)
-> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 13:decimal(21,0)
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: decimal(21,0))
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0))
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(22,1)), VALUE._col2 (type: decimal(23,2)), VALUE._col3 (type: decimal(24,3)), VALUE._col4 (type: decimal(21,0)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(21,0))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -153,7 +192,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_tbl_2_orc
#### A masked pattern was here ####
125.315000000000000000 -125.315000000000000000
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT
round(pos) as p, round(pos, 0),
round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4),
@@ -163,7 +202,7 @@ SELECT
round(neg, -1), round(neg, -2), round(neg, -3), round(neg, -4)
FROM decimal_tbl_2_orc ORDER BY p
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT
round(pos) as p, round(pos, 0),
round(pos, 1), round(pos, 2), round(pos, 3), round(pos, 4),
@@ -173,6 +212,10 @@ SELECT
round(neg, -1), round(neg, -2), round(neg, -3), round(neg, -4)
FROM decimal_tbl_2_orc ORDER BY p
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -190,26 +233,61 @@ STAGE PLANS:
TableScan
alias: decimal_tbl_2_orc
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Select Operator
expressions: round(pos) (type: decimal(21,0)), round(pos, 0) (type: decimal(21,0)), round(pos, 1) (type: decimal(22,1)), round(pos, 2) (type: decimal(23,2)), round(pos, 3) (type: decimal(24,3)), round(pos, 4) (type: decimal(25,4)), round(pos, -1) (type: decimal(21,0)), round(pos, -2) (type: decimal(21,0)), round(pos, -3) (type: decimal(21,0)), round(pos, -4) (type: decimal(21,0)), round(neg) (type: decimal(21,0)), round(neg, 0) (type: decimal(21,0)), round(neg, 1) (type: decimal(22,1)), round(neg, 2) (type: decimal(23,2)), round(neg, 3) (type: decimal(24,3)), round(neg, 4) (type: decimal(25,4)), round(neg, -1) (type: decimal(21,0)), round(neg, -2) (type: decimal(21,0)), round(neg, -3) (type: decimal(21,0)), round(neg, -4) (type: decimal(21,0))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+ selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 4:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 5:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 6:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 7:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 11:decimal(21,0), FuncRoundDecimalToDecimal(col 1) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 0) -> 13:decimal(21,0), FuncRoundW
ithNumDigitsDecimalToDecimal(col 1, decimalPlaces 1) -> 14:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 2) -> 15:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 3) -> 16:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 4) -> 17:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -1) -> 18:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -2) -> 19:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -3) -> 20:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -4) -> 21:decimal(21,0)
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: decimal(21,0))
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(25,4)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(22,1)), _col13 (type: decimal(23,2)), _col14 (type: decimal(24,3)), _col15 (type: decimal(25,4)), _col16 (type: decimal(21,0)), _col17 (type: decimal(21,0)), _col18 (type: decimal(21,0)), _col19 (type: decimal(21,0))
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(22,1)), VALUE._col2 (type: decimal(23,2)), VALUE._col3 (type: decimal(24,3)), VALUE._col4 (type: decimal(25,4)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(22,1)), VALUE._col12 (type: decimal(23,2)), VALUE._col13 (type: decimal(24,3)), VALUE._col14 (type: decimal(25,4)), VALUE._col15 (type: decimal(21,0)), VALUE._col16 (type: decimal(21,0)), VALUE._col17 (type: decimal(21,0)), VALUE._col18 (type: decimal(21,0))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -273,7 +351,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_tbl_3_orc
#### A masked pattern was here ####
3.141592653589793000
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT
round(dec, -15) as d, round(dec, -16),
round(dec, -13), round(dec, -14),
@@ -294,7 +372,7 @@ SELECT
round(dec, 15), round(dec, 16)
FROM decimal_tbl_3_orc ORDER BY d
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT
round(dec, -15) as d, round(dec, -16),
round(dec, -13), round(dec, -14),
@@ -315,6 +393,10 @@ SELECT
round(dec, 15), round(dec, 16)
FROM decimal_tbl_3_orc ORDER BY d
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -332,26 +414,61 @@ STAGE PLANS:
TableScan
alias: decimal_tbl_3_orc
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0]
Select Operator
expressions: round(dec, -15) (type: decimal(21,0)), round(dec, -16) (type: decimal(21,0)), round(dec, -5) (type: decimal(21,0)), round(dec, -6) (type: decimal(21,0)), round(dec, -3) (type: decimal(21,0)), round(dec, -4) (type: decimal(21,0)), round(dec, -1) (type: decimal(21,0)), round(dec, -2) (type: decimal(21,0)), round(dec, 0) (type: decimal(21,0)), round(dec, 1) (type: decimal(22,1)), round(dec, 2) (type: decimal(23,2)), round(dec, 3) (type: decimal(24,3)), round(dec, -13) (type: decimal(21,0)), round(dec, 4) (type: decimal(25,4)), round(dec, 5) (type: decimal(26,5)), round(dec, 6) (type: decimal(27,6)), round(dec, 7) (type: decimal(28,7)), round(dec, 8) (type: decimal(29,8)), round(dec, 9) (type: decimal(30,9)), round(dec, 10) (type: decimal(31,10)), round(dec, 11) (type: decimal(32,11)), round(dec, 12) (type: decimal(33,12)), round(dec, 13) (type: decimal(34,13)), round(dec, -14) (type: decimal(21,0)), round(dec, 14) (type: decimal(35,14)), round(dec, 15)
(type: decimal(36,15)), round(dec, 16) (type: decimal(37,16)), round(dec, -11) (type: decimal(21,0)), round(dec, -12) (type: decimal(21,0)), round(dec, -9) (type: decimal(21,0)), round(dec, -10) (type: decimal(21,0)), round(dec, -7) (type: decimal(21,0)), round(dec, -8) (type: decimal(21,0))
outputColumnNames: _col0, _col1, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col2, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col3, _col31, _col32, _col33, _col4, _col5, _col6, _col7, _col8, _col9
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
+ selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -15) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -16) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 4:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 5:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 10:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 11:decimal(23,2), FuncRoundWithNumDigitsDecimal
ToDecimal(col 0, decimalPlaces 3) -> 12:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -13) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 14:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 5) -> 15:decimal(26,5), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 6) -> 16:decimal(27,6), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 7) -> 17:decimal(28,7), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 8) -> 18:decimal(29,8), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 19:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 10) -> 20:decimal(31,10), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 11) -> 21:decimal(32,11), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 12) -> 22:decimal(33,12), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 13) -> 23:decimal(34,13), FuncRoun
dWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -14) -> 24:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 14) -> 25:decimal(35,14), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 15) -> 26:decimal(36,15), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 16) -> 27:decimal(37,16), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -11) -> 28:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -12) -> 29:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -9) -> 30:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -10) -> 31:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 32:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 33:decimal(21,0)
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: decimal(21,0))
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(21,0)), _col3 (type: decimal(21,0)), _col4 (type: decimal(21,0)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)), _col13 (type: decimal(21,0)), _col14 (type: decimal(21,0)), _col15 (type: decimal(21,0)), _col16 (type: decimal(21,0)), _col17 (type: decimal(22,1)), _col18 (type: decimal(23,2)), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,4)), _col21 (type: decimal(26,5)), _col22 (type: decimal(27,6)), _col23 (type: decimal(28,7)), _col24 (type: decimal(29,8)), _col25 (type: decimal(30,9)), _col26 (type: decimal(31,10)), _col27 (type: decimal(32,11)), _col28 (type: decimal(33,12)), _col29 (type: decimal(34,13)), _col31 (type: decimal(35,14)), _col32 (type: decimal(36,15)), _col33 (type: decimal(37,16))
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: decimal(21,0)), VALUE._col0 (type: decimal(21,0)), VALUE._col1 (type: decimal(21,0)), VALUE._col2 (type: decimal(21,0)), VALUE._col3 (type: decimal(21,0)), VALUE._col4 (type: decimal(21,0)), VALUE._col5 (type: decimal(21,0)), VALUE._col6 (type: decimal(21,0)), VALUE._col7 (type: decimal(21,0)), VALUE._col8 (type: decimal(21,0)), VALUE._col9 (type: decimal(21,0)), VALUE._col10 (type: decimal(21,0)), VALUE._col11 (type: decimal(21,0)), VALUE._col12 (type: decimal(21,0)), VALUE._col13 (type: decimal(21,0)), VALUE._col14 (type: decimal(21,0)), VALUE._col15 (type: decimal(21,0)), VALUE._col16 (type: decimal(22,1)), VALUE._col17 (type: decimal(23,2)), VALUE._col18 (type: decimal(24,3)), VALUE._col19 (type: decimal(25,4)), VALUE._col20 (type: decimal(26,5)), VALUE._col21 (type: decimal(27,6)), VALUE._col22 (type: decimal(28,7)), VALUE._col23 (type: decimal(29,8)), VALUE._col24 (type: decimal(30,9)), VALUE._col25 (type: decimal(31,10)),
VALUE._col26 (type: decimal(32,11)), VALUE._col27 (type: decimal(33,12)), VALUE._col28 (type: decimal(34,13)), VALUE._col28 (type: decimal(34,13)), VALUE._col29 (type: decimal(35,14)), VALUE._col30 (type: decimal(36,15)), VALUE._col31 (type: decimal(37,16))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 32]
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -438,14 +555,18 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_tbl_4_orc
#### A masked pattern was here ####
1809242.315111134400000000 -1809242.315111134400000000
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9)
FROM decimal_tbl_4_orc ORDER BY p
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT round(pos, 9) as p, round(neg, 9), round(1809242.3151111344BD, 9), round(-1809242.3151111344BD, 9)
FROM decimal_tbl_4_orc ORDER BY p
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -463,26 +584,62 @@ STAGE PLANS:
TableScan
alias: decimal_tbl_4_orc
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Select Operator
expressions: round(pos, 9) (type: decimal(30,9)), round(neg, 9) (type: decimal(30,9))
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 3]
+ selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 2:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 9) -> 3:decimal(30,9)
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: decimal(30,9))
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: decimal(30,9))
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: decimal(30,9)), VALUE._col0 (type: decimal(30,9)), 1809242.315111134 (type: decimal(17,9)), -1809242.315111134 (type: decimal(17,9))
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
+ selectExpressions: ConstantVectorExpression(val 1809242.315111134) -> 2:decimal(17,9), ConstantVectorExpression(val -1809242.315111134) -> 3:decimal(17,9)
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out
index 5ea9f4d..90f3371 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out
@@ -48,14 +48,18 @@ POSTHOOK: Input: default@decimal_udf2_txt
POSTHOOK: Output: default@decimal_udf2
POSTHOOK: Lineage: decimal_udf2.key SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:key, type:decimal(20,10), comment:null), ]
POSTHOOK: Lineage: decimal_udf2.value SIMPLE [(decimal_udf2_txt)decimal_udf2_txt.FieldSchema(name:value, type:int, comment:null), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key)
FROM DECIMAL_UDF2 WHERE key = 10
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT acos(key), asin(key), atan(key), cos(key), sin(key), tan(key), radians(key)
FROM DECIMAL_UDF2 WHERE key = 10
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -70,15 +74,30 @@ STAGE PLANS:
TableScan
alias: decimal_udf2
Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterDecimalColEqualDecimalScalar(col 0, val 10) -> boolean
predicate: (key = 10) (type: boolean)
Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: NaN (type: double), NaN (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8]
+ selectExpressions: ConstantVectorExpression(val NaN) -> 2:double, ConstantVectorExpression(val NaN) -> 3:double, ConstantVectorExpression(val 1.4711276743037347) -> 4:double, ConstantVectorExpression(val -0.8390715290764524) -> 5:double, ConstantVectorExpression(val -0.5440211108893698) -> 6:double, ConstantVectorExpression(val 0.6483608274590866) -> 7:double, ConstantVectorExpression(val 0.17453292519943295) -> 8:double
Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -86,6 +105,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Stage: Stage-0
Fetch Operator
@@ -104,20 +131,24 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_udf2
#### A masked pattern was here ####
NaN NaN 1.4711276743037347 -0.8390715290764524 -0.5440211108893698 0.6483608274590866 0.17453292519943295
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT
exp(key), ln(key),
log(key), log(key, key), log(key, value), log(value, key),
log10(key), sqrt(key)
FROM DECIMAL_UDF2 WHERE key = 10
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT
exp(key), ln(key),
log(key), log(key, key), log(key, value), log(value, key),
log10(key), sqrt(key)
FROM DECIMAL_UDF2 WHERE key = 10
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -132,15 +163,30 @@ STAGE PLANS:
TableScan
alias: decimal_udf2
Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterDecimalColEqualDecimalScalar(col 0, val 10) -> boolean
predicate: (key = 10) (type: boolean)
Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9]
+ selectExpressions: ConstantVectorExpression(val 22026.465794806718) -> 2:double, ConstantVectorExpression(val 2.302585092994046) -> 3:double, ConstantVectorExpression(val 2.302585092994046) -> 4:double, ConstantVectorExpression(val 1.0) -> 5:double, FuncLogWithBaseLongToDouble(col 1) -> 6:double, VectorUDFAdaptor(log(value, 10)) -> 7:Double, ConstantVectorExpression(val 1.0) -> 8:double, ConstantVectorExpression(val 3.1622776601683795) -> 9:double
Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -148,6 +194,14 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out b/ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out
index 188c624..c63c486 100644
--- a/ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out
@@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem
POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ]
POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select distinct s, t from vectortab2korc
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select distinct s, t from vectortab2korc
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -128,11 +132,24 @@ STAGE PLANS:
TableScan
alias: vectortab2korc
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Select Operator
expressions: t (type: tinyint), s (type: string)
outputColumnNames: t, s
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 8]
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0, col 8
+ native: false
+ projectedOutputColumns: []
keys: t (type: tinyint), s (type: string)
mode: hash
outputColumnNames: _col0, _col1
@@ -141,13 +158,38 @@ STAGE PLANS:
key expressions: _col0 (type: tinyint), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0, col 1
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: tinyint), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
@@ -155,9 +197,16 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: string), _col0 (type: tinyint)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 0]
Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_elt.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_elt.q.out b/ql/src/test/results/clientpositive/llap/vector_elt.q.out
index bb66867..44ba6de 100644
--- a/ql/src/test/results/clientpositive/llap/vector_elt.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_elt.q.out
@@ -1,29 +1,80 @@
-PREHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint)
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint)
FROM alltypesorc
WHERE ctinyint > 0 LIMIT 10
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint)
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint)
FROM alltypesorc
WHERE ctinyint > 0 LIMIT 10
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 935842 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean
+ predicate: (ctinyint > 0) (type: boolean)
+ Statistics: Num rows: 4096 Data size: 312018 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [13, 6, 2, 16]
+ selectExpressions: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 13:long, VectorElt(columns [14, 6, 15])(children: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 14:long, col 6, CastLongToString(col 2) -> 15:String) -> 16:string
+ Statistics: Num rows: 4096 Data size: 1069830 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 10
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 10 Data size: 2664 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 10 Data size: 2664 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
Stage: Stage-0
Fetch Operator
limit: 10
Processor Tree:
- TableScan
- alias: alltypesorc
- Filter Operator
- predicate: (ctinyint > 0) (type: boolean)
- Select Operator
- expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Limit
- Number of rows: 10
- ListSink
+ ListSink
PREHOOK: query: SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint)
FROM alltypesorc
@@ -47,7 +98,7 @@ POSTHOOK: Input: default@alltypesorc
1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
2 cvLH6Eat2yFsyy7p 528534767 528534767
1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT elt(2, 'abc', 'defg'),
elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'),
elt('1', 'abc', 'defg'),
@@ -60,7 +111,7 @@ SELECT elt(2, 'abc', 'defg'),
elt(3, 'abc', 'defg')
FROM alltypesorc LIMIT 1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT elt(2, 'abc', 'defg'),
elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'),
elt('1', 'abc', 'defg'),
@@ -73,22 +124,68 @@ SELECT elt(2, 'abc', 'defg'),
elt(3, 'abc', 'defg')
FROM alltypesorc LIMIT 1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+ Select Operator
+ expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+ selectExpressions: ConstantVectorExpression(val defg) -> 12:string, ConstantVectorExpression(val cc) -> 13:string, ConstantVectorExpression(val abc) -> 14:string, ConstantVectorExpression(val 2) -> 15:string, ConstantVectorExpression(val 12345) -> 16:string, ConstantVectorExpression(val 123456789012) -> 17:string, ConstantVectorExpression(val 1.25) -> 18:string, ConstantVectorExpression(val 16.0) -> 19:string, ConstantVectorExpression(val null) -> 20:string, ConstantVectorExpression(val null) -> 21:string
+ Statistics: Num rows: 12288 Data size: 8687784 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
Stage: Stage-0
Fetch Operator
limit: 1
Processor Tree:
- TableScan
- alias: alltypesorc
- Select Operator
- expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Limit
- Number of rows: 1
- ListSink
+ ListSink
PREHOOK: query: SELECT elt(2, 'abc', 'defg'),
elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'),
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
index 1e24e81..258c00c 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
@@ -22,14 +22,18 @@ CREATE TABLE dest1(c1 STRING) STORED AS ORC
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
FROM srcorc
INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1)
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
FROM srcorc
INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1)
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
@@ -50,21 +54,55 @@ STAGE PLANS:
TableScan
alias: srcorc
Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Select Operator
expressions: substr(key, 1, 1) (type: string)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2]
+ selectExpressions: StringSubstrColStartLen(col 0, start 0, length 1) -> 2:string
Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: rand() (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0
@@ -73,17 +111,37 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
index 758f70c..9ed40df 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
@@ -22,14 +22,18 @@ CREATE TABLE dest1(c1 STRING) STORED AS ORC
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@dest1
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
FROM srcorc
INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1)
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
FROM srcorc
INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1)
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-2 depends on stages: Stage-1
@@ -50,21 +54,55 @@ STAGE PLANS:
TableScan
alias: srcorc
Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Select Operator
expressions: substr(value, 5, 1) (type: string)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2]
+ selectExpressions: StringSubstrColStartLen(col 1, start 4, length 1) -> 2:string
Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: rand() (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0
@@ -73,17 +111,37 @@ STAGE PLANS:
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0
Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out
index fb5dfe6..c97863a 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out
@@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem
POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ]
POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select s, t, max(b) from vectortab2korc group by s, t
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select s, t, max(b) from vectortab2korc group by s, t
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -128,12 +132,26 @@ STAGE PLANS:
TableScan
alias: vectortab2korc
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Select Operator
expressions: t (type: tinyint), s (type: string), b (type: bigint)
outputColumnNames: t, s, b
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 8, 3]
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: max(b)
+ Group By Vectorization:
+ aggregators: VectorUDAFMaxLong(col 3) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0, col 8
+ native: false
+ projectedOutputColumns: [0]
keys: t (type: tinyint), s (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
@@ -142,15 +160,41 @@ STAGE PLANS:
key expressions: _col0 (type: tinyint), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFMaxLong(col 2) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0, col 1
+ native: false
+ projectedOutputColumns: [0]
keys: KEY._col0 (type: tinyint), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
@@ -158,9 +202,16 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: string), _col0 (type: tinyint), _col2 (type: bigint)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 0, 2]
Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat