You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/02/21 06:34:17 UTC
[1/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal
produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)
Repository: hive
Updated Branches:
refs/heads/master 3df6bc28b -> 8975924ec
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
index 6b63764..9683efa 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
@@ -140,40 +140,23 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFAvgFinal(col 5:struct<count:bigint,sum:double,input:float>) -> double, VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, VectorUDAFCountMerge(col 8:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 9, 11, 10, 14, 1, 12, 2, 15, 3, 13, 17, 16, 4, 5, 18, 20, 21, 6, 19, 22, 7, 8, 24, 25]
- selectExpressions: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 11:double, DoubleColUnaryMinus(col 12:double)(children: DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 12:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 12:double) -> 13:double) -> 12:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 0:double) -> 12:double, DoubleColMultiplyDoubleColumn(col 16:double, col 13:double)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 15:double)(children: DoubleColUnaryMinus(col 15:double)(children: DoubleColUnaryM
inus(col 13:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 15:double) -> 13:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 15:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 15:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 2:double) -> 13:double, DoubleColSubtractDoubleColumn(col 2:double, col 16:double)(children: DoubleColUnaryMinus(col 17:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 16:double) -> 17:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 2:double)(children: DoubleColSubtractDoubleColumn(col 2:double, col 16:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0)
-> 16:double) -> 18:double) -> 16:double) -> 18:double) -> 16:double, DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 18:double, DoubleColUnaryMinus(col 19:double)(children: DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 19:double) -> 20:double, DoubleColDivideDoubleScalar(col 19:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 19:double) -> 21:double, DoubleColUnaryMinus(col 22:double)(children: DoubleColDivideDoubleScalar(col 19:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 19:double) -> 22:double) -> 19:double, DoubleColDivideDoubleColumn(col 0:double, col 1:double) -> 22:double, DoubleColDivideDoubleColumn(col 23:double, col 25:double)(children: CastLongToDouble(col 7:tinyint) -> 23:double, DoubleColDivideDoubleScalar(col 24:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 24:double) -> 25:double) -> 24:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColDivideDoubleColumn(col 0:doubl
e, col 1:double) -> 23:double) -> 25:double
Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -399,40 +382,23 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8)
- Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFMaxDouble(col 3:double) -> double, VectorUDAFAvgFinal(col 4:struct<count:bigint,sum:double,input:tinyint>) -> double, VectorUDAFMinLong(col 5:int) -> int, VectorUDAFMinDouble(col 6:double) -> double, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), (UDFToDouble(_col0) / -3728.0) (type: double), (_col0 * -3728) (type: int), _col1 (type: double), (- (_col0 * -3728)) (type: int), _col2 (type: double), (-563 % (_col0 * -3728)) (type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 (type: double), _col4 (type: double), (_col2 - 10.175) (type: double), _col5 (type: int), (UDFToDouble((_col0 * -3728)) % (_col2 - 10.175)) (type: double), (- _col3) (type: double), _col6 (type: double), (_col3 % -26.28) (type: double), _col7 (type: double), (- (UDFToDouble(_col0) / -3728.0)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((UDFToDouble(_col0) / -3728.0) - _col4) (type: double), (- (_col0 * -3728)) (type: int), _col8 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 10, 11, 1, 13, 2, 14, 9, 15, 3, 4, 16, 5, 19, 17, 6, 18, 7, 20, 12, 21, 23, 8]
- selectExpressions: DoubleColDivideDoubleScalar(col 9:double, val -3728.0)(children: CastLongToDouble(col 0:int) -> 9:double) -> 10:double, LongColMultiplyLongScalar(col 0:int, val -3728) -> 11:int, LongColUnaryMinus(col 12:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 12:int) -> 13:int, LongScalarModuloLongColumn(val -563, col 12:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 12:int) -> 14:int, DoubleColDivideDoubleColumn(col 1:double, col 2:double) -> 9:double, DoubleColUnaryMinus(col 2:double) -> 15:double, DoubleColSubtractDoubleScalar(col 2:double, val 10.175) -> 16:double, DoubleColModuloDoubleColumn(col 17:double, col 18:double)(children: CastLongToDouble(col 12:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 12:int) -> 17:double, DoubleColSubtractDoubleScalar(col 2:double, val 10.175) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 3:double) -> 17:double, DoubleColModuloDoubleScalar(col 3:doubl
e, val -26.28) -> 18:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColDivideDoubleScalar(col 20:double, val -3728.0)(children: CastLongToDouble(col 0:int) -> 20:double) -> 21:double) -> 20:double, LongColModuloLongColumn(col 22:int, col 23:int)(children: LongColUnaryMinus(col 12:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 12:int) -> 22:int, LongScalarModuloLongColumn(val -563, col 12:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 12:int) -> 23:int) -> 12:int, DoubleColSubtractDoubleColumn(col 24:double, col 4:double)(children: DoubleColDivideDoubleScalar(col 21:double, val -3728.0)(children: CastLongToDouble(col 0:int) -> 21:double) -> 24:double) -> 21:double, LongColUnaryMinus(col 22:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 22:int) -> 23:int
Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -515,7 +481,7 @@ WHERE (((cbigint <= 197)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
--20301111 5445.576984978541 -1626869520 7.9684972882908944E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 -58 5454.512308361625 1626869520 7.2647256545687792E16
+-20301111 5445.576984978541 -1626869520 7.9684972882908944E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 511 5454.512308361625 1626869520 7.2647256545687792E16
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT VAR_POP(cbigint),
(-(VAR_POP(cbigint))),
@@ -650,40 +616,23 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 0:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxLong(col 2:tinyint) -> tinyint, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFMaxLong(col 4:int) -> int, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFCountMerge(col 6:bigint) -> bigint, VectorUDAFAvgFinal(col 7:struct<count:bigint,sum:double,input:tinyint>) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: double), (- _col0) (type: double), (_col0 - (- _col0)) (type: double), _col1 (type: bigint), (CAST( _col1 AS decimal(19,0)) % 79.553) (type: decimal(5,3)), _col2 (type: tinyint), (UDFToDouble(_col1) - (- _col0)) (type: double), (- (- _col0)) (type: double), (-1.0 % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: bigint), _col3 (type: double), (- (- (- _col0))) (type: double), (762 * (- _col1)) (type: bigint), _col4 (type: int), (UDFToLong(_col2) + (762 * (- _col1))) (type: bigint), ((- _col0) + UDFToDouble(_col4)) (type: double), _col5 (type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 (type: double), (-3728 % (UDFToLong(_col2) + (762 * (- _col1)))) (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 8, 10, 1, 12, 2, 14, 13, 15, 1, 16, 3, 9, 19, 4, 18, 22, 5, 23, 6, 7, 24]
- selectExpressions: DoubleColUnaryMinus(col 0:double) -> 8:double, DoubleColSubtractDoubleColumn(col 0:double, col 9:double)(children: DoubleColUnaryMinus(col 0:double) -> 9:double) -> 10:double, DecimalColModuloDecimalScalar(col 11:decimal(19,0), val 79.553)(children: CastLongToDecimal(col 1:bigint) -> 11:decimal(19,0)) -> 12:decimal(5,3), DoubleColSubtractDoubleColumn(col 9:double, col 13:double)(children: CastLongToDouble(col 1:bigint) -> 9:double, DoubleColUnaryMinus(col 0:double) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 9:double)(children: DoubleColUnaryMinus(col 0:double) -> 9:double) -> 13:double, DoubleScalarModuloDoubleColumn(val -1.0, col 9:double)(children: DoubleColUnaryMinus(col 0:double) -> 9:double) -> 15:double, LongColUnaryMinus(col 1:bigint) -> 16:bigint, DoubleColUnaryMinus(col 17:double)(children: DoubleColUnaryMinus(col 9:double)(children: DoubleColUnaryMinus(col 0:double) -> 9:double) -> 17:double) -> 9:double, LongScalarMultiply
LongColumn(val 762, col 18:bigint)(children: LongColUnaryMinus(col 1:bigint) -> 18:bigint) -> 19:bigint, LongColAddLongColumn(col 2:bigint, col 20:bigint)(children: col 2:tinyint, LongScalarMultiplyLongColumn(val 762, col 18:bigint)(children: LongColUnaryMinus(col 1:bigint) -> 18:bigint) -> 20:bigint) -> 18:bigint, DoubleColAddDoubleColumn(col 17:double, col 21:double)(children: DoubleColUnaryMinus(col 0:double) -> 17:double, CastLongToDouble(col 4:int) -> 21:double) -> 22:double, LongColModuloLongColumn(col 20:bigint, col 1:bigint)(children: LongColUnaryMinus(col 1:bigint) -> 20:bigint) -> 23:bigint, LongScalarModuloLongColumn(val -3728, col 20:bigint)(children: LongColAddLongColumn(col 2:bigint, col 24:bigint)(children: col 2:tinyint, LongScalarMultiplyLongColumn(val 762, col 20:bigint)(children: LongColUnaryMinus(col 1:bigint) -> 20:bigint) -> 24:bigint) -> 20:bigint) -> 24:bigint
Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -880,40 +829,23 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:tinyint>) -> double, VectorUDAFMaxLong(col 1:bigint) -> bigint, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFMaxDouble(col 5:float) -> float
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: double), (_col0 + 6981.0) (type: double), ((_col0 + 6981.0) + _col0) (type: double), _col1 (type: bigint), (((_col0 + 6981.0) + _col0) / _col0) (type: double), (- (_col0 + 6981.0)) (type: double), _col2 (type: double), (_col0 % (- (_col0 + 6981.0))) (type: double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), (UDFToDouble((- _col1)) / _col2) (type: double), _col5 (type: float), (_col4 * -26.28) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 8, 1, 7, 10, 2, 9, 3, 4, 12, 14, 5, 11]
- selectExpressions: DoubleColAddDoubleScalar(col 0:double, val 6981.0) -> 6:double, DoubleColAddDoubleColumn(col 7:double, col 0:double)(children: DoubleColAddDoubleScalar(col 0:double, val 6981.0) -> 7:double) -> 8:double, DoubleColDivideDoubleColumn(col 9:double, col 0:double)(children: DoubleColAddDoubleColumn(col 7:double, col 0:double)(children: DoubleColAddDoubleScalar(col 0:double, val 6981.0) -> 7:double) -> 9:double) -> 7:double, DoubleColUnaryMinus(col 9:double)(children: DoubleColAddDoubleScalar(col 0:double, val 6981.0) -> 9:double) -> 10:double, DoubleColModuloDoubleColumn(col 0:double, col 11:double)(children: DoubleColUnaryMinus(col 9:double)(children: DoubleColAddDoubleScalar(col 0:double, val 6981.0) -> 9:double) -> 11:double) -> 9:double, LongColUnaryMinus(col 1:bigint) -> 12:bigint, DoubleColDivideDoubleColumn(col 11:double, col 2:double)(children: CastLongToDouble(col 13:bigint)(children: LongColUnaryMinus(col 1:bigint) -> 13:bigint) -> 11:do
uble) -> 14:double, DoubleColMultiplyDoubleScalar(col 4:double, val -26.28) -> 11:double
Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2207,24 +2139,14 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFSumLong(col 2:bigint) -> bigint, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFCountMerge(col 4:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:smallint
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3]
keys: KEY._col0 (type: smallint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -2232,19 +2154,10 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 5, 1, 7, 2, 11, 12, 3, 8, 4, 13]
- selectExpressions: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 5:int, DecimalScalarDivideDecimalColumn(val -1.389, col 6:decimal(5,0))(children: CastLongToDecimal(col 0:smallint) -> 6:decimal(5,0)) -> 7:decimal(10,9), DoubleColDivideDoubleColumn(col 9:double, col 10:double)(children: CastLongToDouble(col 8:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 8:int) -> 9:double, CastLongToDouble(col 2:bigint) -> 10:double) -> 11:double, LongColUnaryMinus(col 8:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 8:int) -> 12:int, LongColUnaryMinus(col 13:int)(children: LongColUnaryMinus(col 8:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 8:int) -> 13:int) -> 8:int, LongColSubtractLongScalar(col 4:bigint, val -89010) -> 13:bigint
Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint)
sort order: +++++++++++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Reducer 3
@@ -2481,24 +2394,14 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "var_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFSumDouble(col 6:double) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:double
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: KEY._col0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -2506,19 +2409,10 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: double), _col1 (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762.0)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 7, 8, 2, 10, 11, 3, 4, 12, 5, 9, 13, 6, 15]
- selectExpressions: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1:double) -> 7:double, DoubleColUnaryMinus(col 1:double) -> 8:double, DoubleColAddDoubleScalar(col 9:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1:double) -> 9:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 9:double, col 12:double)(children: DoubleColUnaryMinus(col 1:double) -> 9:double, DoubleColAddDoubleScalar(col 11:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1:double) -> 11:double) -> 12:double) -> 11:double, DoubleColSubtractDoubleColumn(col 0:double, col 9:double)(children: DoubleColUnaryMinus(col 1:double) -> 9:double) -> 12:double, DoubleColAddDoubleColumn(col 0:double, col 1:double) -> 9:double, DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 13:double, DoubleScalarModuloDoubleColumn(val -863.257, col 14:double)(children: DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 14:double) -
> 15:double
Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double)
sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double)
Reducer 3
@@ -2799,24 +2693,14 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFAvgFinal(col 3:struct<count:bigint,sum:double,input:smallint>) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFMinLong(col 5:tinyint) -> tinyint, VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFAvgFinal(col 8:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFVarFinal(col 9:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFAvgFinal(col 10:struct<count:bigint,sum:double,input:float>) -> double, VectorUDAFMinDouble(col 11:double) -> double, VectorUDAFVarFinal(col 12:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFVarFinal(col 13:struct<count:bigint,
sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFSumLong(col 14:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:timestamp, col 1:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
keys: KEY._col0 (type: timestamp), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -2824,19 +2708,10 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28 - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28 - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) * (-
_col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: decimal(8,6)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), (- (- _col4)) (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 15, 16, 3, 17, 18, 4, 19, 22, 5, 21, 23, 6, 20, 26, 27, 7, 25, 8, 9, 29, 28, 10, 30, 32, 24, 11, 12, 31, 34, 37, 13, 14, 38, 40, 4, 39]
- selectExpressions: DoubleColMultiplyDoubleScalar(col 2:double, val 10.175) -> 15:double, DoubleColUnaryMinus(col 2:double) -> 16:double, DoubleColUnaryMinus(col 2:double) -> 17:double, DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 18:double, LongColUnaryMinus(col 4:bigint) -> 19:bigint, DoubleColMultiplyDoubleColumn(col 20:double, col 21:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 20:double, DoubleColUnaryMinus(col 2:double) -> 21:double) -> 22:double, DoubleColMultiplyDoubleColumn(col 23:double, col 20:double)(children: DoubleColMultiplyDoubleColumn(col 20:double, col 21:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 20:double, DoubleColUnaryMinus(col 2:double) -> 21:double) -> 23:double, CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 24:bigint) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 20:double)(children: DoubleColMultiplyDoubleScalar(
col 2:double, val 10.175) -> 20:double) -> 23:double, DoubleColAddDoubleColumn(col 6:double, col 25:double)(children: DoubleColMultiplyDoubleColumn(col 26:double, col 20:double)(children: DoubleColMultiplyDoubleColumn(col 20:double, col 25:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 20:double, DoubleColUnaryMinus(col 2:double) -> 25:double) -> 26:double, CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 24:bigint) -> 20:double) -> 25:double) -> 20:double, DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 2:double) -> 25:double) -> 26:double, DoubleColDivideDoubleColumn(col 25:double, col 2:double)(children: CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 24:bigint) -> 25:double) -> 27:double, DoubleScalarDivideDoubleColumn(val 10.175, col 3:double) -> 25:double, DoubleColSubtractDoubleColumn(col 28:double, col 30:double)(children: DoubleColAddDoubleColumn(col 6:double, col
29:double)(children: DoubleColMultiplyDoubleColumn(col 30:double, col 28:double)(children: DoubleColMultiplyDoubleColumn(col 28:double, col 29:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 28:double, DoubleColUnaryMinus(col 2:double) -> 29:double) -> 30:double, CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 24:bigint) -> 28:double) -> 29:double) -> 28:double, DoubleColMultiplyDoubleColumn(col 31:double, col 29:double)(children: DoubleColMultiplyDoubleColumn(col 29:double, col 30:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 29:double, DoubleColUnaryMinus(col 2:double) -> 30:double) -> 31:double, CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 24:bigint) -> 29:double) -> 30:double) -> 29:double, DoubleColUnaryMinus(col 30:double)(children: DoubleColUnaryMinus(col 28:double)(children: DoubleColMultiplyDoubleScalar(col 2:double, val 10.175) -> 28:double) -> 30:
double) -> 28:double, DoubleColMultiplyDoubleScalar(col 31:double, val 10.175)(children: DoubleColSubtractDoubleColumn(col 30:double, col 32:double)(children: DoubleColAddDoubleColumn(col 6:double, col 31:double)(children: DoubleColMultiplyDoubleColumn(col 32:double, col 30:double)(children: DoubleColMultiplyDoubleColumn(col 30:double, col 31:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 30:double, DoubleColUnaryMinus(col 2:double) -> 31:double) -> 32:double, CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 24:bigint) -> 30:double) -> 31:double) -> 30:double, DoubleColMultiplyDoubleColumn(col 33:double, col 31:double)(children: DoubleColMultiplyDoubleColumn(col 31:double, col 32:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 31:double, DoubleColUnaryMinus(col 2:double) -> 32:double) -> 33:double, CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 24:bigint) -> 31:
double) -> 32:double) -> 31:double) -> 30:double, DoubleScalarModuloDoubleColumn(val 10.175, col 31:double)(children: DoubleScalarDivideDoubleColumn(val 10.175, col 3:double) -> 31:double) -> 32:double, LongColUnaryMinus(col 5:tinyint) -> 24:tinyint, DoubleColUnaryMinus(col 34:double)(children: DoubleColMultiplyDoubleColumn(col 31:double, col 33:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 31:double, DoubleColUnaryMinus(col 2:double) -> 33:double) -> 34:double) -> 31:double, DoubleColModuloDoubleColumn(col 33:double, col 10:double)(children: DoubleColUnaryMinus(col 2:double) -> 33:double) -> 34:double, DecimalScalarDivideDecimalColumn(val -26.28, col 36:decimal(3,0))(children: CastLongToDecimal(col 35:tinyint)(children: LongColUnaryMinus(col 5:tinyint) -> 35:tinyint) -> 36:decimal(3,0)) -> 37:decimal(8,6), DoubleColDivideDoubleColumn(col 33:double, col 7:double)(children: DoubleColAddDoubleColumn(col 6:double, col 38:double)(children: DoubleColMult
iplyDoubleColumn(col 39:double, col 33:double)(children: DoubleColMultiplyDoubleColumn(col 33:double, col 38:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 33:double, DoubleColUnaryMinus(col 2:double) -> 38:double) -> 39:double, CastLongToDouble(col 35:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 35:bigint) -> 33:double) -> 38:double) -> 33:double) -> 38:double, LongColUnaryMinus(col 35:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 35:bigint) -> 40:bigint, DoubleColModuloDoubleScalar(col 33:double, val -26.28)(children: DoubleColAddDoubleColumn(col 6:double, col 39:double)(children: DoubleColMultiplyDoubleColumn(col 41:double, col 33:double)(children: DoubleColMultiplyDoubleColumn(col 33:double, col 39:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 33:double, DoubleColUnaryMinus(col 2:double) -> 39:double) -> 41:double, CastLongToDouble(col 35:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 35:bigi
nt) -> 33:double) -> 39:double) -> 33:double) -> 39:double
Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(8,6)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double)
sort order: +++++++++++++++++++++++++++++++++++++++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Reducer 3
@@ -3198,24 +3073,14 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "var_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9)
- Group By Vectorization:
- aggregators: VectorUDAFMaxDouble(col 1:float) -> float, VectorUDAFSumLong(col 2:bigint) -> bigint, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFAvgFinal(col 4:struct<count:bigint,sum:double,input:double>) -> double, VectorUDAFMinLong(col 5:bigint) -> bigint, VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFSumLong(col 7:bigint) -> bigint, VectorUDAFVarFinal(col 8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 9:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFAvgFinal(col 10:struct<count:bigint,sum:double,input:int>) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:boolean
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
keys: KEY._col0 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -3223,19 +3088,10 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: boolean), _col1 (type: float), (- _col1) (type: float), (-26.28 / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: decimal(23,3)), _col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + (- _col1)) (type: float), _col4 (type: double), (UDFToDouble((CAST( _col2 AS decimal(19,0)) - 10.175)) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + (- _col1))) (type: float), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * CAST( _col5 AS decimal(19,0))) (type: decimal(24,3)), (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0)))) (type: decimal(25,3)), _col9 (type: double), (- (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0))))) (type: decimal(25,3)), _col10 (type: double), (- _col10) (type: double), (_col10 * UDFToDouble(_col7)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 11, 12, 2, 14, 3, 15, 17, 4, 19, 5, 6, 16, 20, 22, 7, 8, 23, 26, 9, 28, 10, 21, 30]
- selectExpressions: DoubleColUnaryMinus(col 1:float) -> 11:float, DoubleScalarDivideDoubleColumn(val -26.28, col 1:double)(children: col 1:float) -> 12:double, DecimalColSubtractDecimalScalar(col 13:decimal(19,0), val 10.175)(children: CastLongToDecimal(col 2:bigint) -> 13:decimal(19,0)) -> 14:decimal(23,3), DoubleColModuloDoubleColumn(col 3:double, col 1:double)(children: col 1:float) -> 15:double, DoubleScalarAddDoubleColumn(val 10.175000190734863, col 16:float)(children: DoubleColUnaryMinus(col 1:float) -> 16:float) -> 17:float, DoubleColAddDoubleColumn(col 16:double, col 3:double)(children: CastDecimalToDouble(col 18:decimal(23,3))(children: DecimalColSubtractDecimalScalar(col 13:decimal(19,0), val 10.175)(children: CastLongToDecimal(col 2:bigint) -> 13:decimal(19,0)) -> 18:decimal(23,3)) -> 16:double) -> 19:double, DoubleColUnaryMinus(col 20:float)(children: DoubleScalarAddDoubleColumn(val 10.175000190734863, col 16:float)(children: DoubleColUnaryMinus(col
1:float) -> 16:float) -> 20:float) -> 16:float, DoubleScalarDivideDoubleColumn(val 79.553, col 6:double) -> 20:double, DoubleColModuloDoubleColumn(col 3:double, col 21:double)(children: DoubleScalarDivideDoubleColumn(val 79.553, col 6:double) -> 21:double) -> 22:double, DecimalScalarMultiplyDecimalColumn(val -1.389, col 13:decimal(19,0))(children: CastLongToDecimal(col 5:bigint) -> 13:decimal(19,0)) -> 23:decimal(24,3), DecimalColSubtractDecimalColumn(col 13:decimal(19,0), col 25:decimal(24,3))(children: CastLongToDecimal(col 7:bigint) -> 13:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 24:decimal(19,0))(children: CastLongToDecimal(col 5:bigint) -> 24:decimal(19,0)) -> 25:decimal(24,3)) -> 26:decimal(25,3), FuncNegateDecimalToDecimal(col 27:decimal(25,3))(children: DecimalColSubtractDecimalColumn(col 13:decimal(19,0), col 25:decimal(24,3))(children: CastLongToDecimal(col 7:bigint) -> 13:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 24:decimal(
19,0))(children: CastLongToDecimal(col 5:bigint) -> 24:decimal(19,0)) -> 25:decimal(24,3)) -> 27:decimal(25,3)) -> 28:decimal(25,3), DoubleColUnaryMinus(col 10:double) -> 21:double, DoubleColMultiplyDoubleColumn(col 10:double, col 29:double)(children: CastLongToDouble(col 7:bigint) -> 29:double) -> 30:double
Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean)
sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double)
Reducer 3
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
index 55ebff2..a5575f5 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
@@ -1088,40 +1088,23 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: true
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:timestamp>) -> double, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: variance, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: std, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev, VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: round(_col0, 0) (type: double), _col1 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col3 BETWEEN 9.20684592523616E19 AND 9.20684592523617E19 (type: boolean), round(_col4, 3) (type: double), round(_col5, 3) (type: double), round(_col6, 3) (type: double), round(_col7, 3) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [8, 9, 10, 11, 12, 13, 14, 15]
- selectExpressions: RoundWithNumDigitsDoubleToDouble(col 0, decimalPlaces 0) -> 8:double, VectorUDFAdaptor(_col1 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19) -> 9:boolean, VectorUDFAdaptor(_col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19) -> 10:boolean, VectorUDFAdaptor(_col3 BETWEEN 9.20684592523616E19 AND 9.20684592523617E19) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 3) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 3) -> 14:double, RoundWithNumDigitsDoubleToDouble(col 7, decimalPlaces 3) -> 15:double
Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
[6/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)
Posted by mm...@apache.org.
HIVE-18756: Vectorization: VectorUDAFVarFinal produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8975924e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8975924e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8975924e
Branch: refs/heads/master
Commit: 8975924ec070380069d71d325a3358fe9932befb
Parents: 3df6bc2
Author: Matt McCline <mm...@hortonworks.com>
Authored: Tue Feb 20 22:33:59 2018 -0800
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Tue Feb 20 22:33:59 2018 -0800
----------------------------------------------------------------------
.../UDAFTemplates/VectorUDAFVarMerge.txt | 5 +
.../llap/parquet_types_vectorization.q.out | 2 +-
.../llap/vector_decimal_aggregate.q.out | 66 +------
.../llap/vector_decimal_udf.q.out | 100 ++--------
.../llap/vector_reuse_scratchcols.q.out | 58 +-----
.../llap/vector_string_decimal.q.out | 137 ++++++++++++++
.../llap/vector_udf_string_to_boolean.q.out | 189 +++++++++++++++++++
.../clientpositive/llap/vectorization_0.q.out | 50 ++---
.../clientpositive/llap/vectorization_1.q.out | 29 +--
.../clientpositive/llap/vectorization_12.q.out | 33 +---
.../clientpositive/llap/vectorization_13.q.out | 57 +-----
.../clientpositive/llap/vectorization_14.q.out | 33 +---
.../clientpositive/llap/vectorization_16.q.out | 30 +--
.../clientpositive/llap/vectorization_2.q.out | 29 +--
.../clientpositive/llap/vectorization_3.q.out | 29 +--
.../clientpositive/llap/vectorization_4.q.out | 29 +--
.../clientpositive/llap/vectorization_9.q.out | 30 +--
.../vectorization_input_format_excludes.q.out | 28 ++-
.../llap/vectorization_part_project.q.out | 12 +-
.../llap/vectorization_short_regress.q.out | 186 +++---------------
.../llap/vectorized_mapjoin3.q.out | 100 ++++------
.../llap/vectorized_parquet.q.out | 7 +-
.../llap/vectorized_parquet_types.q.out | 19 +-
.../llap/vectorized_timestamp.q.out | 24 +--
.../llap/vectorized_timestamp_funcs.q.out | 22 +--
.../spark/parquet_vectorization_0.q.out | 46 +----
.../spark/parquet_vectorization_1.q.out | 28 +--
.../spark/parquet_vectorization_12.q.out | 32 +---
.../spark/parquet_vectorization_13.q.out | 55 +-----
.../spark/parquet_vectorization_14.q.out | 32 +---
.../spark/parquet_vectorization_16.q.out | 29 +--
.../spark/parquet_vectorization_2.q.out | 28 +--
.../spark/parquet_vectorization_3.q.out | 28 +--
.../spark/parquet_vectorization_4.q.out | 28 +--
.../spark/parquet_vectorization_9.q.out | 29 +--
.../spark/vector_decimal_aggregate.q.out | 64 +------
.../clientpositive/spark/vectorization_0.q.out | 46 +----
.../clientpositive/spark/vectorization_1.q.out | 28 +--
.../clientpositive/spark/vectorization_12.q.out | 32 +---
.../clientpositive/spark/vectorization_13.q.out | 55 +-----
.../clientpositive/spark/vectorization_14.q.out | 32 +---
.../clientpositive/spark/vectorization_16.q.out | 29 +--
.../clientpositive/spark/vectorization_2.q.out | 28 +--
.../clientpositive/spark/vectorization_3.q.out | 28 +--
.../clientpositive/spark/vectorization_4.q.out | 28 +--
.../clientpositive/spark/vectorization_9.q.out | 29 +--
.../vectorization_input_format_excludes.q.out | 24 +--
.../spark/vectorization_short_regress.q.out | 178 ++---------------
.../spark/vectorized_timestamp_funcs.q.out | 21 +--
49 files changed, 575 insertions(+), 1686 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt
----------------------------------------------------------------------
diff --git a/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt b/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt
index 9b1c1cd..ccc5a22 100644
--- a/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt
+++ b/ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt
@@ -487,6 +487,9 @@ public class <ClassName> extends VectorAggregateExpression {
* Mode FINAL.
#ENDIF FINAL
*/
+
+/*
+ There seems to be a Wrong Results bug in VectorUDAFVarFinal -- disabling vectorization for now...
return
GenericUDAFVariance.isVarianceFamilyName(name) &&
inputColVectorType == ColumnVector.Type.STRUCT &&
@@ -498,6 +501,8 @@ public class <ClassName> extends VectorAggregateExpression {
outputColVectorType == ColumnVector.Type.DOUBLE &&
mode == Mode.FINAL;
#ENDIF FINAL
+*/
+ return false;
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out
index d62d947..1ccdff8 100644
--- a/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out
@@ -209,7 +209,7 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
index 0a72b3f..5cb3e66 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
@@ -303,55 +303,28 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 16
- dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(20,10), VALUE._col2:decimal(20,10), VALUE._col3:decimal(30,10), VALUE._col4:struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)>, VALUE._col5:struct<count:bigint,sum:double,variance:double>, VALUE._col6:struct<count:bigint,sum:double,variance:double>, VALUE._col7:bigint, VALUE._col8:decimal(23,14), VALUE._col9:decimal(23,14), VALUE._col10:decimal(33,14), VALUE._col11:struct<count:bigint,sum:decimal(33,14),input:decimal(23,14)>, VALUE._col12:struct<count:bigint,sum:double,variance:double>, VALUE._col13:struct<count:bigint,sum:double,variance:double>, VALUE._col14:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 3:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 4:decimal(30,10)) -> decimal(30,10), VectorUDAFAvgDecimalFinal(col 5:struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)>) -> decimal(24,14), VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFMaxDecimal(col 9:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 10:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 11:decimal(33,14)) -> decimal(33,14), VectorUDAFAvgDecimalFinal(col 12:struct<count:bigint,sum:decimal(33,14),input:decimal(23,14)>) -> decimal(27,18), VectorUDAFVarFinal(col 13:struct<count:bigint,sum:do
uble,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 14:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFCountMerge(col 15:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
Statistics: Num rows: 6144 Data size: 1330955 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterLongColGreaterLongScalar(col 15:bigint, val 1)
predicate: (_col15 > 1) (type: boolean)
Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -716,55 +689,28 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 16
- dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5), VALUE._col2:decimal(11,5), VALUE._col3:decimal(21,5), VALUE._col4:struct<count:bigint,sum:decimal(21,5),input:decimal(11,5)>, VALUE._col5:struct<count:bigint,sum:double,variance:double>, VALUE._col6:struct<count:bigint,sum:double,variance:double>, VALUE._col7:bigint, VALUE._col8:decimal(16,0), VALUE._col9:decimal(16,0), VALUE._col10:decimal(26,0), VALUE._col11:struct<count:bigint,sum:decimal(26,0),input:decimal(16,0)>, VALUE._col12:struct<count:bigint,sum:double,variance:double>, VALUE._col13:struct<count:bigint,sum:double,variance:double>, VALUE._col14:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 3:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 4:decimal(21,5)) -> decimal(21,5), VectorUDAFAvgDecimalFinal(col 5:struct<count:bigint,sum:decimal(21,5),input:decimal(11,5)>) -> decimal(15,9), VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFMaxDecimal(col 9:decimal(16,0)) -> decimal(16,0), VectorUDAFMinDecimal(col 10:decimal(16,0)) -> decimal(16,0), VectorUDAFSumDecimal(col 11:decimal(26,0)) -> decimal(26,0), VectorUDAFAvgDecimalFinal(col 12:struct<count:bigint,sum:decimal(26,0),input:decimal(16,0)>) -> decimal(20,4), VectorUDAFVarFinal(col 13:struct<count:bigint,sum:double,variance:doub
le>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 14:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFCountMerge(col 15:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
Statistics: Num rows: 6144 Data size: 1330955 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterLongColGreaterLongScalar(col 15:bigint, val 1)
predicate: (_col15 > 1) (type: boolean)
Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: decimal(15,9)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: decimal(20,4)), _col13 (type: double), _col14 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
index a306a17..f96c769 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
@@ -3280,40 +3280,21 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 3
- dataColumns: KEY._col0:int, VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:struct<count:bigint,sum:double,variance:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev(VALUE._col0), variance(VALUE._col1)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: variance
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1]
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -3434,40 +3415,21 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 3
- dataColumns: KEY._col0:int, VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:struct<count:bigint,sum:double,variance:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1]
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -7263,40 +7225,21 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 3
- dataColumns: KEY._col0:int, VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:struct<count:bigint,sum:double,variance:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev(VALUE._col0), variance(VALUE._col1)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: variance
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1]
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -7418,40 +7361,21 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 3
- dataColumns: KEY._col0:int, VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:struct<count:bigint,sum:double,variance:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1]
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out b/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
index 6528b6f..b9c1ba3 100644
--- a/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
@@ -151,47 +151,24 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double, decimal(22,3), decimal(13,3)]
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 9
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:int>, VALUE._col1:double, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:struct<count:bigint,sum:double,variance:double>, VALUE._col4:struct<count:bigint,sum:double,variance:double>, VALUE._col5:struct<count:bigint,sum:double,input:float>, VALUE._col6:struct<count:bigint,sum:double,variance:double>, VALUE._col7:tinyint, VALUE._col8:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFAvgFinal(col 5:struct<count:bigint,sum:double,input:float>) -> double, VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, VectorUDAFCountMerge(col 8:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 9, 11, 10, 14, 1, 12, 2, 15, 3, 13, 17, 16, 4, 5, 18, 20, 21, 6, 19, 22, 7, 8, 24, 25]
- selectExpressions: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 11:double, DoubleColUnaryMinus(col 12:double)(children: DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 12:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 12:double) -> 13:double) -> 12:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 0:double) -> 12:double, DoubleColMultiplyDoubleColumn(col 16:double, col 13:double)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 15:double)(children: DoubleColUnaryMinus(col 15:double)(children: DoubleColUnaryM
inus(col 13:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 15:double) -> 13:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 15:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 15:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 2:double) -> 13:double, DoubleColSubtractDoubleColumn(col 2:double, col 16:double)(children: DoubleColUnaryMinus(col 17:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 16:double) -> 17:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 2:double)(children: DoubleColSubtractDoubleColumn(col 2:double, col 16:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0)
-> 16:double) -> 18:double) -> 16:double) -> 18:double) -> 16:double, DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 18:double, DoubleColUnaryMinus(col 19:double)(children: DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 19:double) -> 20:double, DoubleColDivideDoubleScalar(col 19:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 19:double) -> 21:double, DoubleColUnaryMinus(col 22:double)(children: DoubleColDivideDoubleScalar(col 19:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 19:double) -> 22:double) -> 19:double, DoubleColDivideDoubleColumn(col 0:double, col 1:double) -> 22:double, DoubleColDivideDoubleColumn(col 23:double, col 25:double)(children: CastLongToDouble(col 7:tinyint) -> 23:double, DoubleColDivideDoubleScalar(col 24:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 24:double) -> 25:double) -> 24:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColDivideDoubleColumn(col 0:doubl
e, col 1:double) -> 23:double) -> 25:double
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -357,47 +334,24 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double, double, double, decimal(22,3), decimal(13,3)]
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 9
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:int>, VALUE._col1:double, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:struct<count:bigint,sum:double,variance:double>, VALUE._col4:struct<count:bigint,sum:double,variance:double>, VALUE._col5:struct<count:bigint,sum:double,input:float>, VALUE._col6:struct<count:bigint,sum:double,variance:double>, VALUE._col7:tinyint, VALUE._col8:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFAvgFinal(col 5:struct<count:bigint,sum:double,input:float>) -> double, VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, VectorUDAFCountMerge(col 8:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 9, 11, 14, 19, 1, 20, 2, 29, 3, 30, 34, 39, 4, 5, 40, 42, 44, 6, 47, 48, 7, 8, 52, 54]
- selectExpressions: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 11:double, DoubleColUnaryMinus(col 13:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 12:double) -> 13:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 17:double, col 18:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColUnaryMinus(col 15:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 15:double) -> 16:double) -> 17:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 0:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 25:double, col 28:double)(children: DoubleColMultiplyDoubleColumn(col 23:double, col 24:double)(children: DoubleColUnaryMinus(col 22:double)(children: DoubleColUnaryM
inus(col 21:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 21:double) -> 22:double) -> 23:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 24:double) -> 25:double, DoubleColUnaryMinus(col 27:double)(children: DoubleColUnaryMinus(col 26:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 26:double) -> 27:double) -> 28:double) -> 29:double, DoubleColUnaryMinus(col 2:double) -> 30:double, DoubleColSubtractDoubleColumn(col 2:double, col 33:double)(children: DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 31:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 31:double) -> 32:double) -> 33:double) -> 34:double, DoubleColMultiplyDoubleColumn(col 38:double, col 2:double)(children: DoubleColSubtractDoubleColumn(col 2:double, col 37:double)(children: DoubleColUnaryMinus(col 36:double)(children: DoubleColUnaryMinus(col 35:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0)
-> 35:double) -> 36:double) -> 37:double) -> 38:double) -> 39:double, DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 40:double, DoubleColUnaryMinus(col 41:double)(children: DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 41:double) -> 42:double, DoubleColDivideDoubleScalar(col 43:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 43:double) -> 44:double, DoubleColUnaryMinus(col 46:double)(children: DoubleColDivideDoubleScalar(col 45:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 45:double) -> 46:double) -> 47:double, DoubleColDivideDoubleColumn(col 0:double, col 1:double) -> 48:double, DoubleColDivideDoubleColumn(col 49:double, col 51:double)(children: CastLongToDouble(col 7:tinyint) -> 49:double, DoubleColDivideDoubleScalar(col 50:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 50:double) -> 51:double) -> 52:double, DoubleColUnaryMinus(col 53:double)(children: DoubleColDivideDoubleColumn(col 0:doubl
e, col 1:double) -> 53:double) -> 54:double
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out b/ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out
new file mode 100644
index 0000000..d792c46
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out
@@ -0,0 +1,137 @@
+PREHOOK: query: drop table orc_decimal
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table orc_decimal
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table staging
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table staging
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table orc_decimal (id decimal(18,0)) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_decimal
+POSTHOOK: query: create table orc_decimal (id decimal(18,0)) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_decimal
+PREHOOK: query: create table staging (id decimal(18,0))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@staging
+POSTHOOK: query: create table staging (id decimal(18,0))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@staging
+PREHOOK: query: insert into staging values (34324.0), (100000000.0), (200000000.0), (300000000.0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@staging
+POSTHOOK: query: insert into staging values (34324.0), (100000000.0), (200000000.0), (300000000.0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@staging
+POSTHOOK: Lineage: staging.id SCRIPT []
+PREHOOK: query: insert overwrite table orc_decimal select id from staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@orc_decimal
+POSTHOOK: query: insert overwrite table orc_decimal select id from staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@orc_decimal
+POSTHOOK: Lineage: orc_decimal.id SIMPLE [(staging)staging.FieldSchema(name:id, type:decimal(18,0), comment:null), ]
+PREHOOK: query: explain vectorization expression
+select * from orc_decimal where id in ('100000000', '200000000')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization expression
+select * from orc_decimal where id in ('100000000', '200000000')
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: orc_decimal
+ Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterDoubleColumnInList(col 2:double, values [1.0E8, 2.0E8])(children: CastDecimalToDouble(col 0:decimal(18,0)) -> 2:double)
+ predicate: (UDFToDouble(id)) IN (1.0E8, 2.0E8) (type: boolean)
+ Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: id (type: decimal(18,0))
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from orc_decimal where id in ('100000000', '200000000')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_decimal
+#### A masked pattern was here ####
+POSTHOOK: query: select * from orc_decimal where id in ('100000000', '200000000')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_decimal
+#### A masked pattern was here ####
+100000000
+200000000
+PREHOOK: query: drop table orc_decimal
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@orc_decimal
+PREHOOK: Output: default@orc_decimal
+POSTHOOK: query: drop table orc_decimal
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@orc_decimal
+POSTHOOK: Output: default@orc_decimal
+PREHOOK: query: drop table staging
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@staging
+PREHOOK: Output: default@staging
+POSTHOOK: query: drop table staging
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@staging
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vector_udf_string_to_boolean.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_udf_string_to_boolean.q.out b/ql/src/test/results/clientpositive/llap/vector_udf_string_to_boolean.q.out
new file mode 100644
index 0000000..647fcb7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_udf_string_to_boolean.q.out
@@ -0,0 +1,189 @@
+PREHOOK: query: create table t (s string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (s string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into t values ('false')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('false')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('FALSE')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('FALSE')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('FaLsE')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('FaLsE')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('true')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('true')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('TRUE')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('TRUE')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('TrUe')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('TrUe')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('Other')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('Other')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('Off')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('Off')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('No')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('No')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('0')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('0')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: insert into t values ('1')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values ('1')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.s SCRIPT []
+PREHOOK: query: explain select s,cast(s as boolean) from t order by s
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select s,cast(s as boolean) from t order by s
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 12 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: s (type: string), UDFToBoolean(s) (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: boolean)
+ Execution mode: llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select s,cast(s as boolean) from t order by s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select s,cast(s as boolean) from t order by s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+ false
+0 false
+1 true
+FALSE false
+FaLsE false
+No false
+Off false
+Other true
+TRUE true
+TrUe true
+false false
+true true
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
index 2333716..49c3036 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
@@ -432,13 +432,12 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -940,13 +939,12 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -1448,13 +1446,12 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -1656,47 +1653,24 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [decimal(13,3), double]
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:bigint>, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:bigint, VALUE._col4:double, VALUE._col5:tinyint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:bigint>) -> double, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFMinLong(col 5:tinyint) -> tinyint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0 + _col0) (type: double), _col1 (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) + (-6432.0 + _col0)) (type: double), _col2 (type: double), (- (-6432.0 + _col0)) (type: double), (-6432.0 + (- (-6432.0 + _col0))) (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) / (- (-6432.0 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 7, 1, 9, 11, 2, 10, 8, 13, 12, 3, 4, 14, 15, 18, 5, 19]
- selectExpressions: DoubleColUnaryMinus(col 0:double) -> 6:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 9:double, DoubleColAddDoubleColumn(col 10:double, col 8:double)(children: DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 11:double, DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 12:double)(children: DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 12:double) -> 8:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 14:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 15:double) -> 12:double, DoubleColModuloDoubleColumn(col 2:double, col 1:double) -> 14:double, DoubleColUnaryMinus(col 2:double) -> 15:double, DoubleColMultiplyDoubleColumn(col 17:double, col 16:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 0:double) -> 16:double) -> 18:double, LongColUnaryMinus(col 5:tinyint) -> 19:tinyint
Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_1.q.out b/ql/src/test/results/clientpositive/llap/vectorization_1.q.out
index 278bd0c..d2de8e7 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_1.q.out
@@ -119,47 +119,24 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double]
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:double, VALUE._col2:tinyint, VALUE._col3:int, VALUE._col4:struct<count:bigint,sum:double,variance:double>, VALUE._col5:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 0:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFMaxLong(col 2:tinyint) -> tinyint, VectorUDAFMaxLong(col 3:int) -> int, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFCountMerge(col 5:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: double), (_col0 / -26.28) (type: double), _col1 (type: double), (-1.389 + _col1) (type: double), (_col1 * (-1.389 + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389 + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175 % (- (_col1 * (-1.389 + _col1)))) (type: double), _col5 (type: bigint), (-563 % _col3) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 1, 7, 9, 2, 8, 3, 12, 4, 13, 5, 14]
- selectExpressions: DoubleColDivideDoubleScalar(col 0:double, val -26.28) -> 6:double, DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 7:double, DoubleColMultiplyDoubleColumn(col 1:double, col 8:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 8:double) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 8:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 8:double) -> 10:double) -> 8:double, DecimalColMultiplyDecimalScalar(col 11:decimal(10,0), val 79.553)(children: CastLongToDecimal(col 3:int) -> 11:decimal(10,0)) -> 12:decimal(16,3), DoubleScalarModuloDoubleColumn(val 10.175, col 10:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 10:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 10:double) -> 13:double) -> 10:double) -> 13:double, LongScalarModuloLongColumn(val -563, col 3:int) -> 14:int
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_12.q.out b/ql/src/test/results/clientpositive/llap/vectorization_12.q.out
index 1285b25..77f8e3b 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_12.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_12.q.out
@@ -146,31 +146,15 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: aaaa
- reduceColumnSortOrder: ++++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 9
- dataColumns: KEY._col0:double, KEY._col1:bigint, KEY._col2:string, KEY._col3:boolean, VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,input:double>, VALUE._col3:bigint, VALUE._col4:struct<count:bigint,sum:double,variance:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFAvgFinal(col 6:struct<count:bigint,sum:double,input:double>) -> double, VectorUDAFSumLong(col 7:bigint) -> bigint, VectorUDAFVarFinal(col 8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:double, col 1:bigint, col 2:string, col 3:boolean
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4]
keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
@@ -178,21 +162,10 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0 * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0 * _col0) / -6432.0) (type: double), (- ((-6432.0 * _col0) / -6432.0)) (type: double), _col6 (type: double), (- (-6432.0 * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0 * _col0)) (type: double), (- (- ((-6432.0 * _col0) / -6432.0))) (type: double), (((-6432.0 * _col0) / -6432.0) + (- (-6432.0 * _col0))) (type: double), _col8 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 3, 2, 0, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8]
- selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 9:double, LongColUnaryMinus(col 1:bigint) -> 10:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 11:bigint, DoubleColDivideDoubleScalar(col 12:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 13:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColDivideDoubleScalar(col 12:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 14:double) -> 12:double, DoubleColUnaryMinus(col 14:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 15:double, DecimalScalarAddDecimalColumn(val -5638.15, col 16:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 16:decimal(19,0)) -> 17:decimal(22,2), DoubleColDivideDoubleColumn(col 6:double, col 14:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 18:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColUnaryMinus(col 19:double)(children: DoubleColDivideDoubleScalar(col 14:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 19:double) -> 14:double) -> 19:double, DoubleColAddDoubleColumn(col 20:double, col 21:double)(children: DoubleColDivideDoubleScalar(col 14:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 20:double, DoubleColUnaryMinus(col 14:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 21:double) -> 14:double
Statistics: Num rows: 1 Data size: 338 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string)
sort order: +++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 1, 2]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [3, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8]
Statistics: Num rows: 1 Data size: 338 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double)
Reducer 3
[3/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal
produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)
Posted by mm...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out
index 59a58e7..34b273c 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out
@@ -117,47 +117,23 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double]
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:double, VALUE._col2:tinyint, VALUE._col3:int, VALUE._col4:struct<count:bigint,sum:double,variance:double>, VALUE._col5:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 0:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFMaxLong(col 2:tinyint) -> tinyint, VectorUDAFMaxLong(col 3:int) -> int, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFCountMerge(col 5:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: double), (_col0 / -26.28) (type: double), _col1 (type: double), (-1.389 + _col1) (type: double), (_col1 * (-1.389 + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389 + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175 % (- (_col1 * (-1.389 + _col1)))) (type: double), _col5 (type: bigint), (-563 % _col3) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 1, 7, 9, 2, 8, 3, 12, 4, 13, 5, 14]
- selectExpressions: DoubleColDivideDoubleScalar(col 0:double, val -26.28) -> 6:double, DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 7:double, DoubleColMultiplyDoubleColumn(col 1:double, col 8:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 8:double) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 8:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 8:double) -> 10:double) -> 8:double, DecimalColMultiplyDecimalScalar(col 11:decimal(10,0), val 79.553)(children: CastLongToDecimal(col 3:int) -> 11:decimal(10,0)) -> 12:decimal(16,3), DoubleScalarModuloDoubleColumn(val 10.175, col 10:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 10:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 10:double) -> 13:double) -> 10:double) -> 13:double, LongScalarModuloLongColumn(val -563, col 3:int) -> 14:int
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out
index c624806..92cff3c 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out
@@ -144,31 +144,14 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: aaaa
- reduceColumnSortOrder: ++++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 9
- dataColumns: KEY._col0:double, KEY._col1:bigint, KEY._col2:string, KEY._col3:boolean, VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,input:double>, VALUE._col3:bigint, VALUE._col4:struct<count:bigint,sum:double,variance:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFAvgFinal(col 6:struct<count:bigint,sum:double,input:double>) -> double, VectorUDAFSumLong(col 7:bigint) -> bigint, VectorUDAFVarFinal(col 8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:double, col 1:bigint, col 2:string, col 3:boolean
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4]
keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
@@ -176,21 +159,10 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0 * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0 * _col0) / -6432.0) (type: double), (- ((-6432.0 * _col0) / -6432.0)) (type: double), _col6 (type: double), (- (-6432.0 * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0 * _col0)) (type: double), (- (- ((-6432.0 * _col0) / -6432.0))) (type: double), (((-6432.0 * _col0) / -6432.0) + (- (-6432.0 * _col0))) (type: double), _col8 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 3, 2, 0, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8]
- selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 9:double, LongColUnaryMinus(col 1:bigint) -> 10:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 11:bigint, DoubleColDivideDoubleScalar(col 12:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 13:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColDivideDoubleScalar(col 12:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 14:double) -> 12:double, DoubleColUnaryMinus(col 14:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 15:double, DecimalScalarAddDecimalColumn(val -5638.15, col 16:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 16:decimal(19,0)) -> 17:decimal(22,2), DoubleColDivideDoubleColumn(col 6:double, col 14:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:
double) -> 14:double) -> 18:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColUnaryMinus(col 19:double)(children: DoubleColDivideDoubleScalar(col 14:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 19:double) -> 14:double) -> 19:double, DoubleColAddDoubleColumn(col 20:double, col 21:double)(children: DoubleColDivideDoubleScalar(col 14:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 20:double, DoubleColUnaryMinus(col 14:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 21:double) -> 14:double
Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string)
sort order: +++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 1, 2]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [3, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8]
Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double)
Reducer 3
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out
index 292b644..2251b11 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out
@@ -146,31 +146,14 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double, decimal(11,4)]
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: aaaaa
- reduceColumnSortOrder: +++++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- dataColumns: KEY._col0:boolean, KEY._col1:tinyint, KEY._col2:timestamp, KEY._col3:float, KEY._col4:string, VALUE._col0:tinyint, VALUE._col1:double, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:struct<count:bigint,sum:double,variance:double>, VALUE._col4:float, VALUE._col5:tinyint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFMaxDouble(col 9:float) -> float, VectorUDAFMinLong(col 10:tinyint) -> tinyint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -178,21 +161,10 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
- selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 11:tinyint, LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 13:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 15:double)(children: CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 6:double) -> 15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 17:float, DoubleColUnaryMinus(col 6:double) -> 18:double, DecimalColSubtractDecimalScalar(col 19:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 19:decimal(3,0)) -> 20:decimal(7,3), DoubleColUnaryMinus(col 21:double)(children: DoubleColUna
ryMinus(col 6:double) -> 21:double) -> 22:double, DoubleScalarDivideDoubleColumn(val -26.28, col 23:double)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 6:double) -> 21:double) -> 23:double) -> 21:double, DoubleColDivideDoubleColumn(col 24:double, col 23:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 23:double)(children: CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 23:double) -> 24:double, CastLongToDouble(col 1:tinyint) -> 23:double) -> 25:double
Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
sort order: +++++++++++++++++++++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Reducer 3
@@ -489,24 +461,14 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFMaxDouble(col 9:float) -> float, VectorUDAFMinLong(col 10:tinyint) -> tinyint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -514,19 +476,10 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
- selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 11:tinyint, LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 13:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 15:double)(children: CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 6:double) -> 15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 17:float, DoubleColUnaryMinus(col 6:double) -> 18:double, DecimalColSubtractDecimalScalar(col 19:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 19:decimal(3,0)) -> 20:decimal(7,3), DoubleColUnaryMinus(col 21:double)(children: DoubleColUna
ryMinus(col 6:double) -> 21:double) -> 22:double, DoubleScalarDivideDoubleColumn(val -26.28, col 23:double)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 6:double) -> 21:double) -> 23:double) -> 21:double, DoubleColDivideDoubleColumn(col 24:double, col 23:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 23:double)(children: CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 23:double) -> 24:double, CastLongToDouble(col 1:tinyint) -> 23:double) -> 25:double
Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
sort order: +++++++++++++++++++++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Reducer 3
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out
index 653a7bf..f5b2896 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out
@@ -147,31 +147,14 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double, double]
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: aaaaa
- reduceColumnSortOrder: +++++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- dataColumns: KEY._col0:string, KEY._col1:float, KEY._col2:double, KEY._col3:timestamp, KEY._col4:boolean, VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:float, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:bigint, VALUE._col4:struct<count:bigint,sum:double,variance:double>, VALUE._col5:struct<count:bigint,sum:double,variance:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMaxDouble(col 6:float) -> float, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFVarFinal(col 9:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFVarFinal(col 10:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string, col 1:float, col 2:double, col 3:timestamp, col 4:boolean
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -179,21 +162,10 @@ STAGE PLANS:
Select Operator
expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [3, 1, 0, 4, 2, 11, 13, 5, 12, 6, 14, 15, 16, 7, 8, 18, 17, 19, 9, 20, 10, 22]
- selectExpressions: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 11:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 12:double) -> 13:double, DoubleColMultiplyDoubleScalar(col 1:float, val -26.280000686645508) -> 12:float, DoubleColUnaryMinus(col 1:float) -> 14:float, DoubleColUnaryMinus(col 6:float) -> 15:float, DoubleColDivideDoubleScalar(col 17:double, val 10.175)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 16:double) -> 17:double) -> 16:double, DoubleColUnaryMinus(col 17:double)(children: DoubleColDivideDoubleScalar(col 18:double, val 10.175)(children: DoubleColUnaryMinus(col 17:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 17:double) -> 18:double) -> 17:double) -> 18:double, DoubleScalarModuloDoubleColumn(val -1.389, col 5:double) -> 17:double, DoubleColSubtractDoubleColumn(col 1:double, co
l 2:double)(children: col 1:float) -> 19:double, DoubleColModuloDoubleScalar(col 9:double, val 10.175) -> 20:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 21:double) -> 22:double
Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp)
sort order: ++++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 1, 2, 3]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [4, 11, 13, 5, 12, 6, 14, 15, 16, 7, 8, 18, 17, 19, 9, 20, 10, 22]
Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double)
Reducer 3
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out
index 336aa65..94fd08c 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out
@@ -119,31 +119,14 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: aaa
- reduceColumnSortOrder: +++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: KEY._col0:double, KEY._col1:string, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:double
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:double, col 1:string, col 2:timestamp
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -151,17 +134,9 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 0, 2, 6, 8, 3, 4, 7, 10, 5, 9, 12, 4]
- selectExpressions: DoubleColSubtractDoubleScalar(col 0:double, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 7:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4:double) -> 7:double, DoubleColMultiplyDoubleColumn(col 4:double, col 9:double)(children: CastLongToDouble(col 3:bigint) -> 9:double) -> 10:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0:double) -> 9:double, DecimalColDivideDecimalScalar(col 11:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 11:decimal(19,0)) -> 12:decimal(28,6)
Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out
index a4d1a39..f8802e4 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out
@@ -121,47 +121,23 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double]
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:smallint>, VALUE._col1:double, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:bigint, VALUE._col4:tinyint, VALUE._col5:struct<count:bigint,sum:double,input:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3), min(VALUE._col4), avg(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:smallint>) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFMinLong(col 4:tinyint) -> tinyint, VectorUDAFAvgFinal(col 5:struct<count:bigint,sum:double,input:double>) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: double), (_col0 % -563.0) (type: double), (_col0 + 762.0) (type: double), _col1 (type: double), _col2 (type: double), (- _col2) (type: double), (_col1 - _col0) (type: double), _col3 (type: bigint), (- (_col1 - _col0)) (type: double), (_col2 - 762.0) (type: double), _col4 (type: tinyint), ((- _col2) + UDFToDouble(_col4)) (type: double), _col5 (type: double), (((- _col2) + UDFToDouble(_col4)) - _col1) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 7, 1, 2, 8, 9, 3, 11, 10, 4, 14, 5, 12]
- selectExpressions: DoubleColModuloDoubleScalar(col 0:double, val -563.0) -> 6:double, DoubleColAddDoubleScalar(col 0:double, val 762.0) -> 7:double, DoubleColUnaryMinus(col 2:double) -> 8:double, DoubleColSubtractDoubleColumn(col 1:double, col 0:double) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColSubtractDoubleColumn(col 1:double, col 0:double) -> 10:double) -> 11:double, DoubleColSubtractDoubleScalar(col 2:double, val 762.0) -> 10:double, DoubleColAddDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 2:double) -> 12:double, CastLongToDouble(col 4:tinyint) -> 13:double) -> 14:double, DoubleColSubtractDoubleColumn(col 15:double, col 1:double)(children: DoubleColAddDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 2:double) -> 12:double, CastLongToDouble(col 4:tinyint) -> 13:double) -> 15:double) -> 12:double
Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out
index 59bf2ce..91b98b4 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out
@@ -126,47 +126,23 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double, decimal(22,3), decimal(8,3)]
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:double, VALUE._col4:struct<count:bigint,sum:double,input:int>, VALUE._col5:struct<count:bigint,sum:double,variance:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_samp(VALUE._col0), stddev_pop(VALUE._col1), stddev_samp(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 0:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFAvgFinal(col 4:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: double), (_col0 - 10.175) (type: double), _col1 (type: double), (_col0 * (_col0 - 10.175)) (type: double), (- _col1) (type: double), (_col0 % 79.553) (type: double), (- (_col0 * (_col0 - 10.175))) (type: double), _col2 (type: double), (- _col0) (type: double), _col3 (type: double), ((- (_col0 * (_col0 - 10.175))) / (_col0 - 10.175)) (type: double), (- (_col0 - 10.175)) (type: double), _col4 (type: double), (-3728.0 - _col0) (type: double), _col5 (type: double), (_col4 / _col2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 1, 8, 7, 9, 10, 2, 11, 3, 14, 13, 4, 12, 5, 15]
- selectExpressions: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 6:double, DoubleColMultiplyDoubleColumn(col 0:double, col 7:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 1:double) -> 7:double, DoubleColModuloDoubleScalar(col 0:double, val 79.553) -> 9:double, DoubleColUnaryMinus(col 11:double)(children: DoubleColMultiplyDoubleColumn(col 0:double, col 10:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 10:double) -> 11:double) -> 10:double, DoubleColUnaryMinus(col 0:double) -> 11:double, DoubleColDivideDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColMultiplyDoubleColumn(col 0:double, col 12:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 12:double) -> 13:double) -> 12:double, DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 13:double) -> 14:double,
DoubleColUnaryMinus(col 12:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 12:double) -> 13:double, DoubleScalarSubtractDoubleColumn(val -3728.0, col 0:double) -> 12:double, DoubleColDivideDoubleColumn(col 4:double, col 2:double) -> 15:double
Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out
index 17024bc..1d0de53 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out
@@ -121,47 +121,23 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 5
- dataColumns: VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,input:double>, VALUE._col3:struct<count:bigint,sum:double,variance:double>, VALUE._col4:tinyint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0), stddev_pop(VALUE._col1), avg(VALUE._col2), var_pop(VALUE._col3), min(VALUE._col4)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFAvgFinal(col 2:struct<count:bigint,sum:double,input:double>) -> double, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFMinLong(col 4:tinyint) -> tinyint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: bigint), (_col0 * -563) (type: bigint), (-3728 + _col0) (type: bigint), _col1 (type: double), (- _col1) (type: double), _col2 (type: double), ((_col0 * -563) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563) % _col0)) / _col2) (type: double), _col3 (type: double), (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2)) (type: double), ((-3728 + _col0) - (_col0 * -563)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2))) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 5, 6, 1, 7, 2, 9, 12, 3, 11, 14, 4, 4, 16]
- selectExpressions: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 5:bigint, LongScalarAddLongColumn(val -3728, col 0:bigint) -> 6:bigint, DoubleColUnaryMinus(col 1:double) -> 7:double, LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 9:bigint, DoubleColDivideDoubleColumn(col 11:double, col 2:double)(children: CastLongToDouble(col 10:bigint)(children: LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 10:bigint) -> 11:double) -> 12:double, DoubleColUnaryMinus(col 13:double)(children: DoubleColDivideDoubleColumn(col 11:double, col 2:double)(children: CastLongToDouble(col 10:bigint)(children: LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 10:bigint) -> 11:double) -> 13:double) -> 11:double, LongColSubtractLongColumn(col 8:big
int, col 10:bigint)(children: LongScalarAddLongColumn(val -3728, col 0:bigint) -> 8:bigint, LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 14:bigint, DoubleColMultiplyDoubleColumn(col 13:double, col 15:double)(children: CastLongToDouble(col 4:tinyint) -> 13:double, DoubleColUnaryMinus(col 16:double)(children: DoubleColDivideDoubleColumn(col 15:double, col 2:double)(children: CastLongToDouble(col 10:bigint)(children: LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 10:bigint) -> 15:double) -> 16:double) -> 15:double) -> 16:double
Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
index 336aa65..94fd08c 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
@@ -119,31 +119,14 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: aaa
- reduceColumnSortOrder: +++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: KEY._col0:double, KEY._col1:string, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:double
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:double, col 1:string, col 2:timestamp
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -151,17 +134,9 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 0, 2, 6, 8, 3, 4, 7, 10, 5, 9, 12, 4]
- selectExpressions: DoubleColSubtractDoubleScalar(col 0:double, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 7:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4:double) -> 7:double, DoubleColMultiplyDoubleColumn(col 4:double, col 9:double)(children: CastLongToDouble(col 3:bigint) -> 9:double) -> 10:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0:double) -> 9:double, DecimalColDivideDecimalScalar(col 11:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 11:decimal(19,0)) -> 12:decimal(28,6)
Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
index 9880bd7..bbe9431 100644
--- a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
@@ -299,55 +299,27 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 16
- dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(20,10), VALUE._col2:decimal(20,10), VALUE._col3:decimal(30,10), VALUE._col4:struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)>, VALUE._col5:struct<count:bigint,sum:double,variance:double>, VALUE._col6:struct<count:bigint,sum:double,variance:double>, VALUE._col7:bigint, VALUE._col8:decimal(23,14), VALUE._col9:decimal(23,14), VALUE._col10:decimal(33,14), VALUE._col11:struct<count:bigint,sum:decimal(33,14),input:decimal(23,14)>, VALUE._col12:struct<count:bigint,sum:double,variance:double>, VALUE._col13:struct<count:bigint,sum:double,variance:double>, VALUE._col14:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 3:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 4:decimal(30,10)) -> decimal(30,10), VectorUDAFAvgDecimalFinal(col 5:struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)>) -> decimal(24,14), VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFMaxDecimal(col 9:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 10:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 11:decimal(33,14)) -> decimal(33,14), VectorUDAFAvgDecimalFinal(col 12:struct<count:bigint,sum:decimal(33,14),input:decimal(23,14)>) -> decimal(27,18), VectorUDAFVarFinal(col 13:struct<count:bigint,sum:do
uble,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 14:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFCountMerge(col 15:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
Statistics: Num rows: 6144 Data size: 1082441 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterLongColGreaterLongScalar(col 15:bigint, val 1)
predicate: (_col15 > 1) (type: boolean)
Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -706,55 +678,27 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [decimal(11,5), decimal(11,5), decimal(16,0), decimal(16,0)]
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: a
- reduceColumnSortOrder: +
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 16
- dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5), VALUE._col2:decimal(11,5), VALUE._col3:decimal(21,5), VALUE._col4:struct<count:bigint,sum:decimal(21,5),input:decimal(11,5)>, VALUE._col5:struct<count:bigint,sum:double,variance:double>, VALUE._col6:struct<count:bigint,sum:double,variance:double>, VALUE._col7:bigint, VALUE._col8:decimal(16,0), VALUE._col9:decimal(16,0), VALUE._col10:decimal(26,0), VALUE._col11:struct<count:bigint,sum:decimal(26,0),input:decimal(16,0)>, VALUE._col12:struct<count:bigint,sum:double,variance:double>, VALUE._col13:struct<count:bigint,sum:double,variance:double>, VALUE._col14:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 3:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 4:decimal(21,5)) -> decimal(21,5), VectorUDAFAvgDecimalFinal(col 5:struct<count:bigint,sum:decimal(21,5),input:decimal(11,5)>) -> decimal(15,9), VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFMaxDecimal(col 9:decimal(16,0)) -> decimal(16,0), VectorUDAFMinDecimal(col 10:decimal(16,0)) -> decimal(16,0), VectorUDAFSumDecimal(col 11:decimal(26,0)) -> decimal(26,0), VectorUDAFAvgDecimalFinal(col 12:struct<count:bigint,sum:decimal(26,0),input:decimal(16,0)>) -> decimal(20,4), VectorUDAFVarFinal(col 13:struct<count:bigint,sum:double,variance:doub
le>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 14:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFCountMerge(col 15:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
Statistics: Num rows: 6144 Data size: 173221 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterLongColGreaterLongScalar(col 15:bigint, val 1)
predicate: (_col15 > 1) (type: boolean)
Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: decimal(15,9)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: decimal(20,4)), _col13 (type: double), _col14 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
index 8faa73a..3b5189f 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
@@ -426,13 +426,11 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -928,13 +926,11 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -1430,13 +1426,11 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -1636,47 +1630,23 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [decimal(13,3), double]
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:bigint>, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:bigint, VALUE._col4:double, VALUE._col5:tinyint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:bigint>) -> double, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFMinLong(col 5:tinyint) -> tinyint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0 + _col0) (type: double), _col1 (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) + (-6432.0 + _col0)) (type: double), _col2 (type: double), (- (-6432.0 + _col0)) (type: double), (-6432.0 + (- (-6432.0 + _col0))) (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) / (- (-6432.0 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 7, 1, 9, 11, 2, 10, 8, 13, 12, 3, 4, 14, 15, 18, 5, 19]
- selectExpressions: DoubleColUnaryMinus(col 0:double) -> 6:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 9:double, DoubleColAddDoubleColumn(col 10:double, col 8:double)(children: DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 11:double, DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 12:double)(children: DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 12:double) -> 8:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double)
-> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 14:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 15:double) -> 12:double, DoubleColModuloDoubleColumn(col 2:double, col 1:double) -> 14:double, DoubleColUnaryMinus(col 2:double) -> 15:double, DoubleColMultiplyDoubleColumn(col 17:double, col 16:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 0:double) -> 16:double) -> 18:double, LongColUnaryMinus(col 5:tinyint) -> 19:tinyint
Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_1.q.out b/ql/src/test/results/clientpositive/spark/vectorization_1.q.out
index 3e8e391..8090d90 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_1.q.out
@@ -117,47 +117,23 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double]
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:double, VALUE._col2:tinyint, VALUE._col3:int, VALUE._col4:struct<count:bigint,sum:double,variance:double>, VALUE._col5:bigint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 0:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFMaxLong(col 2:tinyint) -> tinyint, VectorUDAFMaxLong(col 3:int) -> int, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFCountMerge(col 5:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: double), (_col0 / -26.28) (type: double), _col1 (type: double), (-1.389 + _col1) (type: double), (_col1 * (-1.389 + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389 + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175 % (- (_col1 * (-1.389 + _col1)))) (type: double), _col5 (type: bigint), (-563 % _col3) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 1, 7, 9, 2, 8, 3, 12, 4, 13, 5, 14]
- selectExpressions: DoubleColDivideDoubleScalar(col 0:double, val -26.28) -> 6:double, DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 7:double, DoubleColMultiplyDoubleColumn(col 1:double, col 8:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 8:double) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 8:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 8:double) -> 10:double) -> 8:double, DecimalColMultiplyDecimalScalar(col 11:decimal(10,0), val 79.553)(children: CastLongToDecimal(col 3:int) -> 11:decimal(10,0)) -> 12:decimal(16,3), DoubleScalarModuloDoubleColumn(val 10.175, col 10:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 10:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1:double) -> 10:double) -> 13:double) -> 10:double) -> 13:double, LongScalarModuloLongColumn(v
al -563, col 3:int) -> 14:int
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
[4/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal
produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)
Posted by mm...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
index 0027ab5..bd5e284 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
@@ -142,40 +142,24 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFAvgFinal(col 5:struct<count:bigint,sum:double,input:float>) -> double, VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMinLong(col 7:tinyint) -> tinyint, VectorUDAFCountMerge(col 8:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 9, 11, 10, 14, 1, 12, 2, 15, 3, 13, 17, 16, 4, 5, 18, 20, 21, 6, 19, 22, 7, 8, 24, 25]
- selectExpressions: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 11:double, DoubleColUnaryMinus(col 12:double)(children: DoubleColUnaryMinus(col 10:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 10:double) -> 12:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 12:double) -> 13:double) -> 12:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 0:double) -> 12:double, DoubleColMultiplyDoubleColumn(col 16:double, col 13:double)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 15:double)(children: DoubleColUnaryMinus(col 15:double)(children: DoubleColUnaryM
inus(col 13:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 15:double) -> 13:double, DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 15:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 13:double) -> 15:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 2:double) -> 13:double, DoubleColSubtractDoubleColumn(col 2:double, col 16:double)(children: DoubleColUnaryMinus(col 17:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0) -> 16:double) -> 17:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 2:double)(children: DoubleColSubtractDoubleColumn(col 2:double, col 16:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColAddDoubleScalar(col 0:double, val -3728.0)
-> 16:double) -> 18:double) -> 16:double) -> 18:double) -> 16:double, DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 18:double, DoubleColUnaryMinus(col 19:double)(children: DoubleScalarSubtractDoubleColumn(val 10.175, col 4:double) -> 19:double) -> 20:double, DoubleColDivideDoubleScalar(col 19:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 19:double) -> 21:double, DoubleColUnaryMinus(col 22:double)(children: DoubleColDivideDoubleScalar(col 19:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 19:double) -> 22:double) -> 19:double, DoubleColDivideDoubleColumn(col 0:double, col 1:double) -> 22:double, DoubleColDivideDoubleColumn(col 23:double, col 25:double)(children: CastLongToDouble(col 7:tinyint) -> 23:double, DoubleColDivideDoubleScalar(col 24:double, val -563.0)(children: DoubleColUnaryMinus(col 2:double) -> 24:double) -> 25:double) -> 24:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColDivideDoubleColumn(col 0:doubl
e, col 1:double) -> 23:double) -> 25:double
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -403,40 +387,24 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8)
- Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFMaxDouble(col 3:double) -> double, VectorUDAFAvgFinal(col 4:struct<count:bigint,sum:double,input:tinyint>) -> double, VectorUDAFMinLong(col 5:int) -> int, VectorUDAFMinDouble(col 6:double) -> double, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), (UDFToDouble(_col0) / -3728.0) (type: double), (_col0 * -3728) (type: int), _col1 (type: double), (- (_col0 * -3728)) (type: int), _col2 (type: double), (-563 % (_col0 * -3728)) (type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 (type: double), _col4 (type: double), (_col2 - 10.175) (type: double), _col5 (type: int), (UDFToDouble((_col0 * -3728)) % (_col2 - 10.175)) (type: double), (- _col3) (type: double), _col6 (type: double), (_col3 % -26.28) (type: double), _col7 (type: double), (- (UDFToDouble(_col0) / -3728.0)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((UDFToDouble(_col0) / -3728.0) - _col4) (type: double), (- (_col0 * -3728)) (type: int), _col8 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 10, 11, 1, 13, 2, 14, 9, 15, 3, 4, 16, 5, 19, 17, 6, 18, 7, 20, 12, 21, 23, 8]
- selectExpressions: DoubleColDivideDoubleScalar(col 9:double, val -3728.0)(children: CastLongToDouble(col 0:int) -> 9:double) -> 10:double, LongColMultiplyLongScalar(col 0:int, val -3728) -> 11:int, LongColUnaryMinus(col 12:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 12:int) -> 13:int, LongScalarModuloLongColumn(val -563, col 12:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 12:int) -> 14:int, DoubleColDivideDoubleColumn(col 1:double, col 2:double) -> 9:double, DoubleColUnaryMinus(col 2:double) -> 15:double, DoubleColSubtractDoubleScalar(col 2:double, val 10.175) -> 16:double, DoubleColModuloDoubleColumn(col 17:double, col 18:double)(children: CastLongToDouble(col 12:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 12:int) -> 17:double, DoubleColSubtractDoubleScalar(col 2:double, val 10.175) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 3:double) -> 17:double, DoubleColModuloDoubleScalar(col 3:doubl
e, val -26.28) -> 18:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColDivideDoubleScalar(col 20:double, val -3728.0)(children: CastLongToDouble(col 0:int) -> 20:double) -> 21:double) -> 20:double, LongColModuloLongColumn(col 22:int, col 23:int)(children: LongColUnaryMinus(col 12:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 12:int) -> 22:int, LongScalarModuloLongColumn(val -563, col 12:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 12:int) -> 23:int) -> 12:int, DoubleColSubtractDoubleColumn(col 24:double, col 4:double)(children: DoubleColDivideDoubleScalar(col 21:double, val -3728.0)(children: CastLongToDouble(col 0:int) -> 21:double) -> 24:double) -> 21:double, LongColUnaryMinus(col 22:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 22:int) -> 23:int
Statistics: Num rows: 1 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -519,7 +487,7 @@ WHERE (((cbigint <= 197)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
--20301111 5445.576984978541 -1626869520 7.9684972882908944E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 -58 5454.512308361625 1626869520 7.2647256545687792E16
+-20301111 5445.576984978541 -1626869520 7.9684972882908944E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 511 5454.512308361625 1626869520 7.2647256545687792E16
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT VAR_POP(cbigint),
(-(VAR_POP(cbigint))),
@@ -656,40 +624,24 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 0:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxLong(col 2:tinyint) -> tinyint, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFMaxLong(col 4:int) -> int, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFCountMerge(col 6:bigint) -> bigint, VectorUDAFAvgFinal(col 7:struct<count:bigint,sum:double,input:tinyint>) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: double), (- _col0) (type: double), (_col0 - (- _col0)) (type: double), _col1 (type: bigint), (CAST( _col1 AS decimal(19,0)) % 79.553) (type: decimal(5,3)), _col2 (type: tinyint), (UDFToDouble(_col1) - (- _col0)) (type: double), (- (- _col0)) (type: double), (-1.0 % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: bigint), _col3 (type: double), (- (- (- _col0))) (type: double), (762 * (- _col1)) (type: bigint), _col4 (type: int), (UDFToLong(_col2) + (762 * (- _col1))) (type: bigint), ((- _col0) + UDFToDouble(_col4)) (type: double), _col5 (type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 (type: double), (-3728 % (UDFToLong(_col2) + (762 * (- _col1)))) (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 8, 10, 1, 12, 2, 14, 13, 15, 1, 16, 3, 9, 19, 4, 18, 22, 5, 23, 6, 7, 24]
- selectExpressions: DoubleColUnaryMinus(col 0:double) -> 8:double, DoubleColSubtractDoubleColumn(col 0:double, col 9:double)(children: DoubleColUnaryMinus(col 0:double) -> 9:double) -> 10:double, DecimalColModuloDecimalScalar(col 11:decimal(19,0), val 79.553)(children: CastLongToDecimal(col 1:bigint) -> 11:decimal(19,0)) -> 12:decimal(5,3), DoubleColSubtractDoubleColumn(col 9:double, col 13:double)(children: CastLongToDouble(col 1:bigint) -> 9:double, DoubleColUnaryMinus(col 0:double) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 9:double)(children: DoubleColUnaryMinus(col 0:double) -> 9:double) -> 13:double, DoubleScalarModuloDoubleColumn(val -1.0, col 9:double)(children: DoubleColUnaryMinus(col 0:double) -> 9:double) -> 15:double, LongColUnaryMinus(col 1:bigint) -> 16:bigint, DoubleColUnaryMinus(col 17:double)(children: DoubleColUnaryMinus(col 9:double)(children: DoubleColUnaryMinus(col 0:double) -> 9:double) -> 17:double) -> 9:double, LongScalarMultiply
LongColumn(val 762, col 18:bigint)(children: LongColUnaryMinus(col 1:bigint) -> 18:bigint) -> 19:bigint, LongColAddLongColumn(col 2:bigint, col 20:bigint)(children: col 2:tinyint, LongScalarMultiplyLongColumn(val 762, col 18:bigint)(children: LongColUnaryMinus(col 1:bigint) -> 18:bigint) -> 20:bigint) -> 18:bigint, DoubleColAddDoubleColumn(col 17:double, col 21:double)(children: DoubleColUnaryMinus(col 0:double) -> 17:double, CastLongToDouble(col 4:int) -> 21:double) -> 22:double, LongColModuloLongColumn(col 20:bigint, col 1:bigint)(children: LongColUnaryMinus(col 1:bigint) -> 20:bigint) -> 23:bigint, LongScalarModuloLongColumn(val -3728, col 20:bigint)(children: LongColAddLongColumn(col 2:bigint, col 24:bigint)(children: col 2:tinyint, LongScalarMultiplyLongColumn(val 762, col 20:bigint)(children: LongColUnaryMinus(col 1:bigint) -> 20:bigint) -> 24:bigint) -> 20:bigint) -> 24:bigint
Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -888,40 +840,24 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:tinyint>) -> double, VectorUDAFMaxLong(col 1:bigint) -> bigint, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFMaxDouble(col 5:float) -> float
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: double), (_col0 + 6981.0) (type: double), ((_col0 + 6981.0) + _col0) (type: double), _col1 (type: bigint), (((_col0 + 6981.0) + _col0) / _col0) (type: double), (- (_col0 + 6981.0)) (type: double), _col2 (type: double), (_col0 % (- (_col0 + 6981.0))) (type: double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), (UDFToDouble((- _col1)) / _col2) (type: double), _col5 (type: float), (_col4 * -26.28) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 8, 1, 7, 10, 2, 9, 3, 4, 12, 14, 5, 11]
- selectExpressions: DoubleColAddDoubleScalar(col 0:double, val 6981.0) -> 6:double, DoubleColAddDoubleColumn(col 7:double, col 0:double)(children: DoubleColAddDoubleScalar(col 0:double, val 6981.0) -> 7:double) -> 8:double, DoubleColDivideDoubleColumn(col 9:double, col 0:double)(children: DoubleColAddDoubleColumn(col 7:double, col 0:double)(children: DoubleColAddDoubleScalar(col 0:double, val 6981.0) -> 7:double) -> 9:double) -> 7:double, DoubleColUnaryMinus(col 9:double)(children: DoubleColAddDoubleScalar(col 0:double, val 6981.0) -> 9:double) -> 10:double, DoubleColModuloDoubleColumn(col 0:double, col 11:double)(children: DoubleColUnaryMinus(col 9:double)(children: DoubleColAddDoubleScalar(col 0:double, val 6981.0) -> 9:double) -> 11:double) -> 9:double, LongColUnaryMinus(col 1:bigint) -> 12:bigint, DoubleColDivideDoubleColumn(col 11:double, col 2:double)(children: CastLongToDouble(col 13:bigint)(children: LongColUnaryMinus(col 1:bigint) -> 13:bigint) -> 11:do
uble) -> 14:double, DoubleColMultiplyDoubleScalar(col 4:double, val -26.28) -> 11:double
Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2225,24 +2161,15 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFSumLong(col 2:bigint) -> bigint, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFCountMerge(col 4:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:smallint
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3]
keys: KEY._col0 (type: smallint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -2250,19 +2177,10 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 5, 1, 7, 2, 11, 12, 3, 8, 4, 13]
- selectExpressions: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 5:int, DecimalScalarDivideDecimalColumn(val -1.389, col 6:decimal(5,0))(children: CastLongToDecimal(col 0:smallint) -> 6:decimal(5,0)) -> 7:decimal(10,9), DoubleColDivideDoubleColumn(col 9:double, col 10:double)(children: CastLongToDouble(col 8:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 8:int) -> 9:double, CastLongToDouble(col 2:bigint) -> 10:double) -> 11:double, LongColUnaryMinus(col 8:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 8:int) -> 12:int, LongColUnaryMinus(col 13:int)(children: LongColUnaryMinus(col 8:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 8:int) -> 13:int) -> 8:int, LongColSubtractLongScalar(col 4:bigint, val -89010) -> 13:bigint
Statistics: Num rows: 1141 Data size: 199664 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint)
sort order: +++++++++++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1141 Data size: 199664 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Reducer 3
@@ -2501,24 +2419,15 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "var_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFSumDouble(col 6:double) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:double
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: KEY._col0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -2526,19 +2435,10 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: double), _col1 (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762.0)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 7, 8, 2, 10, 11, 3, 4, 12, 5, 9, 13, 6, 15]
- selectExpressions: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1:double) -> 7:double, DoubleColUnaryMinus(col 1:double) -> 8:double, DoubleColAddDoubleScalar(col 9:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1:double) -> 9:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 9:double, col 12:double)(children: DoubleColUnaryMinus(col 1:double) -> 9:double, DoubleColAddDoubleScalar(col 11:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1:double) -> 11:double) -> 12:double) -> 11:double, DoubleColSubtractDoubleColumn(col 0:double, col 9:double)(children: DoubleColUnaryMinus(col 1:double) -> 9:double) -> 12:double, DoubleColAddDoubleColumn(col 0:double, col 1:double) -> 9:double, DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 13:double, DoubleScalarModuloDoubleColumn(val -863.257, col 14:double)(children: DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 14:double) -> 15:double
Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: double)
sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double)
Reducer 3
@@ -2821,24 +2721,15 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFAvgFinal(col 3:struct<count:bigint,sum:double,input:smallint>) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFMinLong(col 5:tinyint) -> tinyint, VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFAvgFinal(col 8:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFVarFinal(col 9:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFAvgFinal(col 10:struct<count:bigint,sum:double,input:float>) -> double, VectorUDAFMinDouble(col 11:double) -> double, VectorUDAFVarFinal(col 12:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFVarFinal(col 13:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFSumLong(col 14:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:timestamp, col 1:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
keys: KEY._col0 (type: timestamp), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -2846,19 +2737,10 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28 - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28 - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: decimal(8,6)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), (- (- _col4)) (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 15, 16, 3, 17, 18, 4, 19, 22, 5, 21, 23, 6, 20, 26, 27, 7, 25, 8, 9, 29, 28, 10, 30, 32, 24, 11, 12, 31, 34, 37, 13, 14, 38, 40, 4, 39]
- selectExpressions: DoubleColMultiplyDoubleScalar(col 2:double, val 10.175) -> 15:double, DoubleColUnaryMinus(col 2:double) -> 16:double, DoubleColUnaryMinus(col 2:double) -> 17:double, DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 18:double, LongColUnaryMinus(col 4:bigint) -> 19:bigint, DoubleColMultiplyDoubleColumn(col 20:double, col 21:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 20:double, DoubleColUnaryMinus(col 2:double) -> 21:double) -> 22:double, DoubleColMultiplyDoubleColumn(col 23:double, col 20:double)(children: DoubleColMultiplyDoubleColumn(col 20:double, col 21:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 20:double, DoubleColUnaryMinus(col 2:double) -> 21:double) -> 23:double, CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 24:bigint) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 20:double)(children: DoubleColMultiplyDoubleScalar(
col 2:double, val 10.175) -> 20:double) -> 23:double, DoubleColAddDoubleColumn(col 6:double, col 25:double)(children: DoubleColMultiplyDoubleColumn(col 26:double, col 20:double)(children: DoubleColMultiplyDoubleColumn(col 20:double, col 25:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 20:double, DoubleColUnaryMinus(col 2:double) -> 25:double) -> 26:double, CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 24:bigint) -> 20:double) -> 25:double) -> 20:double, DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 2:double) -> 25:double) -> 26:double, DoubleColDivideDoubleColumn(col 25:double, col 2:double)(children: CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 24:bigint) -> 25:double) -> 27:double, DoubleScalarDivideDoubleColumn(val 10.175, col 3:double) -> 25:double, DoubleColSubtractDoubleColumn(col 28:double, col 30:double)(children: DoubleColAddDoubleColumn(col 6:double, col
29:double)(children: DoubleColMultiplyDoubleColumn(col 30:double, col 28:double)(children: DoubleColMultiplyDoubleColumn(col 28:double, col 29:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 28:double, DoubleColUnaryMinus(col 2:double) -> 29:double) -> 30:double, CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 24:bigint) -> 28:double) -> 29:double) -> 28:double, DoubleColMultiplyDoubleColumn(col 31:double, col 29:double)(children: DoubleColMultiplyDoubleColumn(col 29:double, col 30:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 29:double, DoubleColUnaryMinus(col 2:double) -> 30:double) -> 31:double, CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 24:bigint) -> 29:double) -> 30:double) -> 29:double, DoubleColUnaryMinus(col 30:double)(children: DoubleColUnaryMinus(col 28:double)(children: DoubleColMultiplyDoubleScalar(col 2:double, val 10.175) -> 28:double) -> 30:
double) -> 28:double, DoubleColMultiplyDoubleScalar(col 31:double, val 10.175)(children: DoubleColSubtractDoubleColumn(col 30:double, col 32:double)(children: DoubleColAddDoubleColumn(col 6:double, col 31:double)(children: DoubleColMultiplyDoubleColumn(col 32:double, col 30:double)(children: DoubleColMultiplyDoubleColumn(col 30:double, col 31:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 30:double, DoubleColUnaryMinus(col 2:double) -> 31:double) -> 32:double, CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 24:bigint) -> 30:double) -> 31:double) -> 30:double, DoubleColMultiplyDoubleColumn(col 33:double, col 31:double)(children: DoubleColMultiplyDoubleColumn(col 31:double, col 32:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 31:double, DoubleColUnaryMinus(col 2:double) -> 32:double) -> 33:double, CastLongToDouble(col 24:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 24:bigint) -> 31:
double) -> 32:double) -> 31:double) -> 30:double, DoubleScalarModuloDoubleColumn(val 10.175, col 31:double)(children: DoubleScalarDivideDoubleColumn(val 10.175, col 3:double) -> 31:double) -> 32:double, LongColUnaryMinus(col 5:tinyint) -> 24:tinyint, DoubleColUnaryMinus(col 34:double)(children: DoubleColMultiplyDoubleColumn(col 31:double, col 33:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 31:double, DoubleColUnaryMinus(col 2:double) -> 33:double) -> 34:double) -> 31:double, DoubleColModuloDoubleColumn(col 33:double, col 10:double)(children: DoubleColUnaryMinus(col 2:double) -> 33:double) -> 34:double, DecimalScalarDivideDecimalColumn(val -26.28, col 36:decimal(3,0))(children: CastLongToDecimal(col 35:tinyint)(children: LongColUnaryMinus(col 5:tinyint) -> 35:tinyint) -> 36:decimal(3,0)) -> 37:decimal(8,6), DoubleColDivideDoubleColumn(col 33:double, col 7:double)(children: DoubleColAddDoubleColumn(col 6:double, col 38:double)(children: DoubleColMult
iplyDoubleColumn(col 39:double, col 33:double)(children: DoubleColMultiplyDoubleColumn(col 33:double, col 38:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 33:double, DoubleColUnaryMinus(col 2:double) -> 38:double) -> 39:double, CastLongToDouble(col 35:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 35:bigint) -> 33:double) -> 38:double) -> 33:double) -> 38:double, LongColUnaryMinus(col 35:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 35:bigint) -> 40:bigint, DoubleColModuloDoubleScalar(col 33:double, val -26.28)(children: DoubleColAddDoubleColumn(col 6:double, col 39:double)(children: DoubleColMultiplyDoubleColumn(col 41:double, col 33:double)(children: DoubleColMultiplyDoubleColumn(col 33:double, col 39:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2:double) -> 33:double, DoubleColUnaryMinus(col 2:double) -> 39:double) -> 41:double, CastLongToDouble(col 35:bigint)(children: LongColUnaryMinus(col 4:bigint) -> 35:bigi
nt) -> 33:double) -> 39:double) -> 33:double) -> 39:double
Statistics: Num rows: 3072 Data size: 1542740 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(8,6)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double)
sort order: +++++++++++++++++++++++++++++++++++++++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 3072 Data size: 1542740 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Reducer 3
@@ -3222,24 +3104,15 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "var_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9)
- Group By Vectorization:
- aggregators: VectorUDAFMaxDouble(col 1:float) -> float, VectorUDAFSumLong(col 2:bigint) -> bigint, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFAvgFinal(col 4:struct<count:bigint,sum:double,input:double>) -> double, VectorUDAFMinLong(col 5:bigint) -> bigint, VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFSumLong(col 7:bigint) -> bigint, VectorUDAFVarFinal(col 8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 9:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFAvgFinal(col 10:struct<count:bigint,sum:double,input:int>) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:boolean
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
keys: KEY._col0 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -3247,19 +3120,10 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: boolean), _col1 (type: float), (- _col1) (type: float), (-26.28 / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: decimal(23,3)), _col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + (- _col1)) (type: float), _col4 (type: double), (UDFToDouble((CAST( _col2 AS decimal(19,0)) - 10.175)) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + (- _col1))) (type: float), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * CAST( _col5 AS decimal(19,0))) (type: decimal(24,3)), (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0)))) (type: decimal(25,3)), _col9 (type: double), (- (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0))))) (type: decimal(25,3)), _col10 (type: double), (- _col10) (type: double), (_col10 * UDFToDouble(_col7)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 11, 12, 2, 14, 3, 15, 17, 4, 19, 5, 6, 16, 20, 22, 7, 8, 23, 26, 9, 28, 10, 21, 30]
- selectExpressions: DoubleColUnaryMinus(col 1:float) -> 11:float, DoubleScalarDivideDoubleColumn(val -26.28, col 1:double)(children: col 1:float) -> 12:double, DecimalColSubtractDecimalScalar(col 13:decimal(19,0), val 10.175)(children: CastLongToDecimal(col 2:bigint) -> 13:decimal(19,0)) -> 14:decimal(23,3), DoubleColModuloDoubleColumn(col 3:double, col 1:double)(children: col 1:float) -> 15:double, DoubleScalarAddDoubleColumn(val 10.175000190734863, col 16:float)(children: DoubleColUnaryMinus(col 1:float) -> 16:float) -> 17:float, DoubleColAddDoubleColumn(col 16:double, col 3:double)(children: CastDecimalToDouble(col 18:decimal(23,3))(children: DecimalColSubtractDecimalScalar(col 13:decimal(19,0), val 10.175)(children: CastLongToDecimal(col 2:bigint) -> 13:decimal(19,0)) -> 18:decimal(23,3)) -> 16:double) -> 19:double, DoubleColUnaryMinus(col 20:float)(children: DoubleScalarAddDoubleColumn(val 10.175000190734863, col 16:float)(children: DoubleColUnaryMinus(col 1:float) -> 16:float) -> 20:float) -> 16:float, DoubleScalarDivideDoubleColumn(val 79.553, col 6:double) -> 20:double, DoubleColModuloDoubleColumn(col 3:double, col 21:double)(children: DoubleScalarDivideDoubleColumn(val 79.553, col 6:double) -> 21:double) -> 22:double, DecimalScalarMultiplyDecimalColumn(val -1.389, col 13:decimal(19,0))(children: CastLongToDecimal(col 5:bigint) -> 13:decimal(19,0)) -> 23:decimal(24,3), DecimalColSubtractDecimalColumn(col 13:decimal(19,0), col 25:decimal(24,3))(children: CastLongToDecimal(col 7:bigint) -> 13:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 24:decimal(19,0))(children: CastLongToDecimal(col 5:bigint) -> 24:decimal(19,0)) -> 25:decimal(24,3)) -> 26:decimal(25,3), FuncNegateDecimalToDecimal(col 27:decimal(25,3))(children: DecimalColSubtractDecimalColumn(col 13:decimal(19,0), col 25:decimal(24,3))(children: CastLongToDecimal(col 7:bigint) -> 13:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 24:decimal(19,0))(children: CastLongToDecimal(col 5:bigint) -> 24:decimal(19,0)) -> 25:decimal(24,3)) -> 27:decimal(25,3)) -> 28:decimal(25,3), DoubleColUnaryMinus(col 10:double) -> 21:double, DoubleColMultiplyDoubleColumn(col 10:double, col 29:double)(children: CastLongToDouble(col 7:bigint) -> 29:double) -> 30:double
Statistics: Num rows: 3 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: boolean)
sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 3 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double)
Reducer 3
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
index 00286ce..4fb3466 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
@@ -43,6 +43,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@table_6
POSTHOOK: Lineage: table_6.int_col_0 SCRIPT []
+Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: SELECT t1.decimal0801_col
FROM table_19 t1
WHERE (SELECT max(tt1.int_col_0) AS int_col FROM table_6 tt1) IN (t1.int_col_1) AND decimal0801_col is not null
@@ -88,6 +89,7 @@ POSTHOOK: Input: default@table_19
POSTHOOK: Input: default@table_6
#### A masked pattern was here ####
418.9
+Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT t1.decimal0801_col
FROM table_19 t1
@@ -120,69 +122,45 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:decimal0801_col:decimal(8,1), 1:int_col_1:int, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:decimal(8,1)), SelectColumnIsNotNull(col 1:int))
- predicate: (decimal0801_col is not null and int_col_1 is not null) (type: boolean)
- Statistics: Num rows: 4 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: decimal0801_col is not null (type: boolean)
+ Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: decimal0801_col (type: decimal(8,1)), int_col_1 (type: int)
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 4 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
- Inner Join 0 to 1
+ Left Outer Join 0 to 1
keys:
- 0 _col1 (type: int)
- 1 _col0 (type: int)
- Map Join Vectorization:
- bigTableKeyColumnNums: [1]
- bigTableRetainedColumnNums: [0]
- bigTableValueColumnNums: [0]
- className: VectorMapJoinInnerBigOnlyLongOperator
- native: true
- nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
- projectedOutputColumnNums: [0]
- outputColumnNames: _col0
+ 0
+ 1
+ outputColumnNames: _col0, _col1, _col2
input vertices:
1 Reducer 3
- Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized, llap
+ Statistics: Num rows: 5 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col2) IN (_col1) (type: boolean)
+ Statistics: Num rows: 2 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: decimal(8,1))
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: decimal0801_col:decimal(8,1), int_col_1:int
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: FILTER operator: Vectorizing IN expression only supported for constant values
+ vectorized: false
Map 2
Map Operator Tree:
TableScan
@@ -266,24 +244,16 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ keyColumnNums: []
native: true
- predicateExpression: SelectColumnIsNotNull(col 0:int)
- predicate: _col0 is not null (type: boolean)
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [0]
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- keyColumnNums: [0]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
index 1724751..e26f92b 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
@@ -170,13 +170,12 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
index 05e34d6..92c6a73 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
@@ -325,24 +325,15 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4), max(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFMinLong(col 2:smallint) -> smallint, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFAvgFinal(col 4:struct<count:bigint,sum:double,input:float>) -> double, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFMaxDecimal(col 6:decimal(4,2)) -> decimal(4,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:tinyint
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -350,10 +341,6 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(4,2))
Reducer 3
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
index 0e38b0b..9652d36 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
@@ -544,38 +544,20 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 7
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:struct<count:bigint,sum:double,variance:double>, VALUE._col4:struct<count:bigint,sum:double,variance:double>, VALUE._col5:struct<count:bigint,sum:double,variance:double>, VALUE._col6:struct<count:bigint,sum:double,variance:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: variance(VALUE._col0), var_pop(VALUE._col1), var_samp(VALUE._col2), std(VALUE._col3), stddev(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 0:struct<count:bigint,sum:double,variance:double>) -> double aggregation: variance, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: std, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
index 7986494..cefa349 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
@@ -1102,40 +1102,24 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: true
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:timestamp>) -> double, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: variance, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: std, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev, VectorUDAFVarFinal(col 6:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: round(_col0, 0) (type: double), _col1 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col3 BETWEEN 9.20684592523616E19 AND 9.20684592523617E19 (type: boolean), round(_col4, 3) (type: double), round(_col5, 3) (type: double), round(_col6, 3) (type: double), round(_col7, 3) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [8, 9, 10, 11, 12, 13, 14, 15]
- selectExpressions: RoundWithNumDigitsDoubleToDouble(col 0, decimalPlaces 0) -> 8:double, VectorUDFAdaptor(_col1 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19) -> 9:boolean, VectorUDFAdaptor(_col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19) -> 10:boolean, VectorUDFAdaptor(_col3 BETWEEN 9.20684592523616E19 AND 9.20684592523617E19) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 3) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 3) -> 14:double, RoundWithNumDigitsDoubleToDouble(col 7, decimalPlaces 3) -> 15:double
Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
index b3e2f17..1bbc2a0 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
@@ -426,13 +426,11 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -928,13 +926,11 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -1430,13 +1426,11 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -1636,47 +1630,23 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [decimal(13,3), double]
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:bigint>, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:bigint, VALUE._col4:double, VALUE._col5:tinyint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:bigint>) -> double, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFMinLong(col 5:tinyint) -> tinyint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0 + _col0) (type: double), _col1 (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) + (-6432.0 + _col0)) (type: double), _col2 (type: double), (- (-6432.0 + _col0)) (type: double), (-6432.0 + (- (-6432.0 + _col0))) (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) / (- (-6432.0 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 7, 1, 9, 11, 2, 10, 8, 13, 12, 3, 4, 14, 15, 18, 5, 19]
- selectExpressions: DoubleColUnaryMinus(col 0:double) -> 6:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 9:double, DoubleColAddDoubleColumn(col 10:double, col 8:double)(children: DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 11:double, DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 12:double)(children: DoubleColUnaryMinus(col 8:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 8:double) -> 12:double) -> 8:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 14:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 15:double) -> 12:double, DoubleColModuloDoubleColumn(col 2:double, col 1:double) -> 14:double, DoubleColUnaryMinus(col 2:double) -> 15:double, DoubleColMultiplyDoubleColumn(col 17:double, col 16:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0:double) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 0:double) -> 16:double) -> 18:double, LongColUnaryMinus(col 5:tinyint) -> 19:tinyint
Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
[5/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal
produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)
Posted by mm...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
index e50f3e2..23914f8 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
@@ -148,31 +148,15 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double, decimal(11,4)]
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: aaaaa
- reduceColumnSortOrder: +++++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- dataColumns: KEY._col0:boolean, KEY._col1:tinyint, KEY._col2:timestamp, KEY._col3:float, KEY._col4:string, VALUE._col0:tinyint, VALUE._col1:double, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:struct<count:bigint,sum:double,variance:double>, VALUE._col4:float, VALUE._col5:tinyint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFMaxDouble(col 9:float) -> float, VectorUDAFMinLong(col 10:tinyint) -> tinyint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -180,21 +164,10 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
- selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 11:tinyint, LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 13:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 15:double)(children: CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 6:double) -> 15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 17:float, DoubleColUnaryMinus(col 6:double) -> 18:double, DecimalColSubtractDecimalScalar(col 19:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 19:decimal(3,0)) -> 20:decimal(7,3), DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 6:double) -> 21:double) -> 22:double, DoubleScalarDivideDoubleColumn(val -26.28, col 23:double)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 6:double) -> 21:double) -> 23:double) -> 21:double, DoubleColDivideDoubleColumn(col 24:double, col 23:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 23:double)(children: CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 23:double) -> 24:double, CastLongToDouble(col 1:tinyint) -> 23:double) -> 25:double
Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
sort order: +++++++++++++++++++++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Reducer 3
@@ -493,24 +466,15 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFMaxDouble(col 9:float) -> float, VectorUDAFMinLong(col 10:tinyint) -> tinyint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -518,19 +482,10 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
- selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 11:tinyint, LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 13:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 15:double)(children: CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 6:double) -> 15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 17:float, DoubleColUnaryMinus(col 6:double) -> 18:double, DecimalColSubtractDecimalScalar(col 19:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 19:decimal(3,0)) -> 20:decimal(7,3), DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 6:double) -> 21:double) -> 22:double, DoubleScalarDivideDoubleColumn(val -26.28, col 23:double)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 6:double) -> 21:double) -> 23:double) -> 21:double, DoubleColDivideDoubleColumn(col 24:double, col 23:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 23:double)(children: CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 23:double) -> 24:double, CastLongToDouble(col 1:tinyint) -> 23:double) -> 25:double
Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
sort order: +++++++++++++++++++++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Reducer 3
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_14.q.out b/ql/src/test/results/clientpositive/llap/vectorization_14.q.out
index 0f77070..c7a32e3 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_14.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_14.q.out
@@ -149,31 +149,15 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double, double]
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: aaaaa
- reduceColumnSortOrder: +++++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- dataColumns: KEY._col0:string, KEY._col1:float, KEY._col2:double, KEY._col3:timestamp, KEY._col4:boolean, VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:float, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:bigint, VALUE._col4:struct<count:bigint,sum:double,variance:double>, VALUE._col5:struct<count:bigint,sum:double,variance:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMaxDouble(col 6:float) -> float, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFVarFinal(col 9:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFVarFinal(col 10:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string, col 1:float, col 2:double, col 3:timestamp, col 4:boolean
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -181,21 +165,10 @@ STAGE PLANS:
Select Operator
expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [3, 1, 0, 4, 2, 11, 13, 5, 12, 6, 14, 15, 16, 7, 8, 18, 17, 19, 9, 20, 10, 22]
- selectExpressions: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 11:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 12:double) -> 13:double, DoubleColMultiplyDoubleScalar(col 1:float, val -26.280000686645508) -> 12:float, DoubleColUnaryMinus(col 1:float) -> 14:float, DoubleColUnaryMinus(col 6:float) -> 15:float, DoubleColDivideDoubleScalar(col 17:double, val 10.175)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 16:double) -> 17:double) -> 16:double, DoubleColUnaryMinus(col 17:double)(children: DoubleColDivideDoubleScalar(col 18:double, val 10.175)(children: DoubleColUnaryMinus(col 17:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 17:double) -> 18:double) -> 17:double) -> 18:double, DoubleScalarModuloDoubleColumn(val -1.389, col 5:double) -> 17:double, DoubleColSubtractDoubleColumn(col 1:double, co
l 2:double)(children: col 1:float) -> 19:double, DoubleColModuloDoubleScalar(col 9:double, val 10.175) -> 20:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 21:double) -> 22:double
Statistics: Num rows: 151 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp)
sort order: ++++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 1, 2, 3]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [4, 11, 13, 5, 12, 6, 14, 15, 16, 7, 8, 18, 17, 19, 9, 20, 10, 22]
Statistics: Num rows: 151 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double)
Reducer 3
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_16.q.out b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out
index 7cf60ae..e7d873f 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_16.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out
@@ -121,31 +121,15 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: aaa
- reduceColumnSortOrder: +++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: KEY._col0:double, KEY._col1:string, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:double
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:double, col 1:string, col 2:timestamp
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -153,17 +137,9 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 0, 2, 6, 8, 3, 4, 7, 10, 5, 9, 12, 4]
- selectExpressions: DoubleColSubtractDoubleScalar(col 0:double, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 7:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4:double) -> 7:double, DoubleColMultiplyDoubleColumn(col 4:double, col 9:double)(children: CastLongToDouble(col 3:bigint) -> 9:double) -> 10:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0:double) -> 9:double, DecimalColDivideDecimalScalar(col 11:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 11:decimal(19,0)) -> 12:decimal(28,6)
Statistics: Num rows: 1024 Data size: 307406 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1024 Data size: 307406 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_2.q.out b/ql/src/test/results/clientpositive/llap/vectorization_2.q.out
index 4dfb73d..2d4b703 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_2.q.out
@@ -123,47 +123,24 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double]
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:smallint>, VALUE._col1:double, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:bigint, VALUE._col4:tinyint, VALUE._col5:struct<count:bigint,sum:double,input:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3), min(VALUE._col4), avg(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:smallint>) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFMinLong(col 4:tinyint) -> tinyint, VectorUDAFAvgFinal(col 5:struct<count:bigint,sum:double,input:double>) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: double), (_col0 % -563.0) (type: double), (_col0 + 762.0) (type: double), _col1 (type: double), _col2 (type: double), (- _col2) (type: double), (_col1 - _col0) (type: double), _col3 (type: bigint), (- (_col1 - _col0)) (type: double), (_col2 - 762.0) (type: double), _col4 (type: tinyint), ((- _col2) + UDFToDouble(_col4)) (type: double), _col5 (type: double), (((- _col2) + UDFToDouble(_col4)) - _col1) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 7, 1, 2, 8, 9, 3, 11, 10, 4, 14, 5, 12]
- selectExpressions: DoubleColModuloDoubleScalar(col 0:double, val -563.0) -> 6:double, DoubleColAddDoubleScalar(col 0:double, val 762.0) -> 7:double, DoubleColUnaryMinus(col 2:double) -> 8:double, DoubleColSubtractDoubleColumn(col 1:double, col 0:double) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColSubtractDoubleColumn(col 1:double, col 0:double) -> 10:double) -> 11:double, DoubleColSubtractDoubleScalar(col 2:double, val 762.0) -> 10:double, DoubleColAddDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 2:double) -> 12:double, CastLongToDouble(col 4:tinyint) -> 13:double) -> 14:double, DoubleColSubtractDoubleColumn(col 15:double, col 1:double)(children: DoubleColAddDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 2:double) -> 12:double, CastLongToDouble(col 4:tinyint) -> 13:double) -> 15:double) -> 12:double
Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_3.q.out b/ql/src/test/results/clientpositive/llap/vectorization_3.q.out
index 6bff739..9de4fcd 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_3.q.out
@@ -128,47 +128,24 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double, decimal(22,3), decimal(8,3)]
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:double, VALUE._col4:struct<count:bigint,sum:double,input:int>, VALUE._col5:struct<count:bigint,sum:double,variance:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_samp(VALUE._col0), stddev_pop(VALUE._col1), stddev_samp(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 0:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFAvgFinal(col 4:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: double), (_col0 - 10.175) (type: double), _col1 (type: double), (_col0 * (_col0 - 10.175)) (type: double), (- _col1) (type: double), (_col0 % 79.553) (type: double), (- (_col0 * (_col0 - 10.175))) (type: double), _col2 (type: double), (- _col0) (type: double), _col3 (type: double), ((- (_col0 * (_col0 - 10.175))) / (_col0 - 10.175)) (type: double), (- (_col0 - 10.175)) (type: double), _col4 (type: double), (-3728.0 - _col0) (type: double), _col5 (type: double), (_col4 / _col2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 1, 8, 7, 9, 10, 2, 11, 3, 14, 13, 4, 12, 5, 15]
- selectExpressions: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 6:double, DoubleColMultiplyDoubleColumn(col 0:double, col 7:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 1:double) -> 7:double, DoubleColModuloDoubleScalar(col 0:double, val 79.553) -> 9:double, DoubleColUnaryMinus(col 11:double)(children: DoubleColMultiplyDoubleColumn(col 0:double, col 10:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 10:double) -> 11:double) -> 10:double, DoubleColUnaryMinus(col 0:double) -> 11:double, DoubleColDivideDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColMultiplyDoubleColumn(col 0:double, col 12:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 12:double) -> 13:double) -> 12:double, DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 13:double) -> 14:double,
DoubleColUnaryMinus(col 12:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 12:double) -> 13:double, DoubleScalarSubtractDoubleColumn(val -3728.0, col 0:double) -> 12:double, DoubleColDivideDoubleColumn(col 4:double, col 2:double) -> 15:double
Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_4.q.out b/ql/src/test/results/clientpositive/llap/vectorization_4.q.out
index a38c77c..a2142b7 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_4.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_4.q.out
@@ -123,47 +123,24 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 5
- dataColumns: VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,input:double>, VALUE._col3:struct<count:bigint,sum:double,variance:double>, VALUE._col4:tinyint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0), stddev_pop(VALUE._col1), avg(VALUE._col2), var_pop(VALUE._col3), min(VALUE._col4)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFAvgFinal(col 2:struct<count:bigint,sum:double,input:double>) -> double, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFMinLong(col 4:tinyint) -> tinyint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: bigint), (_col0 * -563) (type: bigint), (-3728 + _col0) (type: bigint), _col1 (type: double), (- _col1) (type: double), _col2 (type: double), ((_col0 * -563) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563) % _col0)) / _col2) (type: double), _col3 (type: double), (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2)) (type: double), ((-3728 + _col0) - (_col0 * -563)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2))) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 5, 6, 1, 7, 2, 9, 12, 3, 11, 14, 4, 4, 16]
- selectExpressions: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 5:bigint, LongScalarAddLongColumn(val -3728, col 0:bigint) -> 6:bigint, DoubleColUnaryMinus(col 1:double) -> 7:double, LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 9:bigint, DoubleColDivideDoubleColumn(col 11:double, col 2:double)(children: CastLongToDouble(col 10:bigint)(children: LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 10:bigint) -> 11:double) -> 12:double, DoubleColUnaryMinus(col 13:double)(children: DoubleColDivideDoubleColumn(col 11:double, col 2:double)(children: CastLongToDouble(col 10:bigint)(children: LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 10:bigint) -> 11:double) -> 13:double) -> 11:double, LongColSubtractLongColumn(col 8:big
int, col 10:bigint)(children: LongScalarAddLongColumn(val -3728, col 0:bigint) -> 8:bigint, LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 14:bigint, DoubleColMultiplyDoubleColumn(col 13:double, col 15:double)(children: CastLongToDouble(col 4:tinyint) -> 13:double, DoubleColUnaryMinus(col 16:double)(children: DoubleColDivideDoubleColumn(col 15:double, col 2:double)(children: CastLongToDouble(col 10:bigint)(children: LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 10:bigint) -> 15:double) -> 16:double) -> 15:double) -> 16:double
Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_9.q.out b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out
index 7cf60ae..e7d873f 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out
@@ -121,31 +121,15 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- reduceColumnNullOrder: aaa
- reduceColumnSortOrder: +++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: KEY._col0:double, KEY._col1:string, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:double
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:double, col 1:string, col 2:timestamp
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -153,17 +137,9 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 0, 2, 6, 8, 3, 4, 7, 10, 5, 9, 12, 4]
- selectExpressions: DoubleColSubtractDoubleScalar(col 0:double, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 7:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4:double) -> 7:double, DoubleColMultiplyDoubleColumn(col 4:double, col 9:double)(children: CastLongToDouble(col 3:bigint) -> 9:double) -> 10:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0:double) -> 9:double, DecimalColDivideDecimalScalar(col 11:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 11:decimal(19,0)) -> 12:decimal(28,6)
Statistics: Num rows: 1024 Data size: 307406 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1024 Data size: 307406 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out b/ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out
index bd0a731..ad6bae7 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out
@@ -200,13 +200,12 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
@@ -522,13 +521,12 @@ STAGE PLANS:
enabledConditionsNotMet: Row deserialization of vectorized input format not supported IS false, hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat IS false
inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
@@ -854,13 +852,12 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
@@ -1224,13 +1221,12 @@ STAGE PLANS:
enabledConditionsNotMet: hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.orc.OrcInputFormat IS false
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out b/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out
index e46c7f4..130e137 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out
@@ -70,15 +70,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc_part
- Statistics: Num rows: 200 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: (cdouble + 2.0) (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL
Reduce Output Operator
key expressions: _col0 (type: double)
sort order: +
- Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL
TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -103,13 +103,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: PARTIAL
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: PARTIAL
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
[2/6] hive git commit: HIVE-18756: Vectorization: VectorUDAFVarFinal
produces Wrong Results (Matt McCline, reviewed by Sergey Shelukhin)
Posted by mm...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
index f4a5b55..591de4b 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
@@ -144,31 +144,14 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: aaaa
- reduceColumnSortOrder: ++++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 9
- dataColumns: KEY._col0:double, KEY._col1:bigint, KEY._col2:string, KEY._col3:boolean, VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,input:double>, VALUE._col3:bigint, VALUE._col4:struct<count:bigint,sum:double,variance:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFAvgFinal(col 6:struct<count:bigint,sum:double,input:double>) -> double, VectorUDAFSumLong(col 7:bigint) -> bigint, VectorUDAFVarFinal(col 8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:double, col 1:bigint, col 2:string, col 3:boolean
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4]
keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
@@ -176,21 +159,10 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0 * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0 * _col0) / -6432.0) (type: double), (- ((-6432.0 * _col0) / -6432.0)) (type: double), _col6 (type: double), (- (-6432.0 * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0 * _col0)) (type: double), (- (- ((-6432.0 * _col0) / -6432.0))) (type: double), (((-6432.0 * _col0) / -6432.0) + (- (-6432.0 * _col0))) (type: double), _col8 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 3, 2, 0, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8]
- selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 9:double, LongColUnaryMinus(col 1:bigint) -> 10:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 11:bigint, DoubleColDivideDoubleScalar(col 12:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 13:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColDivideDoubleScalar(col 12:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 12:double) -> 14:double) -> 12:double, DoubleColUnaryMinus(col 14:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 15:double, DecimalScalarAddDecimalColumn(val -5638.15, col 16:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 16:decimal(19,0)) -> 17:decimal(22,2), DoubleColDivideDoubleColumn(col 6:double, col 14:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:
double) -> 14:double) -> 18:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColUnaryMinus(col 19:double)(children: DoubleColDivideDoubleScalar(col 14:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 19:double) -> 14:double) -> 19:double, DoubleColAddDoubleColumn(col 20:double, col 21:double)(children: DoubleColDivideDoubleScalar(col 14:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 20:double, DoubleColUnaryMinus(col 14:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 14:double) -> 21:double) -> 14:double
Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string)
sort order: +++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 1, 2]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [3, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8]
Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double)
Reducer 3
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
index 6d5c27f..e6c1f12 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
@@ -146,31 +146,14 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double, decimal(11,4)]
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: aaaaa
- reduceColumnSortOrder: +++++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- dataColumns: KEY._col0:boolean, KEY._col1:tinyint, KEY._col2:timestamp, KEY._col3:float, KEY._col4:string, VALUE._col0:tinyint, VALUE._col1:double, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:struct<count:bigint,sum:double,variance:double>, VALUE._col4:float, VALUE._col5:tinyint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFMaxDouble(col 9:float) -> float, VectorUDAFMinLong(col 10:tinyint) -> tinyint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -178,21 +161,10 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
- selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 11:tinyint, LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 13:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 15:double)(children: CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 6:double) -> 15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 17:float, DoubleColUnaryMinus(col 6:double) -> 18:double, DecimalColSubtractDecimalScalar(col 19:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 19:decimal(3,0)) -> 20:decimal(7,3), DoubleColUnaryMinus(col 21:double)(children: DoubleColUna
ryMinus(col 6:double) -> 21:double) -> 22:double, DoubleScalarDivideDoubleColumn(val -26.28, col 23:double)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 6:double) -> 21:double) -> 23:double) -> 21:double, DoubleColDivideDoubleColumn(col 24:double, col 23:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 23:double)(children: CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 23:double) -> 24:double, CastLongToDouble(col 1:tinyint) -> 23:double) -> 25:double
Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
sort order: +++++++++++++++++++++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Reducer 3
@@ -489,24 +461,14 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 8:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFMaxDouble(col 9:float) -> float, VectorUDAFMinLong(col 10:tinyint) -> tinyint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -514,19 +476,10 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
- selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 11:tinyint, LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 13:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 15:double)(children: CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 6:double) -> 15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 17:float, DoubleColUnaryMinus(col 6:double) -> 18:double, DecimalColSubtractDecimalScalar(col 19:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 19:decimal(3,0)) -> 20:decimal(7,3), DoubleColUnaryMinus(col 21:double)(children: DoubleColUna
ryMinus(col 6:double) -> 21:double) -> 22:double, DoubleScalarDivideDoubleColumn(val -26.28, col 23:double)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 6:double) -> 21:double) -> 23:double) -> 21:double, DoubleColDivideDoubleColumn(col 24:double, col 23:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 23:double)(children: CastLongToDouble(col 14:tinyint)(children: LongColAddLongColumn(col 12:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 12:tinyint) -> 14:tinyint) -> 23:double) -> 24:double, CastLongToDouble(col 1:tinyint) -> 23:double) -> 25:double
Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
sort order: +++++++++++++++++++++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Reducer 3
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
index 3016203..6fa51e5 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
@@ -147,31 +147,14 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double, double]
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: aaaaa
- reduceColumnSortOrder: +++++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 11
- dataColumns: KEY._col0:string, KEY._col1:float, KEY._col2:double, KEY._col3:timestamp, KEY._col4:boolean, VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:float, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:bigint, VALUE._col4:struct<count:bigint,sum:double,variance:double>, VALUE._col5:struct<count:bigint,sum:double,variance:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMaxDouble(col 6:float) -> float, VectorUDAFVarFinal(col 7:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFVarFinal(col 9:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFVarFinal(col 10:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_samp
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string, col 1:float, col 2:double, col 3:timestamp, col 4:boolean
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -179,21 +162,10 @@ STAGE PLANS:
Select Operator
expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [3, 1, 0, 4, 2, 11, 13, 5, 12, 6, 14, 15, 16, 7, 8, 18, 17, 19, 9, 20, 10, 22]
- selectExpressions: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 11:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 12:double) -> 13:double, DoubleColMultiplyDoubleScalar(col 1:float, val -26.280000686645508) -> 12:float, DoubleColUnaryMinus(col 1:float) -> 14:float, DoubleColUnaryMinus(col 6:float) -> 15:float, DoubleColDivideDoubleScalar(col 17:double, val 10.175)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 16:double) -> 17:double) -> 16:double, DoubleColUnaryMinus(col 17:double)(children: DoubleColDivideDoubleScalar(col 18:double, val 10.175)(children: DoubleColUnaryMinus(col 17:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 17:double) -> 18:double) -> 17:double) -> 18:double, DoubleScalarModuloDoubleColumn(val -1.389, col 5:double) -> 17:double, DoubleColSubtractDoubleColumn(col 1:double, co
l 2:double)(children: col 1:float) -> 19:double, DoubleColModuloDoubleScalar(col 9:double, val 10.175) -> 20:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 21:double) -> 22:double
Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp)
sort order: ++++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 1, 2, 3]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [4, 11, 13, 5, 12, 6, 14, 15, 16, 7, 8, 18, 17, 19, 9, 20, 10, 22]
Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double)
Reducer 3
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
index b270aea..24cae48 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
@@ -119,31 +119,14 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: aaa
- reduceColumnSortOrder: +++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: KEY._col0:double, KEY._col1:string, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:double
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:double, col 1:string, col 2:timestamp
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -151,17 +134,9 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 0, 2, 6, 8, 3, 4, 7, 10, 5, 9, 12, 4]
- selectExpressions: DoubleColSubtractDoubleScalar(col 0:double, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 7:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4:double) -> 7:double, DoubleColMultiplyDoubleColumn(col 4:double, col 9:double)(children: CastLongToDouble(col 3:bigint) -> 9:double) -> 10:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0:double) -> 9:double, DecimalColDivideDecimalScalar(col 11:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 11:decimal(19,0)) -> 12:decimal(28,6)
Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_2.q.out b/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
index 09267d7..f11854c 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
@@ -121,47 +121,23 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double]
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,input:smallint>, VALUE._col1:double, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:bigint, VALUE._col4:tinyint, VALUE._col5:struct<count:bigint,sum:double,input:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3), min(VALUE._col4), avg(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 0:struct<count:bigint,sum:double,input:smallint>) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFMinLong(col 4:tinyint) -> tinyint, VectorUDAFAvgFinal(col 5:struct<count:bigint,sum:double,input:double>) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: double), (_col0 % -563.0) (type: double), (_col0 + 762.0) (type: double), _col1 (type: double), _col2 (type: double), (- _col2) (type: double), (_col1 - _col0) (type: double), _col3 (type: bigint), (- (_col1 - _col0)) (type: double), (_col2 - 762.0) (type: double), _col4 (type: tinyint), ((- _col2) + UDFToDouble(_col4)) (type: double), _col5 (type: double), (((- _col2) + UDFToDouble(_col4)) - _col1) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 7, 1, 2, 8, 9, 3, 11, 10, 4, 14, 5, 12]
- selectExpressions: DoubleColModuloDoubleScalar(col 0:double, val -563.0) -> 6:double, DoubleColAddDoubleScalar(col 0:double, val 762.0) -> 7:double, DoubleColUnaryMinus(col 2:double) -> 8:double, DoubleColSubtractDoubleColumn(col 1:double, col 0:double) -> 9:double, DoubleColUnaryMinus(col 10:double)(children: DoubleColSubtractDoubleColumn(col 1:double, col 0:double) -> 10:double) -> 11:double, DoubleColSubtractDoubleScalar(col 2:double, val 762.0) -> 10:double, DoubleColAddDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 2:double) -> 12:double, CastLongToDouble(col 4:tinyint) -> 13:double) -> 14:double, DoubleColSubtractDoubleColumn(col 15:double, col 1:double)(children: DoubleColAddDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 2:double) -> 12:double, CastLongToDouble(col 4:tinyint) -> 13:double) -> 15:double) -> 12:double
Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_3.q.out b/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
index 444b534..253e12c 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
@@ -126,47 +126,23 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [double, decimal(22,3), decimal(8,3)]
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: VALUE._col0:struct<count:bigint,sum:double,variance:double>, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,variance:double>, VALUE._col3:double, VALUE._col4:struct<count:bigint,sum:double,input:int>, VALUE._col5:struct<count:bigint,sum:double,variance:double>
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_samp(VALUE._col0), stddev_pop(VALUE._col1), stddev_samp(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5)
- Group By Vectorization:
- aggregators: VectorUDAFVarFinal(col 0:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFVarFinal(col 2:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFAvgFinal(col 4:struct<count:bigint,sum:double,input:int>) -> double, VectorUDAFVarFinal(col 5:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: double), (_col0 - 10.175) (type: double), _col1 (type: double), (_col0 * (_col0 - 10.175)) (type: double), (- _col1) (type: double), (_col0 % 79.553) (type: double), (- (_col0 * (_col0 - 10.175))) (type: double), _col2 (type: double), (- _col0) (type: double), _col3 (type: double), ((- (_col0 * (_col0 - 10.175))) / (_col0 - 10.175)) (type: double), (- (_col0 - 10.175)) (type: double), _col4 (type: double), (-3728.0 - _col0) (type: double), _col5 (type: double), (_col4 / _col2) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 1, 8, 7, 9, 10, 2, 11, 3, 14, 13, 4, 12, 5, 15]
- selectExpressions: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 6:double, DoubleColMultiplyDoubleColumn(col 0:double, col 7:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 1:double) -> 7:double, DoubleColModuloDoubleScalar(col 0:double, val 79.553) -> 9:double, DoubleColUnaryMinus(col 11:double)(children: DoubleColMultiplyDoubleColumn(col 0:double, col 10:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 10:double) -> 11:double) -> 10:double, DoubleColUnaryMinus(col 0:double) -> 11:double, DoubleColDivideDoubleColumn(col 12:double, col 13:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleColMultiplyDoubleColumn(col 0:double, col 12:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 12:double) -> 13:double) -> 12:double, DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 12:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 10.175) -> 12:double) -> 13:double, DoubleScalarSubtractDoubleColumn(val -3728.0, col 0:double) -> 12:double, DoubleColDivideDoubleColumn(col 4:double, col 2:double) -> 15:double
Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_4.q.out b/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
index 664ff5e..436fdd9 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
@@ -121,47 +121,23 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder:
- reduceColumnSortOrder:
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 5
- dataColumns: VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:struct<count:bigint,sum:double,input:double>, VALUE._col3:struct<count:bigint,sum:double,variance:double>, VALUE._col4:tinyint
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0), stddev_pop(VALUE._col1), avg(VALUE._col2), var_pop(VALUE._col3), min(VALUE._col4)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFVarFinal(col 1:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_pop, VectorUDAFAvgFinal(col 2:struct<count:bigint,sum:double,input:double>) -> double, VectorUDAFVarFinal(col 3:struct<count:bigint,sum:double,variance:double>) -> double aggregation: var_pop, VectorUDAFMinLong(col 4:tinyint) -> tinyint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0, 1, 2, 3, 4]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: bigint), (_col0 * -563) (type: bigint), (-3728 + _col0) (type: bigint), _col1 (type: double), (- _col1) (type: double), _col2 (type: double), ((_col0 * -563) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563) % _col0)) / _col2) (type: double), _col3 (type: double), (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2)) (type: double), ((-3728 + _col0) - (_col0 * -563)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2))) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 5, 6, 1, 7, 2, 9, 12, 3, 11, 14, 4, 4, 16]
- selectExpressions: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 5:bigint, LongScalarAddLongColumn(val -3728, col 0:bigint) -> 6:bigint, DoubleColUnaryMinus(col 1:double) -> 7:double, LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 9:bigint, DoubleColDivideDoubleColumn(col 11:double, col 2:double)(children: CastLongToDouble(col 10:bigint)(children: LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 10:bigint) -> 11:double) -> 12:double, DoubleColUnaryMinus(col 13:double)(children: DoubleColDivideDoubleColumn(col 11:double, col 2:double)(children: CastLongToDouble(col 10:bigint)(children: LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 10:bigint) -> 11:double) -> 13:double) -> 11:double, LongColSubtractLongColumn(col 8:bigint, col 10:bigint)(children: LongScalarAddLongColumn(val -3728, col 0:bigint) -> 8:bigint, LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 14:bigint, DoubleColMultiplyDoubleColumn(col 13:double, col 15:double)(children: CastLongToDouble(col 4:tinyint) -> 13:double, DoubleColUnaryMinus(col 16:double)(children: DoubleColDivideDoubleColumn(col 15:double, col 2:double)(children: CastLongToDouble(col 10:bigint)(children: LongColModuloLongColumn(col 8:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 8:bigint) -> 10:bigint) -> 15:double) -> 16:double) -> 15:double) -> 16:double
Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
index b270aea..24cae48 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
@@ -119,31 +119,14 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- reduceColumnNullOrder: aaa
- reduceColumnSortOrder: +++
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 6
- dataColumns: KEY._col0:double, KEY._col1:string, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:struct<count:bigint,sum:double,variance:double>, VALUE._col2:double
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFVarFinal(col 4:struct<count:bigint,sum:double,variance:double>) -> double aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:double, col 1:string, col 2:timestamp
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -151,17 +134,9 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 0, 2, 6, 8, 3, 4, 7, 10, 5, 9, 12, 4]
- selectExpressions: DoubleColSubtractDoubleScalar(col 0:double, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 7:double)(children: DoubleColSubtractDoubleScalar(col 0:double, val 9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4:double) -> 7:double, DoubleColMultiplyDoubleColumn(col 4:double, col 9:double)(children: CastLongToDouble(col 3:bigint) -> 9:double) -> 10:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0:double) -> 9:double, DecimalColDivideDecimalScalar(col 11:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 11:decimal(19,0)) -> 12:decimal(28,6)
Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/8975924e/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out b/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
index 18295e1..01eb4b4 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
@@ -197,13 +197,11 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
@@ -514,13 +512,11 @@ STAGE PLANS:
enabledConditionsNotMet: Row deserialization of vectorized input format not supported IS false, hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat IS false
inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
@@ -843,13 +839,11 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
@@ -1208,13 +1202,11 @@ STAGE PLANS:
enabledConditionsNotMet: hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.orc.OrcInputFormat IS false
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
Reducer 2
- Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)