You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2018/03/22 16:57:35 UTC
[12/34] hive git commit: HIVE-18979: Enable
AggregateReduceFunctionsRule from Calcite (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/5cb8867b/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out
index df85ce3..fd2947d 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out
@@ -94,26 +94,27 @@ STAGE PLANS:
predicate: (((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ctimestamp1 is null) (type: boolean)
Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: cbigint (type: bigint), cdouble (type: double), cstring1 (type: string), cboolean1 (type: boolean)
- outputColumnNames: cbigint, cdouble, cstring1, cboolean1
+ expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [3, 5, 6, 10]
+ projectedOutputColumnNums: [3, 10, 6, 5, 13, 16, 14]
+ selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double
Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cbigint), stddev_samp(cbigint), avg(cdouble), sum(cbigint), stddev_pop(cdouble)
+ aggregations: count(_col0), sum(_col5), sum(_col4), sum(_col3), count(_col3), sum(_col0), sum(_col6)
Group By Vectorization:
- aggregators: VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFVarLong(col 3:bigint) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_samp, VectorUDAFAvgDouble(col 5:double) -> struct<count:bigint,sum:double,input:double>, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop
+ aggregators: VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 14:double) -> double
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4]
- keys: cdouble (type: double), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean)
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
+ keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean)
@@ -124,9 +125,9 @@ STAGE PLANS:
keyColumnNums: [0, 1, 2, 3]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [4, 5, 6, 7, 8]
+ valueColumnNums: [4, 5, 6, 7, 8, 9, 10]
Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col4 (type: bigint), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,input:double>), _col7 (type: bigint), _col8 (type: struct<count:bigint,sum:double,variance:double>)
+ value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -142,27 +143,55 @@ STAGE PLANS:
includeColumns: [0, 1, 3, 5, 6, 8, 10, 11]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: []
+ scratchColumnTypeNames: [double, double, double, double]
Reducer 2
+ Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
- vectorized: false
+ reduceColumnNullOrder: aaaa
+ reduceColumnSortOrder: ++++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 11
+ dataColumns: KEY._col0:double, KEY._col1:bigint, KEY._col2:string, KEY._col3:boolean, VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double, VALUE._col4:bigint, VALUE._col5:bigint, VALUE._col6:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4)
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFSumLong(col 9:bigint) -> bigint, VectorUDAFSumDouble(col 10:double) -> double
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:double, col 1:bigint, col 2:string, col 3:boolean
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), _col6 (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), _col8 (type: double)
+ expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), (_col7 / _col8) (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col9 (type: bigint), ((_col7 / _col8) / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), power(((_col10 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 3, 2, 0, 11, 12, 4, 13, 14, 19, 15, 20, 22, 24, 9, 26, 25, 21, 27]
+ selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 11:double, LongColUnaryMinus(col 1:bigint) -> 12:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 13:bigint, FuncPowerDoubleToDouble(col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 14:double) -> 15:double) -> 14:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 4:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 4:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 15:double) -> 14:double, DoubleColDivideDoubleScalar(col 15:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 15:double) -> 19:double, DoubleColUnaryMinus(col 20:double)(ch
ildren: DoubleColDivideDoubleScalar(col 15:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 15:double) -> 20:double) -> 15:double, DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 20:double, DoubleColUnaryMinus(col 21:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 22:double, DecimalScalarAddDecimalColumn(val -5638.15, col 23:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 23:decimal(19,0)) -> 24:decimal(22,2), DoubleColDivideDoubleColumn(col 21:double, col 25:double)(children: DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 21:double, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 25:double) -> 26:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColDivideDoubleScalar(col 21:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 25:double) ->
21:double) -> 25:double, DoubleColAddDoubleColumn(col 27:double, col 28:double)(children: DoubleColDivideDoubleScalar(col 21:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 27:double, DoubleColUnaryMinus(col 21:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 28:double) -> 21:double, FuncPowerDoubleToDouble(col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 27:double) -> 28:double) -> 27:double) -> 28:double) -> 27:double
Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string)
sort order: +++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [3, 11, 12, 4, 13, 14, 19, 15, 20, 22, 24, 9, 26, 25, 21, 27]
Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double)
Reducer 3
http://git-wip-us.apache.org/repos/asf/hive/blob/5cb8867b/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out
index e4db32c..bce1f8a 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out
@@ -96,26 +96,27 @@ STAGE PLANS:
predicate: (((UDFToDouble(ctimestamp1) > 11.0D) and (UDFToDouble(ctimestamp2) <> 12.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean)
Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ctinyint (type: tinyint), cfloat (type: float), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean)
- outputColumnNames: ctinyint, cfloat, cstring1, ctimestamp1, cboolean1
+ expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 4, 6, 8, 10]
+ projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18]
+ selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double
Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
+ aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1)
Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarDouble(col 4:float) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop, VectorUDAFVarLong(col 0:tinyint) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint
+ aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
- keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+ keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
@@ -126,9 +127,9 @@ STAGE PLANS:
keyColumnNums: [0, 1, 2, 3, 4]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [5, 6, 7, 8, 9, 10]
+ valueColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: struct<count:bigint,sum:double,variance:double>), _col9 (type: float), _col10 (type: tinyint)
+ value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -144,27 +145,55 @@ STAGE PLANS:
includeColumns: [0, 4, 5, 6, 8, 9, 10]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: [double, decimal(11,4)]
+ scratchColumnTypeNames: [double, decimal(11,4), double, double, double, double]
Reducer 2
+ Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
- vectorized: false
+ reduceColumnNullOrder: aaaaa
+ reduceColumnSortOrder: +++++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 15
+ dataColumns: KEY._col0:boolean, KEY._col1:tinyint, KEY._col2:timestamp, KEY._col3:float, KEY._col4:string, VALUE._col0:tinyint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double, VALUE._col4:bigint, VALUE._col5:double, VALUE._col6:double, VALUE._col7:bigint, VALUE._col8:float, VALUE._col9:tinyint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
+ aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+ Group By Vectorization:
+ aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFCountMerge(col 12:bigint) -> bigint, VectorUDAFMaxDouble(col 13:float) -> float, VectorUDAFMinLong(col 14:tinyint) -> tinyint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint)
+ expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14]
+ selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 19:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 6:double) -> 19:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 21:float, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnar
yMinus(col 6:double) -> 23:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double, DecimalColSubtractDecimalScalar(col 26:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 26:decimal(3,0)) -> 27:decimal(7,3), DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 28:double, DoubleScalarDivideDoubleColumn(val -26.28, col 29:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 29:double) -> 25:double, D
oubleColDivideDoubleColumn(col 30:double, col 29:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 29:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 29:double) -> 30:double, CastLongToDouble(col 1:tinyint) -> 29:double) -> 31:double
Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
sort order: +++++++++++++++++++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Reducer 3
@@ -419,26 +448,27 @@ STAGE PLANS:
predicate: (((UDFToDouble(ctimestamp1) > -1.388D) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean)
Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ctinyint (type: tinyint), cfloat (type: float), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean)
- outputColumnNames: ctinyint, cfloat, cstring1, ctimestamp1, cboolean1
+ expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 4, 6, 8, 10]
+ projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18]
+ selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double
Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
+ aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1)
Group By Vectorization:
- aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarDouble(col 4:float) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop, VectorUDAFVarLong(col 0:tinyint) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint
+ aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
- keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+ keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
@@ -449,7 +479,7 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: struct<count:bigint,sum:double,variance:double>), _col9 (type: float), _col10 (type: tinyint)
+ value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -461,25 +491,44 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
+ Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
- vectorized: false
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
+ aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9)
+ Group By Vectorization:
+ aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFCountMerge(col 12:bigint) -> bigint, VectorUDAFMaxDouble(col 13:float) -> float, VectorUDAFMinLong(col 14:tinyint) -> tinyint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint)
+ expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14]
+ selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 19:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 6:double) -> 19:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 21:float, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnar
yMinus(col 6:double) -> 23:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double, DecimalColSubtractDecimalScalar(col 26:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 26:decimal(3,0)) -> 27:decimal(7,3), DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 28:double, DoubleScalarDivideDoubleColumn(val -26.28, col 29:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 29:double) -> 25:double, D
oubleColDivideDoubleColumn(col 30:double, col 29:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 29:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 29:double) -> 30:double, CastLongToDouble(col 1:tinyint) -> 29:double) -> 31:double
Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
sort order: +++++++++++++++++++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Reducer 3
http://git-wip-us.apache.org/repos/asf/hive/blob/5cb8867b/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out
index aef374a..03afcc1 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out
@@ -96,27 +96,27 @@ STAGE PLANS:
predicate: (((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and (UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint))) (type: boolean)
Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double), ((- (-26.28D + cdouble)) * (- (-26.28D + cdouble))) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [8, 4, 6, 10, 5, 14]
- selectExpressions: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 14:double
+ projectedOutputColumnNums: [8, 4, 6, 10, 5, 14, 13, 4, 15]
+ selectExpressions: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 16:double) -> 13:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 15:double
Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1)
+ aggregations: sum(_col6), sum(_col5), count(_col5), max(_col1), sum(_col8), sum(_col7), count(_col1)
Group By Vectorization:
- aggregators: VectorUDAFVarDouble(col 14:double) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_samp, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFVarDouble(col 4:float) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFVarDouble(col 4:float) -> struct<count:bigint,sum:double,variance:double> aggregation: var_pop, VectorUDAFVarDouble(col 4:float) -> struct<count:bigint,sum:double,variance:double> aggregation: var_samp
+ aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 14:double) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean)
@@ -127,9 +127,9 @@ STAGE PLANS:
keyColumnNums: [0, 1, 2, 3, 4]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [5, 6, 7, 8, 9, 10]
+ valueColumnNums: [5, 6, 7, 8, 9, 10, 11]
Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: float), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: bigint), _col9 (type: struct<count:bigint,sum:double,variance:double>), _col10 (type: struct<count:bigint,sum:double,variance:double>)
+ value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -145,27 +145,55 @@ STAGE PLANS:
includeColumns: [0, 2, 3, 4, 5, 6, 8, 9, 10]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: [double, double]
+ scratchColumnTypeNames: [double, double, double, double]
Reducer 2
+ Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
- vectorized: false
+ reduceColumnNullOrder: aaaaa
+ reduceColumnSortOrder: +++++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ dataColumns: KEY._col0:string, KEY._col1:float, KEY._col2:double, KEY._col3:timestamp, KEY._col4:boolean, VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint, VALUE._col3:float, VALUE._col4:double, VALUE._col5:double, VALUE._col6:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), max(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxDouble(col 8:float) -> float, VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFCountMerge(col 11:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string, col 1:float, col 2:double, col 3:timestamp, col 4:boolean
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175D) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double)
+ expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (_col1 * -26.28) (type: float), _col8 (type: float), (- _col1) (type: float), (- _col8) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / _col11), 0.5) (type: double), _col11 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5)) (type: double), (UDFToDouble(_col1) - _col2) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (((_col9 - ((_col10 * _col10) / _col11)) / _col11) % 10.175D) (type: double), ((_col9 - ((_col
10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END) (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [3, 1, 0, 4, 2, 12, 14, 13, 15, 8, 19, 20, 21, 22, 11, 24, 25, 23, 29, 28, 31, 34]
+ selectExpressions: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 12:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 13:double) -> 14:double, FuncPowerDoubleToDouble(col 15:double)(children: DoubleColDivideLongColumn(col 13:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 15:double)(children: DoubleColDivideLongColumn(col 13:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 13:double) -> 15:double) -> 13:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 15:double) -> 13:double, DoubleColMultiplyDoubleScalar(col 1:float, val -26.280000686645508) -> 15:float, DoubleColUnaryMinus(col 1:float) -> 19:float, DoubleColUnaryMinus(col 8:float) -> 20:float, DoubleColDi
videDoubleScalar(col 22:double, val 10.175)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 21:double) -> 22:double) -> 21:double, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColDivideDoubleScalar(col 24:double, val 10.175)(children: DoubleColUnaryMinus(col 23:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 23:double) -> 24:double) -> 23:double) -> 24:double, DoubleScalarModuloDoubleColumn(val -1.389, col 23:double)(children: FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 23:double,
col 27:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 25:double)(children: DoubleColDivideLongColumn(col 23:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 23:double) -> 25:double) -> 23:double, IfExprNullCondExpr(col 18:boolean, null, col 26:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 18:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 26:bigint) -> 27:bigint) -> 25:double) -> 23:double) -> 25:double, DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 23:double, DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 29:double)(children: DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 28:double) -> 29:double) -> 28:double) -> 29:double, DoubleColModuloDoubleScalar(col 30:double, val 10.175)(children: DoubleColDivideLon
gColumn(col 28:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 30:double)(children: DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 28:double) -> 30:double) -> 28:double) -> 30:double) -> 28:double, DoubleColDivideLongColumn(col 30:double, col 33:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 30:double) -> 31:double) -> 30:double, IfExprNullCondExpr(col 27:boolean, null, col 32:bigint)(children: LongColEqualLongScalar(col 11:bigint, val 1) -> 27:boolean, LongColSubtractLongScalar(col 11:bigint, val 1) -> 32:bigint) -> 33:bigint) -> 31:double, DoubleColUnaryMinus(col 30:double)(children: DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 30:double) -> 34:double
Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp)
sort order: ++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2, 3]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [4, 12, 14, 13, 15, 8, 19, 20, 21, 22, 11, 24, 25, 23, 29, 28, 31, 34]
Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double)
Reducer 3
http://git-wip-us.apache.org/repos/asf/hive/blob/5cb8867b/ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out
index 24cdf06..126cfd0 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out
@@ -92,26 +92,27 @@ STAGE PLANS:
predicate: (((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D)) or (cstring1 like '10%') or (cstring2 like '%ss%')) (type: boolean)
Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean)
- outputColumnNames: ctinyint, cint, cfloat, cdouble, cstring1, ctimestamp1, cboolean1
+ expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 2, 4, 5, 6, 8, 10]
+ projectedOutputColumnNums: [4, 10, 5, 6, 0, 2, 8, 4, 13, 14, 17, 15, 19]
+ selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, CastLongToDouble(col 2:int) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 18:double)(children: CastLongToDouble(col 2:int) -> 16:double, CastLongToDouble(col 2:int) -> 18:double) -> 19:double
Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint)
+ aggregations: sum(_col8), sum(_col7), count(_col0), min(_col2), sum(_col10), sum(_col9), count(_col4), sum(_col12), sum(_col11), count(_col5)
Group By Vectorization:
- aggregators: VectorUDAFVarDouble(col 4:float) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFVarLong(col 0:tinyint) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_samp, VectorUDAFVarLong(col 0:tinyint) -> struct<count:bigint,sum:double,variance:double> aggregation: var_pop, VectorUDAFVarLong(col 2:int) -> struct<count:bigint,sum:double,variance:double> aggregation: var_samp, VectorUDAFVarLong(col 2:int) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop
+ aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 19:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 2:int) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
- keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp)
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+ keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
@@ -122,9 +123,9 @@ STAGE PLANS:
keyColumnNums: [0, 1, 2, 3, 4, 5, 6]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [7, 8, 9, 10, 11, 12]
+ valueColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: double), _col9 (type: struct<count:bigint,sum:double,variance:double>), _col10 (type: struct<count:bigint,sum:double,variance:double>), _col11 (type: struct<count:bigint,sum:double,variance:double>), _col12 (type: struct<count:bigint,sum:double,variance:double>)
+ value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -140,7 +141,7 @@ STAGE PLANS:
includeColumns: [0, 1, 2, 4, 5, 6, 7, 8, 10]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: []
+ scratchColumnTypeNames: [double, double, double, double, double, double, double]
Reducer 2
Reduce Vectorization:
enabled: false
@@ -148,13 +149,13 @@ STAGE PLANS:
enableConditionsNotMet: hive.vectorized.execution.reduce.enabled IS false
Reduce Operator Tree:
Group By Operator
- aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), min(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), sum(VALUE._col7), sum(VALUE._col8), count(VALUE._col9)
keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col8 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), _col12 (type: double)
+ expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / CASE WHEN ((_col9 = 1L)) THEN (null) ELSE ((_col9 - 1)) END), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / CASE WHEN ((_col16 = 1L)) THEN (null) ELSE ((_col16 - 1)) END) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)
), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/5cb8867b/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out
index a35c9c5..303702c 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out
@@ -69,39 +69,40 @@ STAGE PLANS:
predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean)
Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
- outputColumnNames: cdouble, cstring1, ctimestamp1
+ expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [5, 6, 8]
+ projectedOutputColumnNums: [6, 5, 8, 13]
+ selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double
Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble)
+ aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1)
Group By Vectorization:
- aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double
+ aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 5:double) -> double
className: VectorGroupByOperator
groupByMode: HASH
- keyExpressions: col 5:double, col 6:string, col 8:timestamp
+ keyExpressions: col 6:string, col 5:double, col 8:timestamp
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2]
- keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
+ projectedOutputColumnNums: [0, 1, 2, 3]
+ keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
+ key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
sort order: +++
- Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
Reduce Sink Vectorization:
className: VectorReduceSinkMultiKeyOperator
keyColumnNums: [0, 1, 2]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [3, 4, 5]
+ valueColumnNums: [3, 4, 5, 6]
Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: double)
+ value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -117,26 +118,51 @@ STAGE PLANS:
includeColumns: [5, 6, 7, 8]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: []
+ scratchColumnTypeNames: [double]
Reducer 2
+ Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
- vectorized: false
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 7
+ dataColumns: KEY._col0:string, KEY._col1:double, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
- keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), min(VALUE._col3)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 6:double) -> double
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string, col 1:double, col 2:timestamp
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0, 1, 2, 3]
+ keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639D) (type: double), (- (_col0 - 9763215.5639D)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639D / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
+ expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 8, 14, 20, 6, 10, 22, 17]
+ selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 8:double) -> 10:double) -> 8:double, IfExprNullCondExpr(col 11:boolean, null, col 12:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 11:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 12:bigint) -> 13:bigint) -> 10:double) -> 8:double, DoubleColUnaryMinus(col 10:double)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 16:bigint)(children: DoubleColSubtract
DoubleColumn(col 4:double, col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 14:double) -> 10:double, IfExprNullCondExpr(col 13:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 14:double) -> 10:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 10:double, col 17:double)(children: FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 17:double) -> 10:double, IfExprNullCondExpr(col 16:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boo
lean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 17:double) -> 10:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 20:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 10:double, DecimalColDivideDecimalScalar(col 21:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 21:decimal(19,0)) -> 22:decimal(28,6), FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 17:double) -> 23:double) -> 17:double, IfExprNullCondExpr(col 19:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 17:double
Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat