You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2018/03/22 16:57:26 UTC
[03/34] hive git commit: HIVE-18979: Enable
AggregateReduceFunctionsRule from Calcite (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/5cb8867b/ql/src/test/results/clientpositive/vector_aggregate_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_aggregate_9.q.out b/ql/src/test/results/clientpositive/vector_aggregate_9.q.out
index 6cf99bb..0f7fcc1 100644
--- a/ql/src/test/results/clientpositive/vector_aggregate_9.q.out
+++ b/ql/src/test/results/clientpositive/vector_aggregate_9.q.out
@@ -134,9 +134,9 @@ STAGE PLANS:
projectedOutputColumnNums: [6]
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: min(dc), max(dc), sum(dc), avg(dc)
+ aggregations: min(dc), max(dc), sum(dc), count(dc)
Group By Vectorization:
- aggregators: VectorUDAFMinDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFMaxDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFSumDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFAvgDecimal(col 6:decimal(38,18)) -> struct<count:bigint,sum:decimal(38,18),input:decimal(38,18)>
+ aggregators: VectorUDAFMinDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFMaxDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFSumDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFCount(col 6:decimal(38,18)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
native: false
@@ -144,7 +144,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1, 2, 3]
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -152,8 +152,8 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct<count:bigint,sum:decimal(38,18),input:decimal(38,18)>)
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -176,17 +176,21 @@ STAGE PLANS:
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3)
+ aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), count(VALUE._col3)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), (_col2 / _col3) (type: decimal(38,18))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -236,9 +240,9 @@ STAGE PLANS:
projectedOutputColumnNums: [5]
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: min(d), max(d), sum(d), avg(d)
+ aggregations: min(d), max(d), sum(d), count(d)
Group By Vectorization:
- aggregators: VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFMaxDouble(col 5:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFAvgDouble(col 5:double) -> struct<count:bigint,sum:double,input:double>
+ aggregators: VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFMaxDouble(col 5:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
native: false
@@ -246,7 +250,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1, 2, 3]
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -254,8 +258,8 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: struct<count:bigint,sum:double,input:double>)
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -278,17 +282,21 @@ STAGE PLANS:
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3)
+ aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), count(VALUE._col3)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), (_col2 / _col3) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -338,9 +346,9 @@ STAGE PLANS:
projectedOutputColumnNums: [10]
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: min(ts), max(ts), sum(ts), avg(ts)
+ aggregations: min(ts), max(ts), sum(ts), count(ts)
Group By Vectorization:
- aggregators: VectorUDAFMinTimestamp(col 10:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 10:timestamp) -> timestamp, VectorUDAFSumTimestamp(col 10:timestamp) -> double, VectorUDAFAvgTimestamp(col 10:timestamp) -> struct<count:bigint,sum:double,input:timestamp>
+ aggregators: VectorUDAFMinTimestamp(col 10:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 10:timestamp) -> timestamp, VectorUDAFSumTimestamp(col 10:timestamp) -> double, VectorUDAFCount(col 10:timestamp) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
native: false
@@ -348,7 +356,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1, 2, 3]
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -356,8 +364,8 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), _col3 (type: struct<count:bigint,sum:double,input:timestamp>)
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), _col3 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -380,17 +388,21 @@ STAGE PLANS:
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3)
+ aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), count(VALUE._col3)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), (_col2 / _col3) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/5cb8867b/ql/src/test/results/clientpositive/vector_cast_constant.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/vector_cast_constant.q.out
index 4b06016..3d3d761 100644
--- a/ql/src/test/results/clientpositive/vector_cast_constant.q.out
+++ b/ql/src/test/results/clientpositive/vector_cast_constant.q.out
@@ -136,18 +136,18 @@ STAGE PLANS:
projectedOutputColumnNums: [2]
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(50), avg(50.0D), avg(50)
+ aggregations: sum(50), count(), sum(50.0D), count(50.0D), sum(50), count(50)
Group By Vectorization:
- aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 12:int) -> struct<count:bigint,sum:double,input:int>, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> struct<count:bigint,sum:double,input:double>, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 14:decimal(10,0)) -> struct<count:bigint,sum:decimal(20,0),input:decimal(10,0)>
+ aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 50) -> 12:int) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> double, VectorUDAFCount(ConstantVectorExpression(val 50.0) -> 14:double) -> bigint, VectorUDAFSumDecimal(ConstantVectorExpression(val 50) -> 15:decimal(10,0)) -> decimal(20,0), VectorUDAFCount(ConstantVectorExpression(val 50) -> 16:decimal(10,0)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 2:int
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2]
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
@@ -160,7 +160,7 @@ STAGE PLANS:
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: struct<count:bigint,sum:double,input:int>), _col2 (type: struct<count:bigint,sum:double,input:double>), _col3 (type: struct<count:bigint,sum:decimal(12,0),input:decimal(10,0)>)
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(12,0)), _col6 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -177,17 +177,21 @@ STAGE PLANS:
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Select Operator
+ expressions: _col0 (type: int), (_col1 / _col2) (type: double), (_col3 / _col4) (type: double), CAST( (_col5 / _col6) AS decimal(6,4)) (type: decimal(6,4))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -205,7 +209,7 @@ STAGE PLANS:
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(6,4))
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -222,7 +226,7 @@ STAGE PLANS:
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4))
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(6,4))
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
Limit
http://git-wip-us.apache.org/repos/asf/hive/blob/5cb8867b/ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out
index c17951a..16c80f0 100644
--- a/ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out
+++ b/ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out
@@ -208,26 +208,27 @@ STAGE PLANS:
native: true
vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(20,10), 2:cdecimal2:decimal(23,14), 3:cint:int, 4:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
- expressions: cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)), cint (type: int)
- outputColumnNames: cdecimal1, cdecimal2, cint
+ expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)), UDFToDouble(cdecimal1) (type: double), (UDFToDouble(cdecimal1) * UDFToDouble(cdecimal1)) (type: double), UDFToDouble(cdecimal2) (type: double), (UDFToDouble(cdecimal2) * UDFToDouble(cdecimal2)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [1, 2, 3]
+ projectedOutputColumnNums: [3, 1, 2, 5, 8, 6, 10]
+ selectExpressions: CastDecimalToDouble(col 1:decimal(20,10)) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 7:double)(children: CastDecimalToDouble(col 1:decimal(20,10)) -> 6:double, CastDecimalToDouble(col 1:decimal(20,10)) -> 7:double) -> 8:double, CastDecimalToDouble(col 2:decimal(23,14)) -> 6:double, DoubleColMultiplyDoubleColumn(col 7:double, col 9:double)(children: CastDecimalToDouble(col 2:decimal(23,14)) -> 7:double, CastDecimalToDouble(col 2:decimal(23,14)) -> 9:double) -> 10:double
Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count()
+ aggregations: count(_col1), max(_col1), min(_col1), sum(_col1), sum(_col4), sum(_col3), count(_col2), max(_col2), min(_col2), sum(_col2), sum(_col6), sum(_col5), count()
Group By Vectorization:
- aggregators: VectorUDAFCount(col 1:decimal(20,10)) -> bigint, VectorUDAFMaxDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 1:decimal(20,10)) -> decimal(30,10), VectorUDAFAvgDecimal(col 1:decimal(20,10)) -> struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)>, VectorUDAFVarDecimal(col 1:decimal(20,10)) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop, VectorUDAFVarDecimal(col 1:decimal(20,10)) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_samp, VectorUDAFCount(col 2:decimal(23,14)) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 2:decimal(23,14)) -> decimal(33,14), VectorUDAFAvgDecimal(col 2:decimal(23,14)) -> struct<count:bigint,sum:decimal(33,14),input:decimal(23,14)>, VectorUDAFVarDecimal(col 2:decimal(23,1
4)) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop, VectorUDAFVarDecimal(col 2:decimal(23,14)) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_samp, VectorUDAFCountStar(*) -> bigint
+ aggregators: VectorUDAFCount(col 1:decimal(20,10)) -> bigint, VectorUDAFMaxDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 1:decimal(20,10)) -> decimal(30,10), VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 2:decimal(23,14)) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 2:decimal(23,14)) -> decimal(33,14), VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountStar(*) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 3:int
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
- keys: cint (type: int)
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
@@ -239,7 +240,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct<count:bigint,sum:decimal(33,14),input:decimal(23,14)>), _col13 (type: struct<count:bigint,sum:double,variance:double>), _col14 (type: struct<count:bigint,sum:double,variance:double>), _col15 (type: bigint)
+ value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(23,14)), _col9 (type: decimal(23,14)), _col10 (type: decimal(33,14)), _col11 (type: double), _col12 (type: double), _col13 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -255,23 +256,23 @@ STAGE PLANS:
includeColumns: [1, 2, 3]
dataColumns: cdouble:double, cdecimal1:decimal(20,10), cdecimal2:decimal(23,14), cint:int
partitionColumnCount: 0
- scratchColumnTypeNames: []
+ scratchColumnTypeNames: [double, double, double, double, double, double]
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
+ aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), count(VALUE._col12)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
Statistics: Num rows: 6144 Data size: 1082441 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_col15 > 1L) (type: boolean)
+ predicate: (_col13 > 1L) (type: boolean)
Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double)
+ expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), (CAST( _col4 AS decimal(24,14)) / _col1) (type: decimal(38,28)), power(((_col5 - ((_col6 * _col6) / _col1)) / _col1), 0.5) (type: double), power(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), _col7 (type: bigint), _col8 (type: decimal(23,14)), _col9 (type: decimal(23,14)), _col10 (type: decimal(33,14)), (CAST( _col10 AS decimal(27,18)) / _col7) (type: decimal(38,29)), power(((_col11 - ((_col12 * _col12) / _col7)) / _col7), 0.5) (type: double), power(((_col11 - ((_col12 * _col12) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -306,14 +307,14 @@ POSTHOOK: query: SELECT cint,
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_vgby
#### A masked pattern was here ####
--3728 6 5831542.2692483780 -3367.6517567568 5817556.0411483778 969592.67352472963333 2174330.2092403853 2381859.406131774 6 6984454.21109769200000 -4033.44576923076900 6967702.86724384584710 1161283.811207307641183333 2604201.2704476737 2852759.5602156054
--563 2 -515.6210729730 -3367.6517567568 -3883.2728297298 -1941.63641486490000 1426.0153418918999 2016.6902366556308 2 -617.56077692307690 -4033.44576923076900 -4651.00654615384590 -2325.503273076922950000 1707.9424961538462 2415.395441814127
-253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 -339.33992366976309 5708.9563478862 5711.745967572779 1024 11697.96923076923100 -11712.99230769231000 -416182.64030769233089 -406.428359675480791885 6837.632716002934 6840.973851172274
-528534767 1024 5831542.2692483780 -9777.1594594595 11646372.8607481068 11373.41099682432305 257528.92988206653 257654.7686043977 1024 6984454.21109769200000 -11710.13076923077100 13948892.79980307629003 13621.965624807691689482 308443.1074570801 308593.82484083984
-626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 10.29399661106318 5742.09145323734 5744.897264034267 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 12.329148046874977988 6877.318722794877 6880.679250101603
-6981 3 5831542.2692483780 -515.6210729730 5830511.0271024320 1943503.67570081066667 2749258.455012492 3367140.1929065133 3 6984454.21109769200000 -617.56077692307690 6983219.08954384584620 2327739.696514615282066667 3292794.4113115156 4032833.0678006653
-762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2916536.74432689170000 2915005.5249214866 4122440.3477364695 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 3493144.078394999846250000 3491310.1327026924 4937458.140118758
-NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 1633.60810810806667 5695.483082135364 5696.4103077145055 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 1956.576923076922966667 6821.495748565159 6822.606289190924
+-3728 6 5831542.2692483780 -3367.6517567568 5817556.0411483778 969592.6735247296333333333333333333 2174330.209240386 2381859.406131774 6 6984454.21109769200000 -4033.44576923076900 6967702.86724384584710 1161283.81120730764118333333333333333 2604201.2704476737 2852759.5602156054
+-563 2 -515.6210729730 -3367.6517567568 -3883.2728297298 -1941.6364148649000000000000000000 1426.0153418918997 2016.6902366556305 2 -617.56077692307690 -4033.44576923076900 -4651.00654615384590 -2325.50327307692295000000000000000 1707.9424961538462 2415.395441814127
+253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 -339.3399236697630859375000000000 5708.956347886203 5711.745967572781 1024 11697.96923076923100 -11712.99230769231000 -416182.64030769233089 -406.42835967548079188476562500000 6837.632716002931 6840.973851172272
+528534767 1024 5831542.2692483780 -9777.1594594595 11646372.8607481068 11373.4109968243230468750000000000 257528.9298820665 257654.7686043977 1024 6984454.21109769200000 -11710.13076923077100 13948892.79980307629003 13621.96562480769168948242187500000 308443.1074570797 308593.82484083937
+626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 10.2939966110631835937500000000 5742.091453237337 5744.897264034264 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 12.32914804687497798828125000000 6877.318722794881 6880.679250101608
+6981 3 5831542.2692483780 -515.6210729730 5830511.0271024320 1943503.6757008106666666666666666667 2749258.455012492 3367140.1929065133 3 6984454.21109769200000 -617.56077692307690 6983219.08954384584620 2327739.69651461528206666666666666667 3292794.4113115156 4032833.0678006653
+762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2916536.7443268917000000000000000000 2915005.5249214866 4122440.3477364695 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 3493144.07839499984625000000000000000 3491310.1327026924 4937458.140118757
+NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 1633.6081081080666666666666666667 5695.483082135323 5696.410307714464 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 1956.57692307692296666666666666667 6821.495748565151 6822.606289190915
PREHOOK: query: CREATE TABLE decimal_vgby_small STORED AS TEXTFILE AS
SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(11,5)) AS cdecimal1,
CAST (((cdouble*9.3)/13) AS DECIMAL(16,0)) AS cdecimal2,
@@ -543,26 +544,27 @@ STAGE PLANS:
native: true
vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(11,5)/DECIMAL_64, 2:cdecimal2:decimal(16,0)/DECIMAL_64, 3:cint:int, 4:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
- expressions: cdecimal1 (type: decimal(11,5)), cdecimal2 (type: decimal(16,0)), cint (type: int)
- outputColumnNames: cdecimal1, cdecimal2, cint
+ expressions: cint (type: int), cdecimal1 (type: decimal(11,5)), cdecimal2 (type: decimal(16,0)), UDFToDouble(cdecimal1) (type: double), (UDFToDouble(cdecimal1) * UDFToDouble(cdecimal1)) (type: double), UDFToDouble(cdecimal2) (type: double), (UDFToDouble(cdecimal2) * UDFToDouble(cdecimal2)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [1, 2, 3]
+ projectedOutputColumnNums: [3, 1, 2, 6, 9, 7, 12]
+ selectExpressions: CastDecimalToDouble(col 5:decimal(11,5))(children: ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 5:decimal(11,5)) -> 6:double, DoubleColMultiplyDoubleColumn(col 7:double, col 8:double)(children: CastDecimalToDouble(col 5:decimal(11,5))(children: ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 5:decimal(11,5)) -> 7:double, CastDecimalToDouble(col 5:decimal(11,5))(children: ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 5:decimal(11,5)) -> 8:double) -> 9:double, CastDecimalToDouble(col 10:decimal(16,0))(children: ConvertDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> 10:decimal(16,0)) -> 7:double, DoubleColMultiplyDoubleColumn(col 8:double, col 11:double)(children: CastDecimalToDouble(col 10:decimal(16,0))(children: ConvertDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> 10:decimal(16,0)) -> 8:double, CastDecimalToDouble(col 10:decimal(16,0))(children: ConvertDecimal64ToDecimal(col 2:decimal(16,0
)/DECIMAL_64) -> 10:decimal(16,0)) -> 11:double) -> 12:double
Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count()
+ aggregations: count(_col1), max(_col1), min(_col1), sum(_col1), sum(_col4), sum(_col3), count(_col2), max(_col2), min(_col2), sum(_col2), sum(_col6), sum(_col5), count()
Group By Vectorization:
- aggregators: VectorUDAFCount(col 1:decimal(11,5)/DECIMAL_64) -> bigint, VectorUDAFMaxDecimal64(col 1:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFMinDecimal64(col 1:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFSumDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> decimal(21,5), VectorUDAFAvgDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> struct<count:bigint,sum:decimal(21,5),input:decimal(11,5)>, VectorUDAFVarDecimal(ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 5:decimal(11,5)) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop, VectorUDAFVarDecimal(ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 6:decimal(11,5)) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_samp, VectorUDAFCount(col 2:decimal(16,0)/DECIMAL_64) -> bigint, VectorUDAFMaxDecimal64(col 2:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFMinDecimal64(col 2:decim
al(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFSumDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> decimal(26,0), VectorUDAFAvgDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> struct<count:bigint,sum:decimal(26,0),input:decimal(16,0)>, VectorUDAFVarDecimal(ConvertDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> 7:decimal(16,0)) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop, VectorUDAFVarDecimal(ConvertDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> 8:decimal(16,0)) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_samp, VectorUDAFCountStar(*) -> bigint
+ aggregators: VectorUDAFCount(col 1:decimal(11,5)/DECIMAL_64) -> bigint, VectorUDAFMaxDecimal64(col 1:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFMinDecimal64(col 1:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFSumDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> decimal(21,5), VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCount(col 2:decimal(16,0)/DECIMAL_64) -> bigint, VectorUDAFMaxDecimal64(col 2:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFMinDecimal64(col 2:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFSumDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> decimal(26,0), VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountStar(*) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 3:int
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
- keys: cint (type: int)
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
@@ -574,7 +576,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: struct<count:bigint,sum:decimal(21,5),input:decimal(11,5)>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: struct<count:bigint,sum:decimal(26,0),input:decimal(16,0)>), _col13 (type: struct<count:bigint,sum:double,variance:double>), _col14 (type: struct<count:bigint,sum:double,variance:double>), _col15 (type: bigint)
+ value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(16,0)), _col9 (type: decimal(16,0)), _col10 (type: decimal(26,0)), _col11 (type: double), _col12 (type: double), _col13 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -590,23 +592,23 @@ STAGE PLANS:
includeColumns: [1, 2, 3]
dataColumns: cdouble:double, cdecimal1:decimal(11,5)/DECIMAL_64, cdecimal2:decimal(16,0)/DECIMAL_64, cint:int
partitionColumnCount: 0
- scratchColumnTypeNames: [decimal(11,5), decimal(11,5), decimal(16,0), decimal(16,0)]
+ scratchColumnTypeNames: [decimal(11,5), double, double, double, double, decimal(16,0), double, double]
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
+ aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), count(VALUE._col12)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
Statistics: Num rows: 6144 Data size: 173221 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_col15 > 1L) (type: boolean)
+ predicate: (_col13 > 1L) (type: boolean)
Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: decimal(15,9)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: decimal(20,4)), _col13 (type: double), _col14 (type: double)
+ expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), (CAST( _col4 AS decimal(15,9)) / _col1) (type: decimal(35,29)), power(((_col5 - ((_col6 * _col6) / _col1)) / _col1), 0.5) (type: double), power(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), _col7 (type: bigint), _col8 (type: decimal(16,0)), _col9 (type: decimal(16,0)), _col10 (type: decimal(26,0)), CAST( (CAST( _col10 AS decimal(20,4)) / _col7) AS decimal(20,4)) (type: decimal(20,4)), power(((_col11 - ((_col12 * _col12) / _col7)) / _col7), 0.5) (type: double), power(((_col11 - ((_col12 * _col12) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -641,14 +643,14 @@ POSTHOOK: query: SELECT cint,
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_vgby_small
#### A masked pattern was here ####
--3728 5 -515.62107 -3367.65176 -13986.22811 -2797.245622000 1140.812276 1275.466899351126 6 6984454 -4033 6967704 1161284.0000 2604201.0914565204 2852759.364140621
--563 2 -515.62107 -3367.65176 -3883.27283 -1941.636415000 1426.0153450000003 2016.6902410511484 2 -618 -4033 -4651 -2325.5000 1707.5 2414.7696577520596
-253665376 1024 9767.00541 -9779.54865 -347484.08192 -339.339923750 5708.956347957812 5711.745967644425 1024 11698 -11713 -416183 -406.4287 6837.6426468206855 6840.983786842613
-528534767 1022 9777.75676 -9777.15946 -16711.67771 -16.351935137 5555.7621107931345 5558.482190324908 1024 6984454 -11710 13948890 13621.9629 308443.09823296947 308593.8156122219
-626923679 1024 9723.40270 -9778.95135 10541.05247 10.293996553 5742.091453325366 5744.897264122336 1024 11646 -11712 12641 12.3447 6877.306686989158 6880.6672084147185
-6981 2 -515.62107 -515.62107 -1031.24214 -515.621070000 0.0 0.0 3 6984454 -618 6983218 2327739.3333 3292794.518850853 4032833.1995089175
-762 1 1531.21941 1531.21941 1531.21941 1531.219410000 0.0 NULL 2 6984454 1834 6986288 3493144.0000 3491310.0 4937457.95244881
-NULL 3072 9318.43514 -4298.15135 5018444.11392 1633.608110000 5695.483083909642 5696.410309489072 3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734
+-3728 5 -515.62107 -3367.65176 -13986.22811 -2797.24562200000000000000000000000 1140.8122759999992 1275.466899351125 6 6984454 -4033 6967704 1161284.0000 2604201.0914565204 2852759.364140621
+-563 2 -515.62107 -3367.65176 -3883.27283 -1941.63641500000000000000000000000 1426.0153450000003 2016.6902410511484 2 -618 -4033 -4651 -2325.5000 1707.5 2414.7696577520596
+253665376 1024 9767.00541 -9779.54865 -347484.08192 -339.33992375000000000000000000000 5708.956347957812 5711.745967644425 1024 11698 -11713 -416183 -406.4287 6837.6426468206855 6840.983786842613
+528534767 1022 9777.75676 -9777.15946 -16711.67771 -16.35193513698630136986301369863 5555.762110793133 5558.482190324906 1024 6984454 -11710 13948890 13621.9629 308443.0982329696 308593.815612222
+626923679 1024 9723.40270 -9778.95135 10541.05247 10.29399655273437500000000000000 5742.091453325365 5744.897264122335 1024 11646 -11712 12641 12.3447 6877.306686989158 6880.6672084147185
+6981 2 -515.62107 -515.62107 -1031.24214 -515.62107000000000000000000000000 0.0 0.0 3 6984454 -618 6983218 2327739.3333 3292794.518850853 4032833.1995089175
+762 1 1531.21941 1531.21941 1531.21941 1531.21941000000000000000000000000 0.0 NULL 2 6984454 1834 6986288 3493144.0000 3491310.0 4937457.95244881
+NULL 3072 9318.43514 -4298.15135 5018444.11392 NULL 5695.4830839098695 5696.410309489299 3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734
PREHOOK: query: SELECT SUM(HASH(*))
FROM (SELECT cint,
COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
@@ -667,4 +669,4 @@ FROM (SELECT cint,
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_vgby_small
#### A masked pattern was here ####
-91757235680
+96673467876
http://git-wip-us.apache.org/repos/asf/hive/blob/5cb8867b/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_decimal_precision.q.out b/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
index 6e41f53..fd6d9c3 100644
--- a/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
+++ b/ql/src/test/results/clientpositive/vector_decimal_precision.q.out
@@ -580,9 +580,9 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(dec), sum(dec)
+ aggregations: sum(dec), count(dec)
Group By Vectorization:
- aggregators: VectorUDAFAvgDecimal(col 0:decimal(20,10)) -> struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)>, VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10)
+ aggregators: VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10), VectorUDAFCount(col 0:decimal(20,10)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
native: false
@@ -590,7 +590,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1]
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -598,8 +598,8 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)>), _col1 (type: decimal(30,10))
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(30,10)), _col1 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -622,17 +622,21 @@ STAGE PLANS:
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), sum(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col0 / _col1) (type: decimal(38,18)), CAST( _col0 AS decimal(30,10)) (type: decimal(30,10))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -648,7 +652,7 @@ POSTHOOK: query: SELECT avg(`dec`), sum(`dec`) FROM DECIMAL_PRECISION
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_precision
#### A masked pattern was here ####
-88499534.57586576220645 2743485571.8518386284
+88499534.575865762206451613 2743485571.8518386284
PREHOOK: query: SELECT `dec` * cast('12345678901234567890.12345678' as decimal(38,18)) FROM DECIMAL_PRECISION LIMIT 1
PREHOOK: type: QUERY
PREHOOK: Input: default@decimal_precision
@@ -1161,9 +1165,9 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
Statistics: Num rows: 1 Data size: 26610 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(dec), sum(dec)
+ aggregations: sum(dec), count(dec)
Group By Vectorization:
- aggregators: VectorUDAFAvgDecimal(col 0:decimal(20,10)) -> struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)>, VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10)
+ aggregators: VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10), VectorUDAFCount(col 0:decimal(20,10)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
native: false
@@ -1171,7 +1175,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1]
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -1179,8 +1183,8 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)>), _col1 (type: decimal(30,10))
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(30,10)), _col1 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -1203,17 +1207,21 @@ STAGE PLANS:
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), sum(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col0 / _col1) (type: decimal(38,18)), CAST( _col0 AS decimal(30,10)) (type: decimal(30,10))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -1229,7 +1237,7 @@ POSTHOOK: query: SELECT avg(`dec`), sum(`dec`) FROM DECIMAL_PRECISION_txt_small
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_precision_txt_small
#### A masked pattern was here ####
-88499534.57586576220645 2743485571.8518386284
+88499534.575865762206451613 2743485571.8518386284
PREHOOK: query: SELECT `dec` * cast('12345678901234567890.12345678' as decimal(38,18)) FROM DECIMAL_PRECISION_txt_small LIMIT 1
PREHOOK: type: QUERY
PREHOOK: Input: default@decimal_precision_txt_small
http://git-wip-us.apache.org/repos/asf/hive/blob/5cb8867b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
index 5ac1ea8..8a6135e 100644
--- a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
+++ b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
@@ -777,7 +777,7 @@ STAGE PLANS:
outputColumnNames: _col1, _col2, _col3, _col4
Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(_col1), sum(_col2), avg(_col2), sum(_col3), avg(_col3), sum(_col4), avg(_col4)
+ aggregations: sum(_col1), sum(_col2), count(_col2), sum(_col3), count(_col3), sum(_col4), count(_col4)
keys: 1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
@@ -805,7 +805,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: struct<count:bigint,sum:double,input:int>), _col4 (type: double), _col5 (type: struct<count:bigint,sum:double,input:double>), _col6 (type: decimal(38,18)), _col7 (type: struct<count:bigint,sum:decimal(38,18),input:decimal(38,18)>)
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: decimal(38,18)), _col7 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -822,13 +822,13 @@ STAGE PLANS:
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), sum(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), sum(VALUE._col5), avg(VALUE._col6)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), count(VALUE._col6)
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 125 Data size: 30150 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 1 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18))
+ expressions: 1 (type: int), _col1 (type: bigint), _col2 (type: bigint), (UDFToDouble(_col2) / _col3) (type: double), _col4 (type: double), (_col4 / _col5) (type: double), _col6 (type: decimal(38,18)), (_col6 / _col7) (type: decimal(38,18))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 125 Data size: 30150 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -975,13 +975,13 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(_col2), avg(_col2), sum(_col3), avg(_col3), sum(_col4), avg(_col4)
+ aggregations: sum(_col2), count(_col2), sum(_col3), count(_col3), sum(_col4), count(_col4)
keys: _col1 (type: int), _col0 (type: int)
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18))
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), (UDFToDouble(_col2) / _col3) (type: double), _col4 (type: double), (_col4 / _col5) (type: double), _col6 (type: decimal(38,18)), (_col6 / _col7) (type: decimal(38,18))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE
File Output Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/5cb8867b/ql/src/test/results/clientpositive/vectorization_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorization_1.q.out b/ql/src/test/results/clientpositive/vectorization_1.q.out
index 97334a3..bb8e483 100644
--- a/ql/src/test/results/clientpositive/vectorization_1.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_1.q.out
@@ -66,25 +66,26 @@ STAGE PLANS:
predicate: (((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (UDFToLong(cint) > cbigint) or (cbigint < UDFToLong(ctinyint)) or (cboolean1 < 0)) (type: boolean)
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double)
- outputColumnNames: ctinyint, cint, cfloat, cdouble
+ expressions: ctinyint (type: tinyint), cfloat (type: float), cint (type: int), cdouble (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), (cdouble * cdouble) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 2, 4, 5]
+ projectedOutputColumnNums: [0, 4, 2, 5, 13, 16, 14]
+ selectExpressions: CastLongToDouble(col 0:tinyint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 0:tinyint) -> 14:double, CastLongToDouble(col 0:tinyint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: var_pop(ctinyint), sum(cfloat), max(ctinyint), max(cint), var_samp(cdouble), count(cint)
+ aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col1), max(_col0), max(_col2), sum(_col6), sum(_col3), count(_col3), count(_col2)
Group By Vectorization:
- aggregators: VectorUDAFVarLong(col 0:tinyint) -> struct<count:bigint,sum:double,variance:double> aggregation: var_pop, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFVarDouble(col 5:double) -> struct<count:bigint,sum:double,variance:double> aggregation: var_samp, VectorUDAFCount(col 2:int) -> bigint
+ aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 2:int) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -92,8 +93,8 @@ STAGE PLANS:
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct<count:bigint,sum:double,variance:double>), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: bigint)
+ Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: tinyint), _col5 (type: int), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -109,24 +110,24 @@ STAGE PLANS:
includeColumns: [0, 2, 3, 4, 5, 10, 11]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: [double]
+ scratchColumnTypeNames: [double, double, double, double]
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
- aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), max(VALUE._col4), max(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), count(VALUE._col9)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: double), (_col0 / -26.28D) (type: double), _col1 (type: double), (-1.389D + _col1) (type: double), (_col1 * (-1.389D + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389D + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175D % (- (_col1 * (-1.389D + _col1)))) (type: double), _col5 (type: bigint), (-563 % _col3) (type: int)
+ expressions: ((_col0 - ((_col1 * _col1) / _col2)) / _col2) (type: double), (((_col0 - ((_col1 * _col1) / _col2)) / _col2) / -26.28D) (type: double), _col3 (type: double), (-1.389D + _col3) (type: double), (_col3 * (-1.389D + _col3)) (type: double), _col4 (type: tinyint), (- (_col3 * (-1.389D + _col3))) (type: double), _col5 (type: int), (CAST( _col5 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), ((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END) (type: double), (10.175D % (- (_col3 * (-1.389D + _col3)))) (type: double), _col9 (type: bigint), (-563 % _col5) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -182,4 +183,4 @@ WHERE (((cdouble > ctinyint)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-1074.830257547229 -40.89917266161449 -21997.674998402596 -21999.063998402595 4.839282601059194E8 62 -4.839282601059194E8 1073680599 85414512692.247 7.569848642620903E10 10.175 3745 -563
+1074.8302575472321 -40.899172661614614 -21997.674998402596 -21999.063998402595 4.839282601059194E8 62 -4.839282601059194E8 1073680599 85414512692.247 7.569848642620917E10 10.175 3745 -563
http://git-wip-us.apache.org/repos/asf/hive/blob/5cb8867b/ql/src/test/results/clientpositive/vectorization_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorization_12.q.out b/ql/src/test/results/clientpositive/vectorization_12.q.out
index ba0882f..e129730 100644
--- a/ql/src/test/results/clientpositive/vectorization_12.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_12.q.out
@@ -89,26 +89,27 @@ STAGE PLANS:
predicate: (((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ctimestamp1 is null) (type: boolean)
Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: cbigint (type: bigint), cdouble (type: double), cstring1 (type: string), cboolean1 (type: boolean)
- outputColumnNames: cbigint, cdouble, cstring1, cboolean1
+ expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [3, 5, 6, 10]
+ projectedOutputColumnNums: [3, 10, 6, 5, 13, 16, 14]
+ selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double
Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cbigint), stddev_samp(cbigint), avg(cdouble), sum(cbigint), stddev_pop(cdouble)
+ aggregations: count(_col0), sum(_col5), sum(_col4), sum(_col3), count(_col3), sum(_col0), sum(_col6)
Group By Vectorization:
- aggregators: VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFVarLong(col 3:bigint) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_samp, VectorUDAFAvgDouble(col 5:double) -> struct<count:bigint,sum:double,input:double>, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct<count:bigint,sum:double,variance:double> aggregation: stddev_pop
+ aggregators: VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 14:double) -> double
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4]
- keys: cdouble (type: double), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean)
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
+ keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean)
@@ -120,7 +121,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col4 (type: bigint), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,input:double>), _col7 (type: bigint), _col8 (type: struct<count:bigint,sum:double,variance:double>)
+ value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double)
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -136,20 +137,20 @@ STAGE PLANS:
includeColumns: [0, 1, 3, 5, 6, 8, 10, 11]
dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
partitionColumnCount: 0
- scratchColumnTypeNames: []
+ scratchColumnTypeNames: [double, double, double, double]
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4)
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6)
keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), _col6 (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), _col8 (type: double)
+ expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), (_col7 / _col8) (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col9 (type: bigint), ((_col7 / _col8) / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), power(((_col10 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19
Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE
File Output Operator