You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/03/30 07:19:47 UTC
[1/3] hive git commit: HIVE-19032: Vectorization: Disable GROUP BY
aggregations with DISTINCT (Matt McCline, reviewed by Gopal Vijayaraghavan)
Repository: hive
Updated Branches:
refs/heads/master 1974397f6 -> 470a2f998
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out b/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
new file mode 100644
index 0000000..c299796
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
@@ -0,0 +1,952 @@
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 183488 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
+ Statistics: Num rows: 1365 Data size: 20400 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: cbigint (type: bigint), cdouble (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 7
+ Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 7
+ Processor Tree:
+ ListSink
+
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-1887561756 -10011.0
+-1887561756 -13877.0
+-1887561756 -2281.0
+-1887561756 -8881.0
+-1887561756 10361.0
+-1887561756 1839.0
+-1887561756 9531.0
+PREHOOK: query: explain vectorization detail
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 5, 1]
+ Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: double)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 5]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [1]
+ Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col2 (type: smallint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0, 1, 5]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double, VALUE._col0:smallint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
+ Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-64 -10462.0 -10462
+-64 -15920.0 -15920
+-64 -1600.0 -1600
+-64 -200.0 -200
+-64 -2919.0 -2919
+-64 -3097.0 -3097
+-64 -3586.0 -3586
+-64 -4018.0 -4018
+-64 -4040.0 -4040
+-64 -4803.0 -4803
+-64 -6907.0 -6907
+-64 -7196.0 -7196
+-64 -7196.0 -7196
+-64 -7196.0 -7196
+-64 -7196.0 -7196
+-64 -7196.0 -7196
+-64 -7196.0 -7196
+-64 -7196.0 -7196
+-64 -8080.0 -8080
+-64 -9842.0 -9842
+PREHOOK: query: explain vectorization detail
+select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: ctinyint (type: tinyint), (cdouble + 1.0D) (type: double)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 13]
+ selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double
+ Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1), count(_col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 13:double) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:tinyint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0, 1]
+ keys: _col0 (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [0]
+ valueColumnNums: [1, 2]
+ Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col1 (type: double), _col2 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0, 5]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [double]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY._col0:tinyint, VALUE._col0:double, VALUE._col1:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:tinyint
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0, 1]
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 3]
+ selectExpressions: DoubleColDivideLongColumn(col 1:double, col 2:bigint) -> 3:double
+ Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-46 3033.55
+-47 -574.6428571428571
+-48 1672.909090909091
+-49 768.7659574468086
+-50 -960.0192307692307
+-51 -96.46341463414635
+-52 2810.705882352941
+-53 -532.7567567567568
+-54 2712.7272727272725
+-55 2385.595744680851
+-56 2595.818181818182
+-57 1867.0535714285713
+-58 3483.2444444444445
+-59 318.27272727272725
+-60 1071.82
+-61 914.3404255319149
+-62 245.69387755102042
+-63 2178.7272727272725
+-64 373.52941176470586
+NULL 9370.0945309795
+PREHOOK: query: explain vectorization detail
+select distinct(ctinyint) from alltypesorc limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select distinct(ctinyint) from alltypesorc limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: ctinyint (type: tinyint)
+ outputColumnNames: ctinyint
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:tinyint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: ctinyint (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.3
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: KEY._col0:tinyint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:tinyint
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: []
+ keys: KEY._col0 (type: tinyint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct(ctinyint) from alltypesorc limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct(ctinyint) from alltypesorc limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-46
+-47
+-48
+-49
+-50
+-51
+-52
+-53
+-54
+-55
+-56
+-57
+-58
+-59
+-60
+-61
+-62
+-63
+-64
+NULL
+PREHOOK: query: explain vectorization detail
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: ctinyint (type: tinyint), cdouble (type: double)
+ outputColumnNames: ctinyint, cdouble
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 5]
+ Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:tinyint, col 5:double
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: ctinyint (type: tinyint), cdouble (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint), _col1 (type: double)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [0]
+ valueColumnNums: []
+ Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0, 5]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY._col0:tinyint, KEY._col1:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:tinyint, col 1:double
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: []
+ keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(_col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(col 1:double) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: COMPLETE
+ keyExpressions: col 0:tinyint
+ native: false
+ vectorProcessingMode: STREAMING
+ projectedOutputColumnNums: [0]
+ keys: _col0 (type: tinyint)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-46 24
+-47 22
+-48 29
+-49 26
+-50 30
+-51 21
+-52 33
+-53 22
+-54 26
+-55 29
+-56 36
+-57 35
+-58 23
+-59 31
+-60 27
+-61 25
+-62 27
+-63 19
+-64 24
+NULL 2932
+PREHOOK: query: explain vectorization detail
+select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 0
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+PREHOOK: query: explain vectorization detail
+select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
+ predicate: ctinyint is not null (type: boolean)
+ Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(ctinyint)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 5:double
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ keys: cdouble (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [1]
+ Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [0, 5]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY._col0:double, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:double
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
+ keys: KEY._col0 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: bigint), _col0 (type: double)
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [1, 0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.3
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 0]
+ Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 20
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-10462.0 -64
+-1121.0 -89
+-11322.0 -101
+-11492.0 -78
+-15920.0 -64
+-4803.0 -64
+-6907.0 -64
+-7196.0 -2009
+-8080.0 -64
+-8118.0 -80
+-9842.0 -64
+10496.0 -67
+15601.0 -1733
+3520.0 -86
+4811.0 -115
+5241.0 -80
+557.0 -75
+7705.0 -88
+9452.0 -76
+NULL -32768
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
new file mode 100644
index 0000000..f19e2ca
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
@@ -0,0 +1,362 @@
+PREHOOK: query: create table dtest(a int, b int) clustered by (a) sorted by (a) into 1 buckets stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dtest
+POSTHOOK: query: create table dtest(a int, b int) clustered by (a) sorted by (a) into 1 buckets stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dtest
+PREHOOK: query: insert into table dtest select c,b from (select array(300,300,300,300,300) as a, 1 as b from src order by a limit 1) y lateral view explode(a) t1 as c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dtest
+POSTHOOK: query: insert into table dtest select c,b from (select array(300,300,300,300,300) as a, 1 as b from src order by a limit 1) y lateral view explode(a) t1 as c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dtest
+POSTHOOK: Lineage: dtest.a SCRIPT []
+POSTHOOK: Lineage: dtest.b SIMPLE []
+PREHOOK: query: explain vectorization detail
+select sum(distinct a), count(distinct a) from dtest
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select sum(distinct a), count(distinct a) from dtest
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: dtest
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: a (type: int)
+ outputColumnNames: a
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: FINAL
+ keyExpressions: col 0:int
+ native: false
+ vectorProcessingMode: STREAMING
+ projectedOutputColumnNums: []
+ keys: a (type: int)
+ mode: final
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0), count(_col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFCount(col 0:int) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0, 1]
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ keyColumnNums: []
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [0, 1]
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0]
+ dataColumns: a:int, b:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder:
+ reduceColumnSortOrder:
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: VALUE._col0:bigint, VALUE._col1:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ native: false
+ vectorProcessingMode: GLOBAL
+ projectedOutputColumnNums: [0, 1]
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(distinct a), count(distinct a) from dtest
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dtest
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(distinct a), count(distinct a) from dtest
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dtest
+#### A masked pattern was here ####
+300 1
+PREHOOK: query: explain vectorization detail
+select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: cint (type: int)
+ outputColumnNames: cint
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [2]
+ Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 2:int
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: cint (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [2]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: KEY._col0:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: []
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), UDFToDouble(_col0) (type: double), (UDFToDouble(_col0) * UDFToDouble(_col0)) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 4]
+ selectExpressions: CastLongToDouble(col 0:int) -> 1:double, DoubleColMultiplyDoubleColumn(col 2:double, col 3:double)(children: CastLongToDouble(col 0:int) -> 2:double, CastLongToDouble(col 0:int) -> 3:double) -> 4:double
+ Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFCount(col 0:int) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 1:double) -> double
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0, 1, 2, 3]
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ keyColumnNums: []
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [0, 1, 2, 3]
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder:
+ reduceColumnSortOrder:
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:double, VALUE._col3:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ native: false
+ vectorProcessingMode: GLOBAL
+ projectedOutputColumnNums: [0, 1, 2, 3]
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: bigint), _col1 (type: bigint), (UDFToDouble(_col0) / _col1) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 5, 4]
+ selectExpressions: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: CastLongToDouble(col 0:bigint) -> 4:double) -> 5:double, FuncPowerDoubleToDouble(col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 4:double
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-3482841611 6082 -572647.4204209142 6.153814687328984E8
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out
index 59db9d1..8a81b34 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out
@@ -470,27 +470,12 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: ctinyint (type: tinyint), cdouble (type: double)
outputColumnNames: ctinyint, cdouble
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 5]
Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(DISTINCT cdouble)
- Group By Vectorization:
- aggregators: VectorUDAFCount(col 5:double) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:tinyint, col 5:double
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
keys: ctinyint (type: tinyint), cdouble (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2
@@ -499,29 +484,14 @@ STAGE PLANS:
key expressions: _col0 (type: tinyint), _col1 (type: double)
sort order: ++
Map-reduce partition columns: _col0 (type: tinyint)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
- Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 12
- includeColumns: [0, 5]
- dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/vector_count.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_count.q.out b/ql/src/test/results/clientpositive/vector_count.q.out
index 7e30870..85d5926 100644
--- a/ql/src/test/results/clientpositive/vector_count.q.out
+++ b/ql/src/test/results/clientpositive/vector_count.q.out
@@ -62,26 +62,12 @@ STAGE PLANS:
TableScan
alias: abcd
Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Select Operator
expressions: a (type: int), b (type: int), c (type: int), d (type: int)
outputColumnNames: a, b, c, d
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(DISTINCT b), count(DISTINCT c), sum(d)
- Group By Vectorization:
- aggregators: VectorUDAFCount(col 1:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:int, col 1:int, col 2:int
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2]
keys: a (type: int), b (type: int), c (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -90,23 +76,14 @@ STAGE PLANS:
key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
sort order: +++
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: bigint)
- Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -182,7 +159,7 @@ STAGE PLANS:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: GROUPBY operator: Aggregations with > 1 parameter are not supported count([Column[a], Column[b]])
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
vectorized: false
Reduce Vectorization:
enabled: false
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorization_limit.q.out b/ql/src/test/results/clientpositive/vectorization_limit.q.out
index 8226fd4..517cb07 100644
--- a/ql/src/test/results/clientpositive/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_limit.q.out
@@ -470,27 +470,12 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: ctinyint (type: tinyint), cdouble (type: double)
outputColumnNames: ctinyint, cdouble
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 5]
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(DISTINCT cdouble)
- Group By Vectorization:
- aggregators: VectorUDAFCount(col 5:double) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:tinyint, col 5:double
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
keys: ctinyint (type: tinyint), cdouble (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2
@@ -499,29 +484,14 @@ STAGE PLANS:
key expressions: _col0 (type: tinyint), _col1 (type: double)
sort order: ++
Map-reduce partition columns: _col0 (type: tinyint)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
- Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 12
- includeColumns: [0, 5]
- dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
index 07576fd..3847481 100644
--- a/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
@@ -37,28 +37,13 @@ STAGE PLANS:
TableScan
alias: dtest
Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: a (type: int)
outputColumnNames: a
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(DISTINCT a), count(DISTINCT a)
bucketGroup: true
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFCount(col 0:int) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1]
keys: a (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
@@ -66,28 +51,13 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0]
- dataColumns: a:int, b:int
- partitionColumnCount: 0
- scratchColumnTypeNames: []
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -142,28 +112,12 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: cint (type: int), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [2, 13, 16]
- selectExpressions: CastLongToDouble(col 2:int) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 14:double, CastLongToDouble(col 2:int) -> 15:double) -> 16:double
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(DISTINCT _col0), count(DISTINCT _col0), sum(DISTINCT _col2), sum(DISTINCT _col1)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 2:int, col 16:double, col 13:double
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3]
keys: _col0 (type: int), _col2 (type: double), _col1 (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -171,28 +125,13 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double)
sort order: +++
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 12
- includeColumns: [2]
- dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
- partitionColumnCount: 0
- scratchColumnTypeNames: [double, double, double, double]
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
[3/3] hive git commit: HIVE-19032: Vectorization: Disable GROUP BY
aggregations with DISTINCT (Matt McCline, reviewed by Gopal Vijayaraghavan)
Posted by mm...@apache.org.
HIVE-19032: Vectorization: Disable GROUP BY aggregations with DISTINCT (Matt McCline, reviewed by Gopal Vijayaraghavan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/470a2f99
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/470a2f99
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/470a2f99
Branch: refs/heads/master
Commit: 470a2f998494ef2a78e1424efae70d9eb1b17447
Parents: 1974397
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri Mar 30 02:19:24 2018 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri Mar 30 02:19:24 2018 -0500
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 4 +
.../hive/ql/optimizer/physical/Vectorizer.java | 6 +-
.../test/queries/clientpositive/groupby_cube1.q | 1 +
.../clientpositive/groupby_grouping_id1.q | 2 +
.../clientpositive/groupby_grouping_id2.q | 1 +
.../clientpositive/groupby_grouping_id3.q | 2 +
.../clientpositive/groupby_grouping_sets1.q | 1 +
.../clientpositive/groupby_grouping_sets2.q | 1 +
.../clientpositive/groupby_grouping_sets3.q | 1 +
.../clientpositive/groupby_grouping_sets4.q | 1 +
.../clientpositive/groupby_grouping_sets5.q | 1 +
.../clientpositive/groupby_grouping_sets6.q | 2 +
.../groupby_grouping_sets_grouping.q | 1 +
.../queries/clientpositive/groupby_rollup1.q | 1 +
.../queries/clientpositive/groupby_sort_11.q | 3 +
.../queries/clientpositive/groupby_sort_8.q | 3 +
.../clientpositive/vector_groupby_cube1.q | 2 +-
.../vector_groupby_grouping_sets_grouping.q | 29 +
.../clientpositive/vector_groupby_sort_11.q | 50 +
.../clientpositive/vector_groupby_sort_8.q | 24 +
.../clientpositive/groupby_sort_11.q.out | 4 +-
.../clientpositive/llap/vector_count.q.out | 30 +-
.../llap/vector_groupby_cube1.q.out | 501 +++++++++-
.../vector_groupby_grouping_sets_grouping.q.out | 463 +++++++++
.../llap/vector_groupby_rollup1.q.out | 94 +-
.../llap/vector_groupby_sort_11.q.out | 996 +++++++++++++++++++
.../llap/vector_groupby_sort_8.q.out | 186 ++++
.../llap/vectorization_limit.q.out | 952 ++++++++++++++++++
.../llap/vectorized_distinct_gby.q.out | 362 +++++++
.../parquet_vectorization_limit.q.out | 34 +-
.../results/clientpositive/vector_count.q.out | 29 +-
.../clientpositive/vectorization_limit.q.out | 34 +-
.../vectorized_distinct_gby.q.out | 69 +-
33 files changed, 3601 insertions(+), 289 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 937ea79..a42ae80 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -352,6 +352,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
vector_decimal_udf.q,\
vector_decimal_udf2.q,\
vector_distinct_2.q,\
+ vectorized_distinct_gby.q,\
vector_elt.q,\
vector_groupby4.q,\
vector_groupby6.q,\
@@ -420,6 +421,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
vectorization_8.q,\
vectorization_9.q,\
vectorization_decimal_date.q,\
+ vectorization_limit.q,\
vectorization_nested_udf.q,\
vectorization_not.q,\
vectorization_part.q,\
@@ -747,6 +749,8 @@ minillaplocal.query.files=\
vector_groupby_grouping_sets_limit.q,\
vector_groupby_grouping_window.q,\
vector_groupby_rollup1.q,\
+ vector_groupby_sort_11.q,\
+ vector_groupby_sort_8.q,\
vector_if_expr_2.q,\
vector_join30.q,\
vector_join_filters.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 33830b3..a822a4b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -2876,13 +2876,13 @@ public class Vectorizer implements PhysicalPlanResolver {
setExpressionIssue("Aggregation Function", "UDF " + udfName + " not supported");
return false;
}
- /*
+
// The planner seems to pull this one out.
if (aggDesc.getDistinct()) {
setExpressionIssue("Aggregation Function", "DISTINCT not supported");
- return new Pair<Boolean,Boolean>(false, false);
+ return false;
}
- */
+
ArrayList<ExprNodeDesc> parameters = aggDesc.getParameters();
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_cube1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_cube1.q b/ql/src/test/queries/clientpositive/groupby_cube1.q
index fd2f0de..92456d0 100644
--- a/ql/src/test/queries/clientpositive/groupby_cube1.q
+++ b/ql/src/test/queries/clientpositive/groupby_cube1.q
@@ -1,3 +1,4 @@
+SET hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
set hive.map.aggr=true;
set hive.groupby.skewindata=false;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id1.q b/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
index 9948ce9..7068d21 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
@@ -1,3 +1,5 @@
+SET hive.vectorized.execution.enabled=false;
+
CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
index cc7f9e4..ba755c4 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
@@ -1,4 +1,5 @@
set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
set hive.fetch.task.conversion=none;
set hive.cli.print.header=true;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id3.q b/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
index 955dbe0..29b2f15 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
@@ -1,3 +1,5 @@
+SET hive.vectorized.execution.enabled=false;
+
CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
index c22c97f..86c5246 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
@@ -1,4 +1,5 @@
set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
set hive.fetch.task.conversion=none;
set hive.cli.print.header=true;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
index 90e6325..1934321 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
@@ -1,4 +1,5 @@
set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
set hive.cli.print.header=true;
set hive.mapred.mode=nonstrict;
set hive.new.job.grouping.set.cardinality=2;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
index 16421e8..81267dc 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
@@ -1,4 +1,5 @@
set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
set hive.fetch.task.conversion=none;
set hive.cli.print.header=true;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
index 1074a3b..fa62992 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
@@ -1,4 +1,5 @@
set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
set hive.merge.mapfiles = false;
set hive.merge.mapredfiles = false;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
index 570d464..829a0c2 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
@@ -1,4 +1,5 @@
set hive.mapred.mode=nonstrict;
+SET hive.vectorized.execution.enabled=false;
set hive.merge.mapfiles = false;
set hive.merge.mapredfiles = false;
-- Set merging to false above to make the explain more readable
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q
index e537bce..515dce3 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q
@@ -1,4 +1,6 @@
set hive.mapred.mode=nonstrict;
+SET hive.vectorized.execution.enabled=false;
+
CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
index 7157106..3f437a4 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
@@ -1,3 +1,4 @@
+SET hive.vectorized.execution.enabled=false;
-- SORT_QUERY_RESULTS
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_rollup1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_rollup1.q b/ql/src/test/queries/clientpositive/groupby_rollup1.q
index f30ace0..94f533c 100644
--- a/ql/src/test/queries/clientpositive/groupby_rollup1.q
+++ b/ql/src/test/queries/clientpositive/groupby_rollup1.q
@@ -1,3 +1,4 @@
+SET hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
set hive.map.aggr=true;
set hive.groupby.skewindata=false;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_sort_11.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_11.q b/ql/src/test/queries/clientpositive/groupby_sort_11.q
index 3b6c172..c56789d 100644
--- a/ql/src/test/queries/clientpositive/groupby_sort_11.q
+++ b/ql/src/test/queries/clientpositive/groupby_sort_11.q
@@ -1,7 +1,10 @@
+SET hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
set hive.exec.reducers.max = 1;
set hive.map.groupby.sorted=true;
+-- SORT_QUERY_RESULTS
+
CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_sort_8.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_8.q b/ql/src/test/queries/clientpositive/groupby_sort_8.q
index 3f81a69..2c20b29 100644
--- a/ql/src/test/queries/clientpositive/groupby_sort_8.q
+++ b/ql/src/test/queries/clientpositive/groupby_sort_8.q
@@ -1,7 +1,10 @@
+SET hive.vectorized.execution.enabled=false;
set hive.mapred.mode=nonstrict;
set hive.exec.reducers.max = 10;
set hive.map.groupby.sorted=true;
+-- SORT_QUERY_RESULTS
+
CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/vector_groupby_cube1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_cube1.q b/ql/src/test/queries/clientpositive/vector_groupby_cube1.q
index 1f7b467..d6bab2c 100644
--- a/ql/src/test/queries/clientpositive/vector_groupby_cube1.q
+++ b/ql/src/test/queries/clientpositive/vector_groupby_cube1.q
@@ -1,4 +1,4 @@
-SET hive.vectorized.execution.enabled=false;
+SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.reduce.enabled=true;
set hive.mapred.mode=nonstrict;
set hive.map.aggr=true;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q b/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
index a0e874d..5a5757d 100644
--- a/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
+++ b/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
@@ -99,6 +99,35 @@ having grouping(key) = 1 OR grouping(value) = 1
order by x desc, case when x = 1 then key end;
explain vectorization detail
+select key, value, grouping(key), grouping(value)
+from T1
+group by key, value;
+
+select key, value, grouping(key), grouping(value)
+from T1
+group by key, value;
+
+explain vectorization detail
+select key, value, grouping(value)
+from T1
+group by key, value;
+
+select key, value, grouping(value)
+from T1
+group by key, value;
+
+explain vectorization detail
+select key, value
+from T1
+group by key, value
+having grouping(key) = 0;
+
+select key, value
+from T1
+group by key, value
+having grouping(key) = 0;
+
+explain vectorization detail
select key, value, `grouping__id`, grouping(key, value)
from T1
group by cube(key, value);
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/vector_groupby_sort_11.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_sort_11.q b/ql/src/test/queries/clientpositive/vector_groupby_sort_11.q
new file mode 100644
index 0000000..012fe82
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_groupby_sort_11.q
@@ -0,0 +1,50 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.reduce.enabled=true;
+set hive.mapred.mode=nonstrict;
+set hive.exec.reducers.max = 1;
+set hive.map.groupby.sorted=true;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+-- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 PARTITION (ds='1')
+SELECT * from src where key < 10;
+
+-- The plan is optimized to perform partial aggregation on the mapper
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1;
+select count(distinct key) from T1;
+
+-- The plan is optimized to perform partial aggregation on the mapper
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key), count(1), count(key), sum(distinct key) from T1;
+select count(distinct key), count(1), count(key), sum(distinct key) from T1;
+
+-- The plan is not changed in the presence of a grouping key
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key;
+select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key;
+
+-- The plan is not changed in the presence of a grouping key
+EXPLAIN VECTORIZATION DETAIL
+select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key;
+select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key;
+
+-- The plan is not changed in the presence of a grouping key expression
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key+key) from T1;
+select count(distinct key+key) from T1;
+
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct 1) from T1;
+select count(distinct 1) from T1;
+
+set hive.map.aggr=false;
+
+-- no plan change if map aggr is turned off
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1;
+select count(distinct key) from T1;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/vector_groupby_sort_8.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_sort_8.q b/ql/src/test/queries/clientpositive/vector_groupby_sort_8.q
new file mode 100644
index 0000000..b0c5699
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_groupby_sort_8.q
@@ -0,0 +1,24 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.reduce.enabled=true;
+set hive.mapred.mode=nonstrict;
+set hive.exec.reducers.max = 10;
+set hive.map.groupby.sorted=true;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1 PARTITION (ds='1');
+
+-- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1';
+
+-- The plan is not converted to a map-side, since although the sorting columns and grouping
+-- columns match, the user is issueing a distinct.
+-- However, after HIVE-4310, partial aggregation is performed on the mapper
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1;
+select count(distinct key) from T1;
+
+DROP TABLE T1;
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/groupby_sort_11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_11.q.out b/ql/src/test/results/clientpositive/groupby_sort_11.q.out
index 23c89f9..cbdc526 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_11.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_11.q.out
@@ -211,12 +211,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t1@ds=1
#### A masked pattern was here ####
-1 3 3 0.0
1 1 1 2.0
1 1 1 4.0
-1 3 3 5.0
1 1 1 8.0
1 1 1 9.0
+1 3 3 0.0
+1 3 3 5.0
PREHOOK: query: EXPLAIN select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vector_count.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_count.q.out b/ql/src/test/results/clientpositive/llap/vector_count.q.out
index 400d930..ce35eb8 100644
--- a/ql/src/test/results/clientpositive/llap/vector_count.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_count.q.out
@@ -68,26 +68,12 @@ STAGE PLANS:
TableScan
alias: abcd
Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Select Operator
expressions: a (type: int), b (type: int), c (type: int), d (type: int)
outputColumnNames: a, b, c, d
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(DISTINCT b), count(DISTINCT c), sum(d)
- Group By Vectorization:
- aggregators: VectorUDAFCount(col 1:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:int, col 1:int, col 2:int
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2]
keys: a (type: int), b (type: int), c (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -96,24 +82,16 @@ STAGE PLANS:
key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
sort order: +++
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: No DISTINCT columns IS false
Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: bigint)
- Execution mode: vectorized, llap
+ Execution mode: llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reducer 2
Execution mode: llap
Reduce Vectorization:
@@ -200,7 +178,7 @@ STAGE PLANS:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: GROUPBY operator: Aggregations with > 1 parameter are not supported count([Column[a], Column[b]])
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
vectorized: false
Reducer 2
Execution mode: llap
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
index f1ed146..3bfbda0 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
@@ -21,8 +21,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -41,12 +41,27 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: key, val
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: key (type: string), val (type: string), 0L (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -55,15 +70,58 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:string, val:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string, col 1:string, col 2:bigint
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col3
@@ -72,9 +130,16 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -94,8 +159,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val)
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -114,12 +179,27 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: key, val
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: key (type: string), val (type: string), 0L (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -128,15 +208,58 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:string, val:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string, col 1:string, col 2:bigint
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col3
@@ -145,9 +268,16 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -193,8 +323,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -213,12 +343,27 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -227,15 +372,58 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:string, val:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string, col 1:string, col 2:bigint
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
@@ -243,9 +431,16 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3]
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -291,8 +486,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -328,8 +523,19 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -377,8 +583,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -398,12 +604,27 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: key, val
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: key (type: string), val (type: string), 0L (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -412,15 +633,59 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: rand() (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [4]
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:string, val:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: PARTIALS
+ keyExpressions: col 0:string, col 1:string, col 2:bigint
+ native: false
+ vectorProcessingMode: STREAMING
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
@@ -429,13 +694,41 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [0, 1]
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
Reducer 3
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: FINAL
+ keyExpressions: col 0:string, col 1:string, col 2:bigint
+ native: false
+ vectorProcessingMode: STREAMING
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: final
outputColumnNames: _col0, _col1, _col3
@@ -444,9 +737,16 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -492,8 +792,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -530,8 +830,19 @@ STAGE PLANS:
Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -547,6 +858,11 @@ STAGE PLANS:
value expressions: _col2 (type: bigint)
Reducer 3
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -614,8 +930,8 @@ INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube
INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
- enabled: false
- enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
STAGE DEPENDENCIES:
Stage-2 is a root stage
@@ -643,12 +959,27 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: key, val
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(1)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 4:int) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: key (type: string), val (type: string), 0L (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -657,14 +988,33 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: rand() (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [4]
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: key, val
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(1)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 1) -> 6:int) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
keys: key (type: string), val (type: string), 0L (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -673,15 +1023,59 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: rand() (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [4]
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:string, val:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint, bigint]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: PARTIALS
+ keyExpressions: col 0:string, col 1:string, col 2:bigint
+ native: false
+ vectorProcessingMode: STREAMING
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
@@ -690,10 +1084,22 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [0, 1]
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
Reducer 3
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -729,6 +1135,11 @@ STAGE PLANS:
value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
Reducer 4
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
@@ -743,10 +1154,31 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 5
- Execution mode: llap
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aaa
+ reduceColumnSortOrder: +++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: PARTIALS
+ keyExpressions: col 0:string, col 1:string, col 2:bigint
+ native: false
+ vectorProcessingMode: STREAMING
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: partials
outputColumnNames: _col0, _col1, _col2, _col3
@@ -755,10 +1187,22 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [0, 1]
+ valueColumnNums: [3]
Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint)
Reducer 6
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
@@ -794,6 +1238,11 @@ STAGE PLANS:
value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
Reducer 7
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
[2/3] hive git commit: HIVE-19032: Vectorization: Disable GROUP BY
aggregations with DISTINCT (Matt McCline, reviewed by Gopal Vijayaraghavan)
Posted by mm...@apache.org.
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
index 7f7624a..80e073b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
@@ -1430,6 +1430,469 @@ NULL 5 1
NULL NULL 1
NULL NULL 2
PREHOOK: query: explain vectorization detail
+select key, value, grouping(key), grouping(value)
+from T1
+group by key, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select key, value, grouping(key), grouping(value)
+from T1
+group by key, value
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: key, value
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:int, col 1:int
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: key (type: int), value (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0, 1]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:int, value:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY._col0:int, KEY._col1:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int, col 1:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: []
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), 0L (type: bigint), 0L (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3]
+ selectExpressions: ConstantVectorExpression(val 0) -> 2:bigint, ConstantVectorExpression(val 0) -> 3:bigint
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, grouping(key), grouping(value)
+from T1
+group by key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, grouping(key), grouping(value)
+from T1
+group by key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+key value _c2 _c3
+1 1 0 0
+1 NULL 0 0
+2 2 0 0
+3 3 0 0
+3 NULL 0 0
+4 5 0 0
+PREHOOK: query: explain vectorization detail
+select key, value, grouping(value)
+from T1
+group by key, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select key, value, grouping(value)
+from T1
+group by key, value
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: key, value
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:int, col 1:int
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: key (type: int), value (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0, 1]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:int, value:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY._col0:int, KEY._col1:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int, col 1:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: []
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), 0L (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
+ selectExpressions: ConstantVectorExpression(val 0) -> 2:bigint
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value, grouping(value)
+from T1
+group by key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, grouping(value)
+from T1
+group by key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+key value _c2
+1 1 0
+1 NULL 0
+2 2 0
+3 3 0
+3 NULL 0
+4 5 0
+PREHOOK: query: explain vectorization detail
+select key, value
+from T1
+group by key, value
+having grouping(key) = 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select key, value
+from T1
+group by key, value
+having grouping(key) = 0
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: key (type: int), value (type: int)
+ outputColumnNames: key, value
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:int, col 1:int
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: key (type: int), value (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0, 1]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:int, value:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY._col0:int, KEY._col1:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int, col 1:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: []
+ keys: KEY._col0 (type: int), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, value
+from T1
+group by key, value
+having grouping(key) = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value
+from T1
+group by key, value
+having grouping(key) = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+key value
+1 1
+1 NULL
+2 2
+3 3
+3 NULL
+4 5
+PREHOOK: query: explain vectorization detail
select key, value, `grouping__id`, grouping(key, value)
from T1
group by cube(key, value)
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
index e839214..ef49d90 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
@@ -210,27 +210,12 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: key, val
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(DISTINCT val)
- Group By Vectorization:
- aggregators: VectorUDAFCount(col 1:string) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:string, ConstantVectorExpression(val 0) -> 3:bigint, col 1:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
keys: key (type: string), 0L (type: bigint), val (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -239,29 +224,15 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: No DISTINCT columns IS false
Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized, llap
+ Execution mode: llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: key:string, val:string
- partitionColumnCount: 0
- scratchColumnTypeNames: [bigint]
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reducer 2
Execution mode: llap
Reduce Vectorization:
@@ -303,12 +274,12 @@ POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with rollu
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
-1 0
-2 0
-3 0
-7 0
-8 0
-NULL 0
+1 1
+2 1
+3 1
+7 1
+8 2
+NULL 6
PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup
PREHOOK: type: QUERY
@@ -539,27 +510,12 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: key, val
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(DISTINCT val)
- Group By Vectorization:
- aggregators: VectorUDAFCount(col 1:string) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:string, ConstantVectorExpression(val 0) -> 3:bigint, col 1:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
keys: key (type: string), 0L (type: bigint), val (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
@@ -568,29 +524,15 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
sort order: +++
Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: No DISTINCT columns IS false
Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
- Execution mode: vectorized, llap
+ Execution mode: llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: []
- featureSupportInUse: []
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
- rowBatchContext:
- dataColumnCount: 2
- includeColumns: [0, 1]
- dataColumns: key:string, val:string
- partitionColumnCount: 0
- scratchColumnTypeNames: [bigint]
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
Reducer 2
Execution mode: llap
Reduce Vectorization:
@@ -652,12 +594,12 @@ POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with rollu
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
-1 0
-2 0
-3 0
-7 0
-8 0
-NULL 0
+1 1
+2 1
+3 1
+7 1
+8 2
+NULL 6
PREHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS ORC
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out
new file mode 100644
index 0000000..79ca6d9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out
@@ -0,0 +1,996 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1')
+SELECT * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@t1@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1')
+SELECT * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@t1@ds=1
+POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: FINAL
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: STREAMING
+ projectedOutputColumnNums: []
+ keys: key (type: string)
+ mode: final
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(_col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(col 0:string) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ keyColumnNums: []
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [0]
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0]
+ dataColumns: key:string, val:string
+ partitionColumnCount: 1
+ partitionColumns: ds:string
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder:
+ reduceColumnSortOrder:
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ native: false
+ vectorProcessingMode: GLOBAL
+ projectedOutputColumnNums: [0]
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(distinct key) from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct key) from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+6
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key), count(1), count(key), sum(distinct key) from T1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key), count(1), count(key), sum(distinct key) from T1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(DISTINCT key), count(), count(key), sum(DISTINCT key)
+ bucketGroup: true
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: GROUPBY operator: DISTINCT not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col0:0._col0), count(VALUE._col1), count(VALUE._col2), sum(DISTINCT KEY._col0:1._col0)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(distinct key), count(1), count(key), sum(distinct key) from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct key), count(1), count(key), sum(distinct key) from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+6 10 10 28.0
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(DISTINCT key), count(), count(key), sum(DISTINCT key)
+ bucketGroup: true
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: GROUPBY operator: DISTINCT not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col1), count(VALUE._col2), sum(DISTINCT KEY._col1:1._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 5 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+1 1 1 2.0
+1 1 1 4.0
+1 1 1 8.0
+1 1 1 9.0
+1 3 3 0.0
+1 3 3 5.0
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(DISTINCT key), count(), count(key), sum(DISTINCT key)
+ bucketGroup: true
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+ vectorized: false
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: GROUPBY operator: DISTINCT not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col1), count(VALUE._col2), sum(DISTINCT KEY._col1:1._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+0 1 3 3 0.0
+2 1 1 1 2.0
+4 1 1 1 4.0
+5 1 3 3 5.0
+8 1 1 1 8.0
+9 1 1 1 9.0
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key+key) from T1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key+key) from T1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [6]
+ selectExpressions: DoubleColAddDoubleColumn(col 4:double, col 5:double)(children: CastStringToDouble(col 0:string) -> 4:double, CastStringToDouble(col 0:string) -> 5:double) -> 6:double
+ Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 6:double
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: _col0 (type: double)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0]
+ dataColumns: key:string, val:string
+ partitionColumnCount: 1
+ partitionColumns: ds:string
+ scratchColumnTypeNames: [double, double, double]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: KEY._col0:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:double
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: []
+ keys: KEY._col0 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(_col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(col 0:double) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ keyColumnNums: []
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [0]
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder:
+ reduceColumnSortOrder:
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ native: false
+ vectorProcessingMode: GLOBAL
+ projectedOutputColumnNums: [0]
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(distinct key+key) from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct key+key) from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+6
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct 1) from T1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct 1) from T1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: []
+ Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: ConstantVectorExpression(val 1) -> 4:int
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: 1 (type: int)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: []
+ dataColumns: key:string, val:string
+ partitionColumnCount: 1
+ partitionColumns: ds:string
+ scratchColumnTypeNames: [bigint]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: KEY._col0:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: []
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: []
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ keyColumnNums: []
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [0]
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder:
+ reduceColumnSortOrder:
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ native: false
+ vectorProcessingMode: GLOBAL
+ projectedOutputColumnNums: [0]
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(distinct 1) from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct 1) from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+1
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: key (type: string)
+ sort order: +
+ Map-reduce partition columns: key (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0]
+ dataColumns: key:string, val:string
+ partitionColumnCount: 1
+ partitionColumns: ds:string
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: KEY._col0:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: COMPLETE
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: STREAMING
+ projectedOutputColumnNums: []
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ keyColumnNums: []
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [0]
+ Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder:
+ reduceColumnSortOrder:
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: VALUE._col0:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(col 0:string) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: COMPLETE
+ native: false
+ vectorProcessingMode: GLOBAL
+ projectedOutputColumnNums: [0]
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(distinct key) from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct key) from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+6
http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out
new file mode 100644
index 0000000..6c6986e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out
@@ -0,0 +1,186 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1 PARTITION (ds='1')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1 PARTITION (ds='1')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1
+POSTHOOK: Output: default@t1@ds=1
+PREHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+PREHOOK: Output: default@t1@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+POSTHOOK: Output: default@t1@ds=1
+POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: FINAL
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: STREAMING
+ projectedOutputColumnNums: []
+ keys: key (type: string)
+ mode: final
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(_col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(col 0:string) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ keyColumnNums: []
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [0]
+ Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0]
+ dataColumns: key:string, val:string
+ partitionColumnCount: 1
+ partitionColumns: ds:string
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder:
+ reduceColumnSortOrder:
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ native: false
+ vectorProcessingMode: GLOBAL
+ projectedOutputColumnNums: [0]
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(distinct key) from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct key) from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+5
+PREHOOK: query: DROP TABLE T1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: DROP TABLE T1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1