You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@hive.apache.org by mm...@apache.org on 2018/03/30 07:19:47 UTC

[1/3] hive git commit: HIVE-19032: Vectorization: Disable GROUP BY aggregations with DISTINCT (Matt McCline, reviewed by Gopal Vijayaraghavan)

Repository: hive
Updated Branches:
  refs/heads/master 1974397f6 -> 470a2f998


http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out b/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
new file mode 100644
index 0000000..c299796
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
@@ -0,0 +1,952 @@
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 183488 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((UDFToDouble(cbigint) < cdouble) and (cint > 0)) (type: boolean)
+                    Statistics: Num rows: 1365 Data size: 20400 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: cbigint (type: bigint), cdouble (type: double)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1365 Data size: 16320 Basic stats: COMPLETE Column stats: COMPLETE
+                      Limit
+                        Number of rows: 7
+                        Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 7 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 7
+      Processor Tree:
+        ListSink
+
+WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-1887561756	-10011.0
+-1887561756	-13877.0
+-1887561756	-2281.0
+-1887561756	-8881.0
+-1887561756	10361.0
+-1887561756	1839.0
+-1887561756	9531.0
+PREHOOK: query: explain vectorization detail
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 146796 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
+                    predicate: ctinyint is not null (type: boolean)
+                    Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
+                      outputColumnNames: _col0, _col1, _col2
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 5, 1]
+                      Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: tinyint), _col1 (type: double)
+                        sort order: ++
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkObjectHashOperator
+                            keyColumnNums: [0, 5]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: [1]
+                        Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE
+                        TopN Hash Memory Usage: 0.3
+                        value expressions: _col2 (type: smallint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 5]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY.reducesinkkey0:tinyint, KEY.reducesinkkey1:double, VALUE._col0:smallint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double), VALUE._col0 (type: smallint)
+                outputColumnNames: _col0, _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1, 2]
+                Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 20
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 20 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-64	-10462.0	-10462
+-64	-15920.0	-15920
+-64	-1600.0	-1600
+-64	-200.0	-200
+-64	-2919.0	-2919
+-64	-3097.0	-3097
+-64	-3586.0	-3586
+-64	-4018.0	-4018
+-64	-4040.0	-4040
+-64	-4803.0	-4803
+-64	-6907.0	-6907
+-64	-7196.0	-7196
+-64	-7196.0	-7196
+-64	-7196.0	-7196
+-64	-7196.0	-7196
+-64	-7196.0	-7196
+-64	-7196.0	-7196
+-64	-7196.0	-7196
+-64	-8080.0	-8080
+-64	-9842.0	-9842
+PREHOOK: query: explain vectorization detail
+select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: ctinyint (type: tinyint), (cdouble + 1.0D) (type: double)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 13]
+                        selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double
+                    Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(_col1), count(_col1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 13:double) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 0:tinyint
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0, 1]
+                      keys: _col0 (type: tinyint)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: tinyint)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: tinyint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkObjectHashOperator
+                            keyColumnNums: [0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            partitionColumnNums: [0]
+                            valueColumnNums: [1, 2]
+                        Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE
+                        TopN Hash Memory Usage: 0.3
+                        value expressions: _col1 (type: double), _col2 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 5]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [double]
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY._col0:tinyint, VALUE._col0:double, VALUE._col1:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), count(VALUE._col1)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:tinyint
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0, 1]
+                keys: KEY._col0 (type: tinyint)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double)
+                  outputColumnNames: _col0, _col1
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 3]
+                      selectExpressions: DoubleColDivideLongColumn(col 1:double, col 2:bigint) -> 3:double
+                  Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
+                  Limit
+                    Number of rows: 20
+                    Limit Vectorization:
+                        className: VectorLimitOperator
+                        native: true
+                    Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-46	3033.55
+-47	-574.6428571428571
+-48	1672.909090909091
+-49	768.7659574468086
+-50	-960.0192307692307
+-51	-96.46341463414635
+-52	2810.705882352941
+-53	-532.7567567567568
+-54	2712.7272727272725
+-55	2385.595744680851
+-56	2595.818181818182
+-57	1867.0535714285713
+-58	3483.2444444444445
+-59	318.27272727272725
+-60	1071.82
+-61	914.3404255319149
+-62	245.69387755102042
+-63	2178.7272727272725
+-64	373.52941176470586
+NULL	9370.0945309795
+PREHOOK: query: explain vectorization detail
+select distinct(ctinyint) from alltypesorc limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select distinct(ctinyint) from alltypesorc limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: ctinyint (type: tinyint)
+                    outputColumnNames: ctinyint
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0]
+                    Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 0:tinyint
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: ctinyint (type: tinyint)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: tinyint)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: tinyint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            keyColumnNums: [0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: []
+                        Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE
+                        TopN Hash Memory Usage: 0.3
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY._col0:tinyint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:tinyint
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: tinyint)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 20
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct(ctinyint) from alltypesorc limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct(ctinyint) from alltypesorc limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-46
+-47
+-48
+-49
+-50
+-51
+-52
+-53
+-54
+-55
+-56
+-57
+-58
+-59
+-60
+-61
+-62
+-63
+-64
+NULL
+PREHOOK: query: explain vectorization detail
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: ctinyint (type: tinyint), cdouble (type: double)
+                    outputColumnNames: ctinyint, cdouble
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 5]
+                    Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 0:tinyint, col 5:double
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: ctinyint (type: tinyint), cdouble (type: double)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: tinyint), _col1 (type: double)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: tinyint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkObjectHashOperator
+                            keyColumnNums: [0, 1]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            partitionColumnNums: [0]
+                            valueColumnNums: []
+                        Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 5]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY._col0:tinyint, KEY._col1:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:tinyint, col 1:double
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count(_col1)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFCount(col 1:double) -> bigint
+                      className: VectorGroupByOperator
+                      groupByMode: COMPLETE
+                      keyExpressions: col 0:tinyint
+                      native: false
+                      vectorProcessingMode: STREAMING
+                      projectedOutputColumnNums: [0]
+                  keys: _col0 (type: tinyint)
+                  mode: complete
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
+                  Limit
+                    Number of rows: 20
+                    Limit Vectorization:
+                        className: VectorLimitOperator
+                        native: true
+                    Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-46	24
+-47	22
+-48	29
+-49	26
+-50	30
+-51	21
+-52	33
+-53	22
+-54	26
+-55	29
+-56	36
+-57	35
+-58	23
+-59	31
+-60	27
+-61	25
+-62	27
+-63	19
+-64	24
+NULL	2932
+PREHOOK: query: explain vectorization detail
+select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 0
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select ctinyint,cdouble from alltypesorc order by ctinyint limit 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+PREHOOK: query: explain vectorization detail
+select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
+                    predicate: ctinyint is not null (type: boolean)
+                    Statistics: Num rows: 9173 Data size: 82188 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(ctinyint)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 5:double
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0]
+                      keys: cdouble (type: double)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: double)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: double)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumnNums: [0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: [1]
+                        Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 5]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY._col0:double, VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:double
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0]
+                keys: KEY._col0 (type: double)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col1 (type: bigint), _col0 (type: double)
+                  sort order: ++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      keyColumnNums: [1, 0]
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      valueColumnNums: []
+                  Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE
+                  TopN Hash Memory Usage: 0.3
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: double), KEY.reducesinkkey0 (type: bigint)
+                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [1, 0]
+                Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 20
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 20 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-10462.0	-64
+-1121.0	-89
+-11322.0	-101
+-11492.0	-78
+-15920.0	-64
+-4803.0	-64
+-6907.0	-64
+-7196.0	-2009
+-8080.0	-64
+-8118.0	-80
+-9842.0	-64
+10496.0	-67
+15601.0	-1733
+3520.0	-86
+4811.0	-115
+5241.0	-80
+557.0	-75
+7705.0	-88
+9452.0	-76
+NULL	-32768

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
new file mode 100644
index 0000000..f19e2ca
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
@@ -0,0 +1,362 @@
+PREHOOK: query: create table dtest(a int, b int) clustered by (a) sorted by (a) into 1 buckets stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dtest
+POSTHOOK: query: create table dtest(a int, b int) clustered by (a) sorted by (a) into 1 buckets stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dtest
+PREHOOK: query: insert into table dtest select c,b from (select array(300,300,300,300,300) as a, 1 as b from src order by a limit 1) y lateral view  explode(a) t1 as c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dtest
+POSTHOOK: query: insert into table dtest select c,b from (select array(300,300,300,300,300) as a, 1 as b from src order by a limit 1) y lateral view  explode(a) t1 as c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dtest
+POSTHOOK: Lineage: dtest.a SCRIPT []
+POSTHOOK: Lineage: dtest.b SIMPLE []
+PREHOOK: query: explain vectorization detail
+select sum(distinct a), count(distinct a) from dtest
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select sum(distinct a), count(distinct a) from dtest
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: dtest
+                  Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: a (type: int)
+                    outputColumnNames: a
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0]
+                    Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: FINAL
+                          keyExpressions: col 0:int
+                          native: false
+                          vectorProcessingMode: STREAMING
+                          projectedOutputColumnNums: []
+                      keys: a (type: int)
+                      mode: final
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: sum(_col0), count(_col0)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFCount(col 0:int) -> bigint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumnNums: [0, 1]
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumnNums: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumnNums: [0, 1]
+                          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: bigint), _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: a:int, b:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: VALUE._col0:bigint, VALUE._col1:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), count(VALUE._col1)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0, 1]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select sum(distinct a), count(distinct a) from dtest
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dtest
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(distinct a), count(distinct a) from dtest
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dtest
+#### A masked pattern was here ####
+300	1
+PREHOOK: query: explain vectorization detail
+select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: cint (type: int)
+                    outputColumnNames: cint
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [2]
+                    Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 2:int
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: cint (type: int)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            keyColumnNums: [0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: []
+                        Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [2]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY._col0:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:int
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: int), UDFToDouble(_col0) (type: double), (UDFToDouble(_col0) * UDFToDouble(_col0)) (type: double)
+                  outputColumnNames: _col0, _col1, _col2
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 1, 4]
+                      selectExpressions: CastLongToDouble(col 0:int) -> 1:double, DoubleColMultiplyDoubleColumn(col 2:double, col 3:double)(children: CastLongToDouble(col 0:int) -> 2:double, CastLongToDouble(col 0:int) -> 3:double) -> 4:double
+                  Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1)
+                    Group By Vectorization:
+                        aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFCount(col 0:int) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 1:double) -> double
+                        className: VectorGroupByOperator
+                        groupByMode: HASH
+                        native: false
+                        vectorProcessingMode: HASH
+                        projectedOutputColumnNums: [0, 1, 2, 3]
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkEmptyKeyOperator
+                          keyColumnNums: []
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumnNums: [0, 1, 2, 3]
+                      Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:double, VALUE._col3:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0, 1, 2, 3]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: bigint), _col1 (type: bigint), (UDFToDouble(_col0) / _col1) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 1, 5, 4]
+                      selectExpressions: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: CastLongToDouble(col 0:bigint) -> 4:double) -> 5:double, FuncPowerDoubleToDouble(col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 4:double
+                  Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+-3482841611	6082	-572647.4204209142	6.153814687328984E8

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out
index 59db9d1..8a81b34 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out
@@ -470,27 +470,12 @@ STAGE PLANS:
           TableScan
             alias: alltypesparquet
             Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
             Select Operator
               expressions: ctinyint (type: tinyint), cdouble (type: double)
               outputColumnNames: ctinyint, cdouble
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumnNums: [0, 5]
               Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count(DISTINCT cdouble)
-                Group By Vectorization:
-                    aggregators: VectorUDAFCount(col 5:double) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: HASH
-                    keyExpressions: col 0:tinyint, col 5:double
-                    native: false
-                    vectorProcessingMode: HASH
-                    projectedOutputColumnNums: [0]
                 keys: ctinyint (type: tinyint), cdouble (type: double)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
@@ -499,29 +484,14 @@ STAGE PLANS:
                   key expressions: _col0 (type: tinyint), _col1 (type: double)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: tinyint)
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkOperator
-                      native: false
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
                   Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.3
-      Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
           inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 12
-              includeColumns: [0, 5]
-              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+          vectorized: false
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/vector_count.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_count.q.out b/ql/src/test/results/clientpositive/vector_count.q.out
index 7e30870..85d5926 100644
--- a/ql/src/test/results/clientpositive/vector_count.q.out
+++ b/ql/src/test/results/clientpositive/vector_count.q.out
@@ -62,26 +62,12 @@ STAGE PLANS:
           TableScan
             alias: abcd
             Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
             Select Operator
               expressions: a (type: int), b (type: int), c (type: int), d (type: int)
               outputColumnNames: a, b, c, d
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumnNums: [0, 1, 2, 3]
               Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count(DISTINCT b), count(DISTINCT c), sum(d)
-                Group By Vectorization:
-                    aggregators: VectorUDAFCount(col 1:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: HASH
-                    keyExpressions: col 0:int, col 1:int, col 2:int
-                    native: false
-                    vectorProcessingMode: HASH
-                    projectedOutputColumnNums: [0, 1, 2]
                 keys: a (type: int), b (type: int), c (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -90,23 +76,14 @@ STAGE PLANS:
                   key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: int)
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkOperator
-                      native: false
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
                   Statistics: Num rows: 7 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col5 (type: bigint)
-      Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+          vectorized: false
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -182,7 +159,7 @@ STAGE PLANS:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          notVectorizedReason: GROUPBY operator: Aggregations with > 1 parameter are not supported count([Column[a], Column[b]])
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
           vectorized: false
       Reduce Vectorization:
           enabled: false

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorization_limit.q.out b/ql/src/test/results/clientpositive/vectorization_limit.q.out
index 8226fd4..517cb07 100644
--- a/ql/src/test/results/clientpositive/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_limit.q.out
@@ -470,27 +470,12 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
             Select Operator
               expressions: ctinyint (type: tinyint), cdouble (type: double)
               outputColumnNames: ctinyint, cdouble
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumnNums: [0, 5]
               Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count(DISTINCT cdouble)
-                Group By Vectorization:
-                    aggregators: VectorUDAFCount(col 5:double) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: HASH
-                    keyExpressions: col 0:tinyint, col 5:double
-                    native: false
-                    vectorProcessingMode: HASH
-                    projectedOutputColumnNums: [0]
                 keys: ctinyint (type: tinyint), cdouble (type: double)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
@@ -499,29 +484,14 @@ STAGE PLANS:
                   key expressions: _col0 (type: tinyint), _col1 (type: double)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: tinyint)
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkOperator
-                      native: false
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TopN Hash Memory Usage: 0.3
-      Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 12
-              includeColumns: [0, 5]
-              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+          vectorized: false
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
index 07576fd..3847481 100644
--- a/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
@@ -37,28 +37,13 @@ STAGE PLANS:
           TableScan
             alias: dtest
             Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
             Select Operator
               expressions: a (type: int)
               outputColumnNames: a
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumnNums: [0]
               Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: sum(DISTINCT a), count(DISTINCT a)
                 bucketGroup: true
-                Group By Vectorization:
-                    aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFCount(col 0:int) -> bigint
-                    className: VectorGroupByOperator
-                    groupByMode: HASH
-                    keyExpressions: col 0:int
-                    native: false
-                    vectorProcessingMode: HASH
-                    projectedOutputColumnNums: [0, 1]
                 keys: a (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
@@ -66,28 +51,13 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkOperator
-                      native: false
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
                   Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 2
-              includeColumns: [0]
-              dataColumns: a:int, b:int
-              partitionColumnCount: 0
-              scratchColumnTypeNames: []
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+          vectorized: false
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -142,28 +112,12 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
-                vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
             Select Operator
               expressions: cint (type: int), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double)
               outputColumnNames: _col0, _col1, _col2
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumnNums: [2, 13, 16]
-                  selectExpressions: CastLongToDouble(col 2:int) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 14:double, CastLongToDouble(col 2:int) -> 15:double) -> 16:double
               Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: sum(DISTINCT _col0), count(DISTINCT _col0), sum(DISTINCT _col2), sum(DISTINCT _col1)
-                Group By Vectorization:
-                    aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double
-                    className: VectorGroupByOperator
-                    groupByMode: HASH
-                    keyExpressions: col 2:int, col 16:double, col 13:double
-                    native: false
-                    vectorProcessingMode: HASH
-                    projectedOutputColumnNums: [0, 1, 2, 3]
                 keys: _col0 (type: int), _col2 (type: double), _col1 (type: double)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -171,28 +125,13 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double)
                   sort order: +++
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkOperator
-                      native: false
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
-      Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 12
-              includeColumns: [2]
-              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-              partitionColumnCount: 0
-              scratchColumnTypeNames: [double, double, double, double]
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+          vectorized: false
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true

[3/3] hive git commit: HIVE-19032: Vectorization: Disable GROUP BY aggregations with DISTINCT (Matt McCline, reviewed by Gopal Vijayaraghavan)

Posted by mm...@apache.org.

HIVE-19032: Vectorization: Disable GROUP BY aggregations with DISTINCT (Matt McCline, reviewed by Gopal Vijayaraghavan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/470a2f99
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/470a2f99
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/470a2f99

Branch: refs/heads/master
Commit: 470a2f998494ef2a78e1424efae70d9eb1b17447
Parents: 1974397
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri Mar 30 02:19:24 2018 -0500
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri Mar 30 02:19:24 2018 -0500

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   4 +
 .../hive/ql/optimizer/physical/Vectorizer.java  |   6 +-
 .../test/queries/clientpositive/groupby_cube1.q |   1 +
 .../clientpositive/groupby_grouping_id1.q       |   2 +
 .../clientpositive/groupby_grouping_id2.q       |   1 +
 .../clientpositive/groupby_grouping_id3.q       |   2 +
 .../clientpositive/groupby_grouping_sets1.q     |   1 +
 .../clientpositive/groupby_grouping_sets2.q     |   1 +
 .../clientpositive/groupby_grouping_sets3.q     |   1 +
 .../clientpositive/groupby_grouping_sets4.q     |   1 +
 .../clientpositive/groupby_grouping_sets5.q     |   1 +
 .../clientpositive/groupby_grouping_sets6.q     |   2 +
 .../groupby_grouping_sets_grouping.q            |   1 +
 .../queries/clientpositive/groupby_rollup1.q    |   1 +
 .../queries/clientpositive/groupby_sort_11.q    |   3 +
 .../queries/clientpositive/groupby_sort_8.q     |   3 +
 .../clientpositive/vector_groupby_cube1.q       |   2 +-
 .../vector_groupby_grouping_sets_grouping.q     |  29 +
 .../clientpositive/vector_groupby_sort_11.q     |  50 +
 .../clientpositive/vector_groupby_sort_8.q      |  24 +
 .../clientpositive/groupby_sort_11.q.out        |   4 +-
 .../clientpositive/llap/vector_count.q.out      |  30 +-
 .../llap/vector_groupby_cube1.q.out             | 501 +++++++++-
 .../vector_groupby_grouping_sets_grouping.q.out | 463 +++++++++
 .../llap/vector_groupby_rollup1.q.out           |  94 +-
 .../llap/vector_groupby_sort_11.q.out           | 996 +++++++++++++++++++
 .../llap/vector_groupby_sort_8.q.out            | 186 ++++
 .../llap/vectorization_limit.q.out              | 952 ++++++++++++++++++
 .../llap/vectorized_distinct_gby.q.out          | 362 +++++++
 .../parquet_vectorization_limit.q.out           |  34 +-
 .../results/clientpositive/vector_count.q.out   |  29 +-
 .../clientpositive/vectorization_limit.q.out    |  34 +-
 .../vectorized_distinct_gby.q.out               |  69 +-
 33 files changed, 3601 insertions(+), 289 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 937ea79..a42ae80 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -352,6 +352,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   vector_decimal_udf.q,\
   vector_decimal_udf2.q,\
   vector_distinct_2.q,\
+  vectorized_distinct_gby.q,\
   vector_elt.q,\
   vector_groupby4.q,\
   vector_groupby6.q,\
@@ -420,6 +421,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   vectorization_8.q,\
   vectorization_9.q,\
   vectorization_decimal_date.q,\
+  vectorization_limit.q,\
   vectorization_nested_udf.q,\
   vectorization_not.q,\
   vectorization_part.q,\
@@ -747,6 +749,8 @@ minillaplocal.query.files=\
   vector_groupby_grouping_sets_limit.q,\
   vector_groupby_grouping_window.q,\
   vector_groupby_rollup1.q,\
+  vector_groupby_sort_11.q,\
+  vector_groupby_sort_8.q,\
   vector_if_expr_2.q,\
   vector_join30.q,\
   vector_join_filters.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 33830b3..a822a4b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -2876,13 +2876,13 @@ public class Vectorizer implements PhysicalPlanResolver {
       setExpressionIssue("Aggregation Function", "UDF " + udfName + " not supported");
       return false;
     }
-    /*
+
     // The planner seems to pull this one out.
     if (aggDesc.getDistinct()) {
       setExpressionIssue("Aggregation Function", "DISTINCT not supported");
-      return new Pair<Boolean,Boolean>(false, false);
+      return false;
     }
-    */
+
 
     ArrayList<ExprNodeDesc> parameters = aggDesc.getParameters();
 

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_cube1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_cube1.q b/ql/src/test/queries/clientpositive/groupby_cube1.q
index fd2f0de..92456d0 100644
--- a/ql/src/test/queries/clientpositive/groupby_cube1.q
+++ b/ql/src/test/queries/clientpositive/groupby_cube1.q
@@ -1,3 +1,4 @@
+SET hive.vectorized.execution.enabled=false;
 set hive.mapred.mode=nonstrict;
 set hive.map.aggr=true;
 set hive.groupby.skewindata=false;

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id1.q b/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
index 9948ce9..7068d21 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id1.q
@@ -1,3 +1,5 @@
+SET hive.vectorized.execution.enabled=false;
+
 CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
 
 LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1;

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
index cc7f9e4..ba755c4 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id2.q
@@ -1,4 +1,5 @@
 set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
 set hive.fetch.task.conversion=none;
 set hive.cli.print.header=true;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_id3.q b/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
index 955dbe0..29b2f15 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_id3.q
@@ -1,3 +1,5 @@
+SET hive.vectorized.execution.enabled=false;
+
 CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE;
 
 LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1;

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
index c22c97f..86c5246 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets1.q
@@ -1,4 +1,5 @@
 set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
 set hive.fetch.task.conversion=none;
 set hive.cli.print.header=true;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
index 90e6325..1934321 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets2.q
@@ -1,4 +1,5 @@
 set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
 set hive.cli.print.header=true;
 set hive.mapred.mode=nonstrict;
 set hive.new.job.grouping.set.cardinality=2;

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
index 16421e8..81267dc 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets3.q
@@ -1,4 +1,5 @@
 set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
 set hive.fetch.task.conversion=none;
 set hive.cli.print.header=true;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
index 1074a3b..fa62992 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets4.q
@@ -1,4 +1,5 @@
 set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=false;
 set hive.mapred.mode=nonstrict;
 set hive.merge.mapfiles = false;
 set hive.merge.mapredfiles = false;

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
index 570d464..829a0c2 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets5.q
@@ -1,4 +1,5 @@
 set hive.mapred.mode=nonstrict;
+SET hive.vectorized.execution.enabled=false;
 set hive.merge.mapfiles = false;
 set hive.merge.mapredfiles = false;
 -- Set merging to false above to make the explain more readable

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q
index e537bce..515dce3 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets6.q
@@ -1,4 +1,6 @@
 set hive.mapred.mode=nonstrict;
+SET hive.vectorized.execution.enabled=false;
+
 CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE; 
 
 LOAD DATA LOCAL INPATH '../../data/files/grouping_sets.txt' INTO TABLE T1;

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
index 7157106..3f437a4 100644
--- a/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
@@ -1,3 +1,4 @@
+SET hive.vectorized.execution.enabled=false;
 
 -- SORT_QUERY_RESULTS
 

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_rollup1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_rollup1.q b/ql/src/test/queries/clientpositive/groupby_rollup1.q
index f30ace0..94f533c 100644
--- a/ql/src/test/queries/clientpositive/groupby_rollup1.q
+++ b/ql/src/test/queries/clientpositive/groupby_rollup1.q
@@ -1,3 +1,4 @@
+SET hive.vectorized.execution.enabled=false;
 set hive.mapred.mode=nonstrict;
 set hive.map.aggr=true;
 set hive.groupby.skewindata=false;

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_sort_11.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_11.q b/ql/src/test/queries/clientpositive/groupby_sort_11.q
index 3b6c172..c56789d 100644
--- a/ql/src/test/queries/clientpositive/groupby_sort_11.q
+++ b/ql/src/test/queries/clientpositive/groupby_sort_11.q
@@ -1,7 +1,10 @@
+SET hive.vectorized.execution.enabled=false;
 set hive.mapred.mode=nonstrict;
 set hive.exec.reducers.max = 1;
 set hive.map.groupby.sorted=true;
 
+-- SORT_QUERY_RESULTS
+
 CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
 CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/groupby_sort_8.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_sort_8.q b/ql/src/test/queries/clientpositive/groupby_sort_8.q
index 3f81a69..2c20b29 100644
--- a/ql/src/test/queries/clientpositive/groupby_sort_8.q
+++ b/ql/src/test/queries/clientpositive/groupby_sort_8.q
@@ -1,7 +1,10 @@
+SET hive.vectorized.execution.enabled=false;
 set hive.mapred.mode=nonstrict;
 set hive.exec.reducers.max = 10;
 set hive.map.groupby.sorted=true;
 
+-- SORT_QUERY_RESULTS
+
 CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
 CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
 

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/vector_groupby_cube1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_cube1.q b/ql/src/test/queries/clientpositive/vector_groupby_cube1.q
index 1f7b467..d6bab2c 100644
--- a/ql/src/test/queries/clientpositive/vector_groupby_cube1.q
+++ b/ql/src/test/queries/clientpositive/vector_groupby_cube1.q
@@ -1,4 +1,4 @@
-SET hive.vectorized.execution.enabled=false;
+SET hive.vectorized.execution.enabled=true;
 SET hive.vectorized.execution.reduce.enabled=true;
 set hive.mapred.mode=nonstrict;
 set hive.map.aggr=true;

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q b/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
index a0e874d..5a5757d 100644
--- a/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
+++ b/ql/src/test/queries/clientpositive/vector_groupby_grouping_sets_grouping.q
@@ -99,6 +99,35 @@ having grouping(key) = 1 OR grouping(value) = 1
 order by x desc, case when x = 1 then key end;
 
 explain vectorization detail
+select key, value, grouping(key), grouping(value)
+from T1
+group by key, value;
+
+select key, value, grouping(key), grouping(value)
+from T1
+group by key, value;
+
+explain vectorization detail
+select key, value, grouping(value)
+from T1
+group by key, value;
+
+select key, value, grouping(value)
+from T1
+group by key, value;
+
+explain vectorization detail
+select key, value
+from T1
+group by key, value
+having grouping(key) = 0;
+
+select key, value
+from T1
+group by key, value
+having grouping(key) = 0;
+
+explain vectorization detail
 select key, value, `grouping__id`, grouping(key, value)
 from T1
 group by cube(key, value);

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/vector_groupby_sort_11.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_sort_11.q b/ql/src/test/queries/clientpositive/vector_groupby_sort_11.q
new file mode 100644
index 0000000..012fe82
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_groupby_sort_11.q
@@ -0,0 +1,50 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.reduce.enabled=true;
+set hive.mapred.mode=nonstrict;
+set hive.exec.reducers.max = 1;
+set hive.map.groupby.sorted=true;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
+
+-- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 PARTITION (ds='1')
+SELECT * from src where key < 10;
+
+-- The plan is optimized to perform partial aggregation on the mapper
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1;
+select count(distinct key) from T1;
+
+-- The plan is optimized to perform partial aggregation on the mapper
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key), count(1), count(key), sum(distinct key) from T1;
+select count(distinct key), count(1), count(key), sum(distinct key) from T1;
+
+-- The plan is not changed in the presence of a grouping key
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key;
+select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key;
+
+-- The plan is not changed in the presence of a grouping key
+EXPLAIN VECTORIZATION DETAIL
+select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key;
+select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key;
+
+-- The plan is not changed in the presence of a grouping key expression
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key+key) from T1;
+select count(distinct key+key) from T1;
+
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct 1) from T1;
+select count(distinct 1) from T1;
+
+set hive.map.aggr=false;
+
+-- no plan change if map aggr is turned off
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1;
+select count(distinct key) from T1;

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/queries/clientpositive/vector_groupby_sort_8.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_sort_8.q b/ql/src/test/queries/clientpositive/vector_groupby_sort_8.q
new file mode 100644
index 0000000..b0c5699
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_groupby_sort_8.q
@@ -0,0 +1,24 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.reduce.enabled=true;
+set hive.mapred.mode=nonstrict;
+set hive.exec.reducers.max = 10;
+set hive.map.groupby.sorted=true;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1  PARTITION (ds='1');
+
+-- perform an insert to make sure there are 2 files
+INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1';
+
+-- The plan is not converted to a map-side, since although the sorting columns and grouping
+-- columns match, the user is issueing a distinct.
+-- However, after HIVE-4310, partial aggregation is performed on the mapper
+EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1;
+select count(distinct key) from T1;
+
+DROP TABLE T1;

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/groupby_sort_11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_11.q.out b/ql/src/test/results/clientpositive/groupby_sort_11.q.out
index 23c89f9..cbdc526 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_11.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_11.q.out
@@ -211,12 +211,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t1@ds=1
 #### A masked pattern was here ####
-1	3	3	0.0
 1	1	1	2.0
 1	1	1	4.0
-1	3	3	5.0
 1	1	1	8.0
 1	1	1	9.0
+1	3	3	0.0
+1	3	3	5.0
 PREHOOK: query: EXPLAIN select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vector_count.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_count.q.out b/ql/src/test/results/clientpositive/llap/vector_count.q.out
index 400d930..ce35eb8 100644
--- a/ql/src/test/results/clientpositive/llap/vector_count.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_count.q.out
@@ -68,26 +68,12 @@ STAGE PLANS:
                 TableScan
                   alias: abcd
                   Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
-                  TableScan Vectorization:
-                      native: true
                   Select Operator
                     expressions: a (type: int), b (type: int), c (type: int), d (type: int)
                     outputColumnNames: a, b, c, d
-                    Select Vectorization:
-                        className: VectorSelectOperator
-                        native: true
-                        projectedOutputColumnNums: [0, 1, 2, 3]
                     Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(DISTINCT b), count(DISTINCT c), sum(d)
-                      Group By Vectorization:
-                          aggregators: VectorUDAFCount(col 1:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumLong(col 3:int) -> bigint
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:int, col 1:int, col 2:int
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: [0, 1, 2]
                       keys: a (type: int), b (type: int), c (type: int)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -96,24 +82,16 @@ STAGE PLANS:
                         key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                         sort order: +++
                         Map-reduce partition columns: _col0 (type: int)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkOperator
-                            native: false
-                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            nativeConditionsNotMet: No DISTINCT columns IS false
                         Statistics: Num rows: 7 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col5 (type: bigint)
-            Execution mode: vectorized, llap
+            Execution mode: llap
             LLAP IO: all inputs
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
+                notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+                vectorized: false
         Reducer 2 
             Execution mode: llap
             Reduce Vectorization:
@@ -200,7 +178,7 @@ STAGE PLANS:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                notVectorizedReason: GROUPBY operator: Aggregations with > 1 parameter are not supported count([Column[a], Column[b]])
+                notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
                 vectorized: false
         Reducer 2 
             Execution mode: llap

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
index f1ed146..3bfbda0 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
@@ -21,8 +21,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
-  enabled: false
-  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -41,12 +41,27 @@ STAGE PLANS:
                 TableScan
                   alias: t1
                   Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string), val (type: string)
                     outputColumnNames: key, val
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
                     Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0]
                       keys: key (type: string), val (type: string), 0L (type: bigint)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
@@ -55,15 +70,58 @@ STAGE PLANS:
                         key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
                         sort order: +++
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumnNums: [0, 1, 2]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: [3]
                         Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col3 (type: bigint)
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, val:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint]
         Reducer 2 
-            Execution mode: llap
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aaa
+                reduceColumnSortOrder: +++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:string, col 1:string, col 2:bigint
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0]
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col3
@@ -72,9 +130,16 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
                   outputColumnNames: _col0, _col1, _col2
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 1, 2]
                   Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -94,8 +159,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT key, val, count(1) FROM T1 GROUP BY CUBE(key, val)
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
-  enabled: false
-  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -114,12 +179,27 @@ STAGE PLANS:
                 TableScan
                   alias: t1
                   Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string), val (type: string)
                     outputColumnNames: key, val
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
                     Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0]
                       keys: key (type: string), val (type: string), 0L (type: bigint)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
@@ -128,15 +208,58 @@ STAGE PLANS:
                         key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
                         sort order: +++
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumnNums: [0, 1, 2]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: [3]
                         Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col3 (type: bigint)
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, val:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint]
         Reducer 2 
-            Execution mode: llap
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aaa
+                reduceColumnSortOrder: +++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:string, col 1:string, col 2:bigint
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0]
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col3
@@ -145,9 +268,16 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
                   outputColumnNames: _col0, _col1, _col2
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 1, 2]
                   Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -193,8 +323,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT key, val, GROUPING__ID, count(1) FROM T1 GROUP BY key, val with cube
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
-  enabled: false
-  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -213,12 +343,27 @@ STAGE PLANS:
                 TableScan
                   alias: t1
                   Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string), val (type: string)
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
                     Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0]
                       keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
@@ -227,15 +372,58 @@ STAGE PLANS:
                         key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
                         sort order: +++
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumnNums: [0, 1, 2]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: [3]
                         Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col3 (type: bigint)
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, val:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint]
         Reducer 2 
-            Execution mode: llap
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aaa
+                reduceColumnSortOrder: +++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:string, col 1:string, col 2:bigint
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0]
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
@@ -243,9 +431,16 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint)
                   outputColumnNames: _col0, _col1, _col2, _col3
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 1, 2, 3]
                   Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -291,8 +486,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
-  enabled: false
-  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -328,8 +523,19 @@ STAGE PLANS:
                         Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+                vectorized: false
         Reducer 2 
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -377,8 +583,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
-  enabled: false
-  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -398,12 +604,27 @@ STAGE PLANS:
                 TableScan
                   alias: t1
                   Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string), val (type: string)
                     outputColumnNames: key, val
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
                     Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count()
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCountStar(*) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0]
                       keys: key (type: string), val (type: string), 0L (type: bigint)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
@@ -412,15 +633,59 @@ STAGE PLANS:
                         key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
                         sort order: +++
                         Map-reduce partition columns: rand() (type: double)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkObjectHashOperator
+                            keyColumnNums: [0, 1, 2]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            partitionColumnNums: [4]
+                            valueColumnNums: [3]
                         Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col3 (type: bigint)
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, val:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint]
         Reducer 2 
-            Execution mode: llap
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aaa
+                reduceColumnSortOrder: +++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: PARTIALS
+                    keyExpressions: col 0:string, col 1:string, col 2:bigint
+                    native: false
+                    vectorProcessingMode: STREAMING
+                    projectedOutputColumnNums: [0]
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
                 mode: partials
                 outputColumnNames: _col0, _col1, _col2, _col3
@@ -429,13 +694,41 @@ STAGE PLANS:
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      keyColumnNums: [0, 1, 2]
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      partitionColumnNums: [0, 1]
+                      valueColumnNums: [3]
                   Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col3 (type: bigint)
         Reducer 3 
-            Execution mode: llap
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aaa
+                reduceColumnSortOrder: +++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: FINAL
+                    keyExpressions: col 0:string, col 1:string, col 2:bigint
+                    native: false
+                    vectorProcessingMode: STREAMING
+                    projectedOutputColumnNums: [0]
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
                 mode: final
                 outputColumnNames: _col0, _col1, _col3
@@ -444,9 +737,16 @@ STAGE PLANS:
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
                   outputColumnNames: _col0, _col1, _col2
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 1, 2]
                   Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -492,8 +792,8 @@ POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
-  enabled: false
-  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -530,8 +830,19 @@ STAGE PLANS:
                         Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+                vectorized: false
         Reducer 2 
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(DISTINCT KEY._col2:0._col0)
@@ -547,6 +858,11 @@ STAGE PLANS:
                   value expressions: _col2 (type: bigint)
         Reducer 3 
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -614,8 +930,8 @@ INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube
 INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
-  enabled: false
-  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -643,12 +959,27 @@ STAGE PLANS:
                 TableScan
                   alias: t1
                   Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string), val (type: string)
                     outputColumnNames: key, val
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
                     Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 4:int) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 3:bigint
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0]
                       keys: key (type: string), val (type: string), 0L (type: bigint)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
@@ -657,14 +988,33 @@ STAGE PLANS:
                         key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
                         sort order: +++
                         Map-reduce partition columns: rand() (type: double)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkObjectHashOperator
+                            keyColumnNums: [0, 1, 2]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            partitionColumnNums: [4]
+                            valueColumnNums: [3]
                         Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col3 (type: bigint)
                   Select Operator
                     expressions: key (type: string), val (type: string)
                     outputColumnNames: key, val
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
                     Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(1)
+                      Group By Vectorization:
+                          aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 1) -> 6:int) -> bigint
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: [0]
                       keys: key (type: string), val (type: string), 0L (type: bigint)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
@@ -673,15 +1023,59 @@ STAGE PLANS:
                         key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
                         sort order: +++
                         Map-reduce partition columns: rand() (type: double)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkObjectHashOperator
+                            keyColumnNums: [0, 1, 2]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            partitionColumnNums: [4]
+                            valueColumnNums: [3]
                         Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col3 (type: bigint)
-            Execution mode: llap
+            Execution mode: vectorized, llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, val:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint, bigint, bigint, bigint]
         Reducer 2 
-            Execution mode: llap
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aaa
+                reduceColumnSortOrder: +++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: PARTIALS
+                    keyExpressions: col 0:string, col 1:string, col 2:bigint
+                    native: false
+                    vectorProcessingMode: STREAMING
+                    projectedOutputColumnNums: [0]
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
                 mode: partials
                 outputColumnNames: _col0, _col1, _col2, _col3
@@ -690,10 +1084,22 @@ STAGE PLANS:
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      keyColumnNums: [0, 1, 2]
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      partitionColumnNums: [0, 1]
+                      valueColumnNums: [3]
                   Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col3 (type: bigint)
         Reducer 3 
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -729,6 +1135,11 @@ STAGE PLANS:
                         value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
         Reducer 4 
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
@@ -743,10 +1154,31 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 5 
-            Execution mode: llap
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aaa
+                reduceColumnSortOrder: +++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: PARTIALS
+                    keyExpressions: col 0:string, col 1:string, col 2:bigint
+                    native: false
+                    vectorProcessingMode: STREAMING
+                    projectedOutputColumnNums: [0]
                 keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
                 mode: partials
                 outputColumnNames: _col0, _col1, _col2, _col3
@@ -755,10 +1187,22 @@ STAGE PLANS:
                   key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
                   sort order: +++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      keyColumnNums: [0, 1, 2]
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      partitionColumnNums: [0, 1]
+                      valueColumnNums: [3]
                   Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col3 (type: bigint)
         Reducer 6 
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
@@ -794,6 +1238,11 @@ STAGE PLANS:
                         value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
         Reducer 7 
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)

[2/3] hive git commit: HIVE-19032: Vectorization: Disable GROUP BY aggregations with DISTINCT (Matt McCline, reviewed by Gopal Vijayaraghavan)

Posted by mm...@apache.org.

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
index 7f7624a..80e073b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out
@@ -1430,6 +1430,469 @@ NULL	5	1
 NULL	NULL	1
 NULL	NULL	2
 PREHOOK: query: explain vectorization detail
+select key, value, grouping(key), grouping(value)
+from T1
+group by key, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select key, value, grouping(key), grouping(value)
+from T1
+group by key, value
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: key, value
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 0:int, col 1:int
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: key (type: int), value (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumnNums: [0, 1]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: []
+                        Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY._col0:int, KEY._col1:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:int, col 1:int
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), 0L (type: bigint), 0L (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 1, 2, 3]
+                      selectExpressions: ConstantVectorExpression(val 0) -> 2:bigint, ConstantVectorExpression(val 0) -> 3:bigint
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, grouping(key), grouping(value)
+from T1
+group by key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, grouping(key), grouping(value)
+from T1
+group by key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+key	value	_c2	_c3
+1	1	0	0
+1	NULL	0	0
+2	2	0	0
+3	3	0	0
+3	NULL	0	0
+4	5	0	0
+PREHOOK: query: explain vectorization detail
+select key, value, grouping(value)
+from T1
+group by key, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select key, value, grouping(value)
+from T1
+group by key, value
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: key, value
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 0:int, col 1:int
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: key (type: int), value (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumnNums: [0, 1]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: []
+                        Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY._col0:int, KEY._col1:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:int, col 1:int
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), 0L (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: [0, 1, 2]
+                      selectExpressions: ConstantVectorExpression(val 0) -> 2:bigint
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, grouping(value)
+from T1
+group by key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, grouping(value)
+from T1
+group by key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+key	value	_c2
+1	1	0
+1	NULL	0
+2	2	0
+3	3	0
+3	NULL	0
+4	5	0
+PREHOOK: query: explain vectorization detail
+select key, value
+from T1
+group by key, value
+having grouping(key) = 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select key, value
+from T1
+group by key, value
+having grouping(key) = 0
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: int), value (type: int)
+                    outputColumnNames: key, value
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1]
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 0:int, col 1:int
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: key (type: int), value (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumnNums: [0, 1]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: []
+                        Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:int, value:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aa
+                reduceColumnSortOrder: ++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY._col0:int, KEY._col1:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:int, col 1:int
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value
+from T1
+group by key, value
+having grouping(key) = 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value
+from T1
+group by key, value
+having grouping(key) = 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+key	value
+1	1
+1	NULL
+2	2
+3	3
+3	NULL
+4	5
+PREHOOK: query: explain vectorization detail
 select key, value, `grouping__id`, grouping(key, value)
 from T1
 group by cube(key, value)

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
index e839214..ef49d90 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
@@ -210,27 +210,12 @@ STAGE PLANS:
                 TableScan
                   alias: t1
                   Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
-                  TableScan Vectorization:
-                      native: true
-                      vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string), val (type: string)
                     outputColumnNames: key, val
-                    Select Vectorization:
-                        className: VectorSelectOperator
-                        native: true
-                        projectedOutputColumnNums: [0, 1]
                     Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(DISTINCT val)
-                      Group By Vectorization:
-                          aggregators: VectorUDAFCount(col 1:string) -> bigint
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:string, ConstantVectorExpression(val 0) -> 3:bigint, col 1:string
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: [0]
                       keys: key (type: string), 0L (type: bigint), val (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
@@ -239,29 +224,15 @@ STAGE PLANS:
                         key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
                         sort order: +++
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkOperator
-                            native: false
-                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            nativeConditionsNotMet: No DISTINCT columns IS false
                         Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
+            Execution mode: llap
             LLAP IO: all inputs
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0, 1]
-                    dataColumns: key:string, val:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [bigint]
+                notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+                vectorized: false
         Reducer 2 
             Execution mode: llap
             Reduce Vectorization:
@@ -303,12 +274,12 @@ POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with rollu
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
-1	0
-2	0
-3	0
-7	0
-8	0
-NULL	0
+1	1
+2	1
+3	1
+7	1
+8	2
+NULL	6
 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup
 PREHOOK: type: QUERY
@@ -539,27 +510,12 @@ STAGE PLANS:
                 TableScan
                   alias: t1
                   Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
-                  TableScan Vectorization:
-                      native: true
-                      vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                   Select Operator
                     expressions: key (type: string), val (type: string)
                     outputColumnNames: key, val
-                    Select Vectorization:
-                        className: VectorSelectOperator
-                        native: true
-                        projectedOutputColumnNums: [0, 1]
                     Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(DISTINCT val)
-                      Group By Vectorization:
-                          aggregators: VectorUDAFCount(col 1:string) -> bigint
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:string, ConstantVectorExpression(val 0) -> 3:bigint, col 1:string
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: [0]
                       keys: key (type: string), 0L (type: bigint), val (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
@@ -568,29 +524,15 @@ STAGE PLANS:
                         key expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string)
                         sort order: +++
                         Map-reduce partition columns: _col0 (type: string)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkOperator
-                            native: false
-                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            nativeConditionsNotMet: No DISTINCT columns IS false
                         Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
-            Execution mode: vectorized, llap
+            Execution mode: llap
             LLAP IO: all inputs
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                inputFormatFeatureSupport: []
-                featureSupportInUse: []
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-                allNative: false
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    includeColumns: [0, 1]
-                    dataColumns: key:string, val:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: [bigint]
+                notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+                vectorized: false
         Reducer 2 
             Execution mode: llap
             Reduce Vectorization:
@@ -652,12 +594,12 @@ POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with rollu
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 #### A masked pattern was here ####
-1	0
-2	0
-3	0
-7	0
-8	0
-NULL	0
+1	1
+2	1
+3	1
+7	1
+8	2
+NULL	6
 PREHOOK: query: CREATE TABLE T2(key1 STRING, key2 STRING, val INT) STORED AS ORC
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out
new file mode 100644
index 0000000..79ca6d9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_sort_11.q.out
@@ -0,0 +1,996 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1')
+SELECT * from src where key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@t1@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1')
+SELECT * from src where key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@t1@ds=1
+POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0]
+                    Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: FINAL
+                          keyExpressions: col 0:string
+                          native: false
+                          vectorProcessingMode: STREAMING
+                          projectedOutputColumnNums: []
+                      keys: key (type: string)
+                      mode: final
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: count(_col0)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFCount(col 0:string) -> bigint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumnNums: [0]
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumnNums: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumnNums: [0]
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, val:string
+                    partitionColumnCount: 1
+                    partitionColumns: ds:string
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0]
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct key) from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct key) from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+6
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key), count(1), count(key), sum(distinct key) from T1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key), count(1), count(key), sum(distinct key) from T1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: count(DISTINCT key), count(), count(key), sum(DISTINCT key)
+                      bucketGroup: true
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: bigint), _col3 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+                vectorized: false
+        Reducer 2 
+            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col0:0._col0), count(VALUE._col1), count(VALUE._col2), sum(DISTINCT KEY._col0:1._col0)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct key), count(1), count(key), sum(distinct key) from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct key), count(1), count(key), sum(distinct key) from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+6	10	10	28.0
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: count(DISTINCT key), count(), count(key), sum(DISTINCT key)
+                      bucketGroup: true
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: bigint), _col3 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+                vectorized: false
+        Reducer 2 
+            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col1), count(VALUE._col2), sum(DISTINCT KEY._col1:1._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 5 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 5 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+1	1	1	2.0
+1	1	1	4.0
+1	1	1	8.0
+1	1	1	9.0
+1	3	3	0.0
+1	3	3	5.0
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: count(DISTINCT key), count(), count(key), sum(DISTINCT key)
+                      bucketGroup: true
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: bigint), _col3 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                notVectorizedReason: Aggregation Function expression for GROUPBY operator: DISTINCT not supported
+                vectorized: false
+        Reducer 2 
+            Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: GROUPBY operator: DISTINCT not supported
+                vectorized: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col1), count(VALUE._col2), sum(DISTINCT KEY._col1:1._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 5 Data size: 585 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(distinct key), count(1), count(key), sum(distinct key) from T1 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+0	1	3	3	0.0
+2	1	1	1	2.0
+4	1	1	1	4.0
+5	1	3	3	5.0
+8	1	1	1	8.0
+9	1	1	1	9.0
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key+key) from T1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key+key) from T1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: (UDFToDouble(key) + UDFToDouble(key)) (type: double)
+                    outputColumnNames: _col0
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [6]
+                        selectExpressions: DoubleColAddDoubleColumn(col 4:double, col 5:double)(children: CastStringToDouble(col 0:string) -> 4:double, CastStringToDouble(col 0:string) -> 5:double) -> 6:double
+                    Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: col 6:double
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: _col0 (type: double)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: double)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: double)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkMultiKeyOperator
+                            keyColumnNums: [0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: []
+                        Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, val:string
+                    partitionColumnCount: 1
+                    partitionColumns: ds:string
+                    scratchColumnTypeNames: [double, double, double]
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY._col0:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:double
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: double)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count(_col0)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFCount(col 0:double) -> bigint
+                      className: VectorGroupByOperator
+                      groupByMode: HASH
+                      native: false
+                      vectorProcessingMode: HASH
+                      projectedOutputColumnNums: [0]
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkEmptyKeyOperator
+                        keyColumnNums: []
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        valueColumnNums: [0]
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0]
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct key+key) from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct key+key) from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+6
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct 1) from T1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct 1) from T1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: []
+                    Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: HASH
+                          keyExpressions: ConstantVectorExpression(val 1) -> 4:int
+                          native: false
+                          vectorProcessingMode: HASH
+                          projectedOutputColumnNums: []
+                      keys: 1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            keyColumnNums: [0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: []
+                        Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: []
+                    dataColumns: key:string, val:string
+                    partitionColumnCount: 1
+                    partitionColumns: ds:string
+                    scratchColumnTypeNames: [bigint]
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY._col0:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:int
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumnNums: []
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: count()
+                    Group By Vectorization:
+                        aggregators: VectorUDAFCountStar(*) -> bigint
+                        className: VectorGroupByOperator
+                        groupByMode: HASH
+                        native: false
+                        vectorProcessingMode: HASH
+                        projectedOutputColumnNums: [0]
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkEmptyKeyOperator
+                          keyColumnNums: []
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumnNums: [0]
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0]
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct 1) from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct 1) from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+1
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0]
+                    Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: key (type: string)
+                      sort order: +
+                      Map-reduce partition columns: key (type: string)
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkStringOperator
+                          keyColumnNums: [0]
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumnNums: []
+                      Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, val:string
+                    partitionColumnCount: 1
+                    partitionColumns: ds:string
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY._col0:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: COMPLETE
+                    keyExpressions: col 0:string
+                    native: false
+                    vectorProcessingMode: STREAMING
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: string)
+                mode: complete
+                outputColumnNames: _col0
+                Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkEmptyKeyOperator
+                      keyColumnNums: []
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      valueColumnNums: [0]
+                  Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: string)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCount(col 0:string) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: COMPLETE
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0]
+                mode: complete
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct key) from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct key) from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+6

http://git-wip-us.apache.org/repos/asf/hive/blob/470a2f99/ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out
new file mode 100644
index 0000000..6c6986e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_sort_8.q.out
@@ -0,0 +1,186 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string)
+CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1  PARTITION (ds='1')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/bucket_files/000000_0' INTO TABLE T1  PARTITION (ds='1')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t1
+POSTHOOK: Output: default@t1@ds=1
+PREHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+PREHOOK: Output: default@t1@ds=1
+POSTHOOK: query: INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+POSTHOOK: Output: default@t1@ds=1
+POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+select count(distinct key) from T1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:val:string, 2:ds:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0]
+                    Statistics: Num rows: 6 Data size: 1128 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      Group By Vectorization:
+                          className: VectorGroupByOperator
+                          groupByMode: FINAL
+                          keyExpressions: col 0:string
+                          native: false
+                          vectorProcessingMode: STREAMING
+                          projectedOutputColumnNums: []
+                      keys: key (type: string)
+                      mode: final
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(_col0)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFCount(col 0:string) -> bigint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumnNums: [0]
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumnNums: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumnNums: [0]
+                          Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, val:string
+                    partitionColumnCount: 1
+                    partitionColumns: ds:string
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0]
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(distinct key) from T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(distinct key) from T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t1@ds=1
+#### A masked pattern was here ####
+5
+PREHOOK: query: DROP TABLE T1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: DROP TABLE T1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1