You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2018/07/19 21:24:23 UTC
[10/13] hive git commit: HIVE-17896: TopNKey: Create a standalone vectorizable TopNKey operator (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

http://git-wip-us.apache.org/repos/asf/hive/blob/cc294d32/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out b/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
new file mode 100644
index 0000000..16803c9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
@@ -0,0 +1,592 @@
+PREHOOK: query: explain vectorization detail
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 4]
+                        selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int
+                    Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Top N Key Operator
+                      sort order: +
+                      keys: _col0 (type: string)
+                      Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                      top n: 5
+                      Top N Key Vectorization:
+                          className: VectorTopNKeyOperator
+                          keyExpressions: col 0:string
+                          native: true
+                      Group By Operator
+                        aggregations: sum(_col1)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFSumLong(col 4:int) -> bigint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            keyExpressions: col 0:string
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumnNums: [0]
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkStringOperator
+                              keyColumnNums: [0]
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumnNums: [1]
+                          Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+                          TopN Hash Memory Usage: 0.1
+                          value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [string, bigint]
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY._col0:string, VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:string
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: [0]
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      keyColumnNums: [0]
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      valueColumnNums: [1]
+                  Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+                  TopN Hash Memory Usage: 0.1
+                  value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:string, VALUE._col0:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
+                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1]
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 5
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	0
+10	10
+100	200
+103	206
+104	208
+PREHOOK: query: explain vectorization detail
+SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0]
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    Top N Key Operator
+                      sort order: +
+                      keys: key (type: string)
+                      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                      top n: 5
+                      Top N Key Vectorization:
+                          className: VectorTopNKeyOperator
+                          keyExpressions: col 0:string
+                          native: true
+                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            keyExpressions: col 0:string
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumnNums: []
+                        keys: key (type: string)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkStringOperator
+                              keyColumnNums: [0]
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumnNums: []
+                          Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                          TopN Hash Memory Usage: 0.1
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY._col0:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    keyExpressions: col 0:string
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumnNums: []
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkObjectHashOperator
+                      keyColumnNums: [0]
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      valueColumnNums: []
+                  Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                  TopN Hash Memory Usage: 0.1
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0]
+                Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 5
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0
+10
+100
+103
+104
+PREHOOK: query: explain vectorization detail
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: src1
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0:string)
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0]
+                      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            keyColumnNums: [0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: []
+                        Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: src2
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: SelectColumnIsNotNull(col 0:string)
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                      Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkStringOperator
+                            keyColumnNums: [0]
+                            native: true
+                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                            valueColumnNums: [1]
+                        Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: string)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    includeColumns: [0, 1]
+                    dataColumns: key:string, value:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0, _col2
+                Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+                    TopN Hash Memory Usage: 0.1
+                    value expressions: _col1 (type: string)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+                outputColumnNames: _col0, _col1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1]
+                Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+                Limit
+                  Number of rows: 5
+                  Limit Vectorization:
+                      className: VectorLimitOperator
+                      native: true
+                  Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0
+0	val_0
+0	val_0
+0	val_0
+0	val_0

http://git-wip-us.apache.org/repos/asf/hive/blob/cc294d32/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out b/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
index ded40fd..6848331 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
@@ -489,31 +489,40 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumnNums: [0]
                     Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
-                    Group By Operator
-                      Group By Vectorization:
-                          className: VectorGroupByOperator
-                          groupByMode: HASH
-                          keyExpressions: col 0:tinyint
-                          native: false
-                          vectorProcessingMode: HASH
-                          projectedOutputColumnNums: []
+                    Top N Key Operator
+                      sort order: +
                       keys: ctinyint (type: tinyint)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: tinyint)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: tinyint)
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkObjectHashOperator
-                            keyColumnNums: [0]
-                            native: true
-                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            partitionColumnNums: [0]
-                            valueColumnNums: []
-                        Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
-                        TopN Hash Memory Usage: 0.3
+                      Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+                      top n: 20
+                      Top N Key Vectorization:
+                          className: VectorTopNKeyOperator
+                          keyExpressions: col 0:tinyint
+                          native: true
+                      Group By Operator
+                        Group By Vectorization:
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            keyExpressions: col 0:tinyint
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumnNums: []
+                        keys: ctinyint (type: tinyint)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: tinyint)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: tinyint)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkObjectHashOperator
+                              keyColumnNums: [0]
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              partitionColumnNums: [0]
+                              valueColumnNums: []
+                          Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                          TopN Hash Memory Usage: 0.3
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -558,19 +567,19 @@ STAGE PLANS:
                 keys: KEY._col0 (type: tinyint)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Limit Vectorization:
                       className: VectorLimitOperator
                       native: true
-                  Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/cc294d32/ql/src/test/results/clientpositive/perf/tez/query10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/query10.q.out
index a88b7e1..a537bd1 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query10.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query10.q.out
@@ -133,144 +133,146 @@ Stage-0
     limit:100
     Stage-1
       Reducer 6 vectorized
-      File Output Operator [FS_143]
-        Limit [LIM_142] (rows=100 width=88)
+      File Output Operator [FS_144]
+        Limit [LIM_143] (rows=100 width=88)
           Number of rows:100
-          Select Operator [SEL_141] (rows=1045432122 width=88)
+          Select Operator [SEL_142] (rows=1045432122 width=88)
             Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
           <-Reducer 5 [SIMPLE_EDGE] vectorized
-            SHUFFLE [RS_140]
-              Select Operator [SEL_139] (rows=1045432122 width=88)
+            SHUFFLE [RS_141]
+              Select Operator [SEL_140] (rows=1045432122 width=88)
                 Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col12"]
-                Group By Operator [GBY_138] (rows=1045432122 width=88)
+                Group By Operator [GBY_139] (rows=1045432122 width=88)
                   Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7
                 <-Reducer 4 [SIMPLE_EDGE]
                   SHUFFLE [RS_63]
                     PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                     Group By Operator [GBY_62] (rows=2090864244 width=88)
                       Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
-                      Select Operator [SEL_61] (rows=2090864244 width=88)
-                        Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
-                        Filter Operator [FIL_60] (rows=2090864244 width=88)
-                          predicate:(_col15 is not null or _col17 is not null)
-                          Merge Join Operator [MERGEJOIN_108] (rows=2090864244 width=88)
-                            Conds:RS_55._col0=RS_56._col0(Left Semi),RS_55._col0=RS_131._col0(Left Outer),RS_55._col0=RS_137._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"]
-                          <-Reducer 10 [SIMPLE_EDGE]
-                            SHUFFLE [RS_56]
-                              PartitionCols:_col0
-                              Group By Operator [GBY_54] (rows=633595212 width=88)
-                                Output:["_col0"],keys:_col0
-                                Select Operator [SEL_18] (rows=633595212 width=88)
-                                  Output:["_col0"]
-                                  Merge Join Operator [MERGEJOIN_105] (rows=633595212 width=88)
-                                    Conds:RS_120._col0=RS_123._col0(Inner),Output:["_col1"]
-                                  <-Map 11 [SIMPLE_EDGE] vectorized
-                                    SHUFFLE [RS_123]
+                      Top N Key Operator [TNK_103] (rows=2090864244 width=88)
+                        keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13,sort order:++++++++,top n:100
+                        Select Operator [SEL_61] (rows=2090864244 width=88)
+                          Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
+                          Filter Operator [FIL_60] (rows=2090864244 width=88)
+                            predicate:(_col15 is not null or _col17 is not null)
+                            Merge Join Operator [MERGEJOIN_109] (rows=2090864244 width=88)
+                              Conds:RS_55._col0=RS_56._col0(Left Semi),RS_55._col0=RS_132._col0(Left Outer),RS_55._col0=RS_138._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"]
+                            <-Reducer 10 [SIMPLE_EDGE]
+                              SHUFFLE [RS_56]
+                                PartitionCols:_col0
+                                Group By Operator [GBY_54] (rows=633595212 width=88)
+                                  Output:["_col0"],keys:_col0
+                                  Select Operator [SEL_18] (rows=633595212 width=88)
+                                    Output:["_col0"]
+                                    Merge Join Operator [MERGEJOIN_106] (rows=633595212 width=88)
+                                      Conds:RS_121._col0=RS_124._col0(Inner),Output:["_col1"]
+                                    <-Map 11 [SIMPLE_EDGE] vectorized
+                                      SHUFFLE [RS_124]
+                                        PartitionCols:_col0
+                                        Select Operator [SEL_123] (rows=4058 width=1119)
+                                          Output:["_col0"]
+                                          Filter Operator [FIL_122] (rows=4058 width=1119)
+                                            predicate:((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7)
+                                            TableScan [TS_12] (rows=73049 width=1119)
+                                              default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+                                    <-Map 9 [SIMPLE_EDGE] vectorized
+                                      SHUFFLE [RS_121]
+                                        PartitionCols:_col0
+                                        Select Operator [SEL_120] (rows=575995635 width=88)
+                                          Output:["_col0","_col1"]
+                                          Filter Operator [FIL_119] (rows=575995635 width=88)
+                                            predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
+                                            TableScan [TS_9] (rows=575995635 width=88)
+                                              default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
+                            <-Reducer 13 [ONE_TO_ONE_EDGE] vectorized
+                              FORWARD [RS_132]
+                                PartitionCols:_col0
+                                Select Operator [SEL_131] (rows=79201469 width=135)
+                                  Output:["_col0","_col1"]
+                                  Group By Operator [GBY_130] (rows=79201469 width=135)
+                                    Output:["_col0"],keys:KEY._col0
+                                  <-Reducer 12 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_30]
                                       PartitionCols:_col0
-                                      Select Operator [SEL_122] (rows=4058 width=1119)
-                                        Output:["_col0"]
-                                        Filter Operator [FIL_121] (rows=4058 width=1119)
-                                          predicate:((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7)
-                                          TableScan [TS_12] (rows=73049 width=1119)
-                                            default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
-                                  <-Map 9 [SIMPLE_EDGE] vectorized
-                                    SHUFFLE [RS_120]
+                                      Group By Operator [GBY_29] (rows=158402938 width=135)
+                                        Output:["_col0"],keys:_col1
+                                        Merge Join Operator [MERGEJOIN_107] (rows=158402938 width=135)
+                                          Conds:RS_129._col0=RS_125._col0(Inner),Output:["_col1"]
+                                        <-Map 11 [SIMPLE_EDGE] vectorized
+                                          SHUFFLE [RS_125]
+                                            PartitionCols:_col0
+                                             Please refer to the previous Select Operator [SEL_123]
+                                        <-Map 16 [SIMPLE_EDGE] vectorized
+                                          SHUFFLE [RS_129]
+                                            PartitionCols:_col0
+                                            Select Operator [SEL_128] (rows=144002668 width=135)
+                                              Output:["_col0","_col1"]
+                                              Filter Operator [FIL_127] (rows=144002668 width=135)
+                                                predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null)
+                                                TableScan [TS_19] (rows=144002668 width=135)
+                                                  default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"]
+                            <-Reducer 15 [ONE_TO_ONE_EDGE] vectorized
+                              FORWARD [RS_138]
+                                PartitionCols:_col0
+                                Select Operator [SEL_137] (rows=158394413 width=135)
+                                  Output:["_col0","_col1"]
+                                  Group By Operator [GBY_136] (rows=158394413 width=135)
+                                    Output:["_col0"],keys:KEY._col0
+                                  <-Reducer 14 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_44]
                                       PartitionCols:_col0
-                                      Select Operator [SEL_119] (rows=575995635 width=88)
-                                        Output:["_col0","_col1"]
-                                        Filter Operator [FIL_118] (rows=575995635 width=88)
-                                          predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null)
-                                          TableScan [TS_9] (rows=575995635 width=88)
-                                            default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
-                          <-Reducer 13 [ONE_TO_ONE_EDGE] vectorized
-                            FORWARD [RS_131]
-                              PartitionCols:_col0
-                              Select Operator [SEL_130] (rows=79201469 width=135)
-                                Output:["_col0","_col1"]
-                                Group By Operator [GBY_129] (rows=79201469 width=135)
-                                  Output:["_col0"],keys:KEY._col0
-                                <-Reducer 12 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_30]
+                                      Group By Operator [GBY_43] (rows=316788826 width=135)
+                                        Output:["_col0"],keys:_col1
+                                        Merge Join Operator [MERGEJOIN_108] (rows=316788826 width=135)
+                                          Conds:RS_135._col0=RS_126._col0(Inner),Output:["_col1"]
+                                        <-Map 11 [SIMPLE_EDGE] vectorized
+                                          SHUFFLE [RS_126]
+                                            PartitionCols:_col0
+                                             Please refer to the previous Select Operator [SEL_123]
+                                        <-Map 17 [SIMPLE_EDGE] vectorized
+                                          SHUFFLE [RS_135]
+                                            PartitionCols:_col0
+                                            Select Operator [SEL_134] (rows=287989836 width=135)
+                                              Output:["_col0","_col1"]
+                                              Filter Operator [FIL_133] (rows=287989836 width=135)
+                                                predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null)
+                                                TableScan [TS_33] (rows=287989836 width=135)
+                                                  default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"]
+                            <-Reducer 3 [SIMPLE_EDGE]
+                              SHUFFLE [RS_55]
+                                PartitionCols:_col0
+                                Merge Join Operator [MERGEJOIN_105] (rows=96800003 width=860)
+                                  Conds:RS_50._col1=RS_118._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
+                                <-Map 8 [SIMPLE_EDGE] vectorized
+                                  SHUFFLE [RS_118]
                                     PartitionCols:_col0
-                                    Group By Operator [GBY_29] (rows=158402938 width=135)
-                                      Output:["_col0"],keys:_col1
-                                      Merge Join Operator [MERGEJOIN_106] (rows=158402938 width=135)
-                                        Conds:RS_128._col0=RS_124._col0(Inner),Output:["_col1"]
-                                      <-Map 11 [SIMPLE_EDGE] vectorized
-                                        SHUFFLE [RS_124]
-                                          PartitionCols:_col0
-                                           Please refer to the previous Select Operator [SEL_122]
-                                      <-Map 16 [SIMPLE_EDGE] vectorized
-                                        SHUFFLE [RS_128]
-                                          PartitionCols:_col0
-                                          Select Operator [SEL_127] (rows=144002668 width=135)
-                                            Output:["_col0","_col1"]
-                                            Filter Operator [FIL_126] (rows=144002668 width=135)
-                                              predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null)
-                                              TableScan [TS_19] (rows=144002668 width=135)
-                                                default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"]
-                          <-Reducer 15 [ONE_TO_ONE_EDGE] vectorized
-                            FORWARD [RS_137]
-                              PartitionCols:_col0
-                              Select Operator [SEL_136] (rows=158394413 width=135)
-                                Output:["_col0","_col1"]
-                                Group By Operator [GBY_135] (rows=158394413 width=135)
-                                  Output:["_col0"],keys:KEY._col0
-                                <-Reducer 14 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_44]
-                                    PartitionCols:_col0
-                                    Group By Operator [GBY_43] (rows=316788826 width=135)
-                                      Output:["_col0"],keys:_col1
-                                      Merge Join Operator [MERGEJOIN_107] (rows=316788826 width=135)
-                                        Conds:RS_134._col0=RS_125._col0(Inner),Output:["_col1"]
-                                      <-Map 11 [SIMPLE_EDGE] vectorized
-                                        SHUFFLE [RS_125]
-                                          PartitionCols:_col0
-                                           Please refer to the previous Select Operator [SEL_122]
-                                      <-Map 17 [SIMPLE_EDGE] vectorized
-                                        SHUFFLE [RS_134]
-                                          PartitionCols:_col0
-                                          Select Operator [SEL_133] (rows=287989836 width=135)
-                                            Output:["_col0","_col1"]
-                                            Filter Operator [FIL_132] (rows=287989836 width=135)
-                                              predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null)
-                                              TableScan [TS_33] (rows=287989836 width=135)
-                                                default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"]
-                          <-Reducer 3 [SIMPLE_EDGE]
-                            SHUFFLE [RS_55]
-                              PartitionCols:_col0
-                              Merge Join Operator [MERGEJOIN_104] (rows=96800003 width=860)
-                                Conds:RS_50._col1=RS_117._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
-                              <-Map 8 [SIMPLE_EDGE] vectorized
-                                SHUFFLE [RS_117]
-                                  PartitionCols:_col0
-                                  Select Operator [SEL_116] (rows=1861800 width=385)
-                                    Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
-                                    Filter Operator [FIL_115] (rows=1861800 width=385)
-                                      predicate:cd_demo_sk is not null
-                                      TableScan [TS_6] (rows=1861800 width=385)
-                                        default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"]
-                              <-Reducer 2 [SIMPLE_EDGE]
-                                SHUFFLE [RS_50]
-                                  PartitionCols:_col1
-                                  Merge Join Operator [MERGEJOIN_103] (rows=88000001 width=860)
-                                    Conds:RS_111._col2=RS_114._col0(Inner),Output:["_col0","_col1"]
-                                  <-Map 1 [SIMPLE_EDGE] vectorized
-                                    SHUFFLE [RS_111]
-                                      PartitionCols:_col2
-                                      Select Operator [SEL_110] (rows=80000000 width=860)
-                                        Output:["_col0","_col1","_col2"]
-                                        Filter Operator [FIL_109] (rows=80000000 width=860)
-                                          predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null)
-                                          TableScan [TS_0] (rows=80000000 width=860)
-                                            default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
-                                  <-Map 7 [SIMPLE_EDGE] vectorized
-                                    SHUFFLE [RS_114]
-                                      PartitionCols:_col0
-                                      Select Operator [SEL_113] (rows=20000000 width=1014)
-                                        Output:["_col0"]
-                                        Filter Operator [FIL_112] (rows=20000000 width=1014)
-                                          predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null)
-                                          TableScan [TS_3] (rows=40000000 width=1014)
-                                            default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"]
+                                    Select Operator [SEL_117] (rows=1861800 width=385)
+                                      Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
+                                      Filter Operator [FIL_116] (rows=1861800 width=385)
+                                        predicate:cd_demo_sk is not null
+                                        TableScan [TS_6] (rows=1861800 width=385)
+                                          default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"]
+                                <-Reducer 2 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_50]
+                                    PartitionCols:_col1
+                                    Merge Join Operator [MERGEJOIN_104] (rows=88000001 width=860)
+                                      Conds:RS_112._col2=RS_115._col0(Inner),Output:["_col0","_col1"]
+                                    <-Map 1 [SIMPLE_EDGE] vectorized
+                                      SHUFFLE [RS_112]
+                                        PartitionCols:_col2
+                                        Select Operator [SEL_111] (rows=80000000 width=860)
+                                          Output:["_col0","_col1","_col2"]
+                                          Filter Operator [FIL_110] (rows=80000000 width=860)
+                                            predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null)
+                                            TableScan [TS_0] (rows=80000000 width=860)
+                                              default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
+                                    <-Map 7 [SIMPLE_EDGE] vectorized
+                                      SHUFFLE [RS_115]
+                                        PartitionCols:_col0
+                                        Select Operator [SEL_114] (rows=20000000 width=1014)
+                                          Output:["_col0"]
+                                          Filter Operator [FIL_113] (rows=20000000 width=1014)
+                                            predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null)
+                                            TableScan [TS_3] (rows=40000000 width=1014)
+                                              default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"]