You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2018/07/19 20:59:53 UTC
[15/18] hive git commit: HIVE-17896: TopNKey: Create a standalone
vectorizable TopNKey operator (Teddy Choi,
reviewed by Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out b/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
new file mode 100644
index 0000000..16803c9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_topnkey.q.out
@@ -0,0 +1,592 @@
+PREHOOK: query: explain vectorization detail
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 4]
+ selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Top N Key Operator
+ sort order: +
+ keys: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 5
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 0:string
+ native: true
+ Group By Operator
+ aggregations: sum(_col1)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 4:int) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [1]
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:string, value:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [string, bigint]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY._col0:string, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [1]
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY.reducesinkkey0:string, VALUE._col0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 5
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 5
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0
+10 10
+100 200
+103 206
+104 208
+PREHOOK: query: explain vectorization detail
+SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Top N Key Operator
+ sort order: +
+ keys: key (type: string)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 5
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 0:string
+ native: true
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0]
+ dataColumns: key:string, value:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: KEY._col0:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: []
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: KEY.reducesinkkey0:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 5
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 5
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0
+10
+100
+103
+104
+PREHOOK: query: explain vectorization detail
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:string)
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0]
+ dataColumns: key:string, value:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:string)
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [1]
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:string, value:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: string)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 5
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 5
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out b/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
index e79cdf7..f7c00f8 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_limit.q.out
@@ -491,31 +491,40 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0]
Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:tinyint
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: []
+ Top N Key Operator
+ sort order: +
keys: ctinyint (type: tinyint)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: tinyint)
- sort order: +
- Map-reduce partition columns: _col0 (type: tinyint)
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [0]
- valueColumnNums: []
- Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.3
+ Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 20
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 0:tinyint
+ native: true
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:tinyint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: ctinyint (type: tinyint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: tinyint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: tinyint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumnNums: [0]
+ valueColumnNums: []
+ Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.3
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -560,19 +569,19 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/perf/tez/query10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/query10.q.out
index 1b6adee..a8f097f 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query10.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query10.q.out
@@ -140,188 +140,190 @@ Stage-0
limit:100
Stage-1
Reducer 6 vectorized
- File Output Operator [FS_224]
- Limit [LIM_223] (rows=100 width=88)
+ File Output Operator [FS_225]
+ Limit [LIM_224] (rows=100 width=88)
Number of rows:100
- Select Operator [SEL_222] (rows=1045432122 width=88)
+ Select Operator [SEL_223] (rows=1045432122 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_221]
- Select Operator [SEL_220] (rows=1045432122 width=88)
+ SHUFFLE [RS_222]
+ Select Operator [SEL_221] (rows=1045432122 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col12"]
- Group By Operator [GBY_219] (rows=1045432122 width=88)
+ Group By Operator [GBY_220] (rows=1045432122 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7
<-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_63]
PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Group By Operator [GBY_62] (rows=2090864244 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
- Select Operator [SEL_61] (rows=2090864244 width=88)
- Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
- Filter Operator [FIL_60] (rows=2090864244 width=88)
- predicate:(_col15 is not null or _col17 is not null)
- Merge Join Operator [MERGEJOIN_172] (rows=2090864244 width=88)
- Conds:RS_55._col0=RS_56._col0(Left Semi),RS_55._col0=RS_210._col0(Left Outer),RS_55._col0=RS_218._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"]
- <-Reducer 3 [SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_55]
- PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_168] (rows=96800003 width=860)
- Conds:RS_50._col1=RS_181._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_181]
- PartitionCols:_col0
- Select Operator [SEL_180] (rows=1861800 width=385)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
- Filter Operator [FIL_179] (rows=1861800 width=385)
- predicate:cd_demo_sk is not null
- TableScan [TS_6] (rows=1861800 width=385)
- default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_50]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_167] (rows=88000001 width=860)
- Conds:RS_175._col2=RS_178._col0(Inner),Output:["_col0","_col1"]
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_175]
- PartitionCols:_col2
- Select Operator [SEL_174] (rows=80000000 width=860)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_173] (rows=80000000 width=860)
- predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null)
- TableScan [TS_0] (rows=80000000 width=860)
- default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
- <-Map 8 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_178]
- PartitionCols:_col0
- Select Operator [SEL_177] (rows=20000000 width=1014)
- Output:["_col0"]
- Filter Operator [FIL_176] (rows=20000000 width=1014)
- predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null)
- TableScan [TS_3] (rows=40000000 width=1014)
- default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"]
- <-Reducer 11 [SIMPLE_EDGE]
- SHUFFLE [RS_56]
- PartitionCols:_col0
- Group By Operator [GBY_54] (rows=633595212 width=88)
- Output:["_col0"],keys:_col0
- Select Operator [SEL_18] (rows=633595212 width=88)
- Output:["_col0"]
- Merge Join Operator [MERGEJOIN_169] (rows=633595212 width=88)
- Conds:RS_202._col0=RS_184._col0(Inner),Output:["_col1"]
- <-Map 12 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_184]
+ Top N Key Operator [TNK_103] (rows=2090864244 width=88)
+ keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13,sort order:++++++++,top n:100
+ Select Operator [SEL_61] (rows=2090864244 width=88)
+ Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
+ Filter Operator [FIL_60] (rows=2090864244 width=88)
+ predicate:(_col15 is not null or _col17 is not null)
+ Merge Join Operator [MERGEJOIN_173] (rows=2090864244 width=88)
+ Conds:RS_55._col0=RS_56._col0(Left Semi),RS_55._col0=RS_211._col0(Left Outer),RS_55._col0=RS_219._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_55]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_169] (rows=96800003 width=860)
+ Conds:RS_50._col1=RS_182._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
+ <-Map 9 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_182]
+ PartitionCols:_col0
+ Select Operator [SEL_181] (rows=1861800 width=385)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
+ Filter Operator [FIL_180] (rows=1861800 width=385)
+ predicate:cd_demo_sk is not null
+ TableScan [TS_6] (rows=1861800 width=385)
+ default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_50]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_168] (rows=88000001 width=860)
+ Conds:RS_176._col2=RS_179._col0(Inner),Output:["_col0","_col1"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_176]
+ PartitionCols:_col2
+ Select Operator [SEL_175] (rows=80000000 width=860)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_174] (rows=80000000 width=860)
+ predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null)
+ TableScan [TS_0] (rows=80000000 width=860)
+ default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
+ <-Map 8 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_179]
+ PartitionCols:_col0
+ Select Operator [SEL_178] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_177] (rows=20000000 width=1014)
+ predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null)
+ TableScan [TS_3] (rows=40000000 width=1014)
+ default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"]
+ <-Reducer 11 [SIMPLE_EDGE]
+ SHUFFLE [RS_56]
+ PartitionCols:_col0
+ Group By Operator [GBY_54] (rows=633595212 width=88)
+ Output:["_col0"],keys:_col0
+ Select Operator [SEL_18] (rows=633595212 width=88)
+ Output:["_col0"]
+ Merge Join Operator [MERGEJOIN_170] (rows=633595212 width=88)
+ Conds:RS_203._col0=RS_185._col0(Inner),Output:["_col1"]
+ <-Map 12 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_185]
+ PartitionCols:_col0
+ Select Operator [SEL_184] (rows=4058 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_183] (rows=4058 width=1119)
+ predicate:((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7)
+ TableScan [TS_12] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_203]
+ PartitionCols:_col0
+ Select Operator [SEL_202] (rows=575995635 width=88)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_201] (rows=575995635 width=88)
+ predicate:((ss_customer_sk BETWEEN DynamicValue(RS_55_c_c_customer_sk_min) AND DynamicValue(RS_55_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_55_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null)
+ TableScan [TS_9] (rows=575995635 width=88)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
+ <-Reducer 13 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_198]
+ Group By Operator [GBY_197] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_194]
+ Group By Operator [GBY_191] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_186] (rows=4058 width=1119)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_184]
+ <-Reducer 7 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_200]
+ Group By Operator [GBY_199] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=96800000)"]
+ <-Reducer 3 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_136]
+ Group By Operator [GBY_135] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=96800000)"]
+ Select Operator [SEL_134] (rows=96800003 width=860)
+ Output:["_col0"]
+ Please refer to the previous Merge Join Operator [MERGEJOIN_169]
+ <-Reducer 15 [ONE_TO_ONE_EDGE] vectorized
+ FORWARD [RS_211]
+ PartitionCols:_col0
+ Select Operator [SEL_210] (rows=79201469 width=135)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_209] (rows=79201469 width=135)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 14 [SIMPLE_EDGE]
+ SHUFFLE [RS_30]
PartitionCols:_col0
- Select Operator [SEL_183] (rows=4058 width=1119)
- Output:["_col0"]
- Filter Operator [FIL_182] (rows=4058 width=1119)
- predicate:((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7)
- TableScan [TS_12] (rows=73049 width=1119)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"]
- <-Map 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_202]
+ Group By Operator [GBY_29] (rows=158402938 width=135)
+ Output:["_col0"],keys:_col1
+ Merge Join Operator [MERGEJOIN_171] (rows=158402938 width=135)
+ Conds:RS_208._col0=RS_187._col0(Inner),Output:["_col1"]
+ <-Map 12 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_187]
+ PartitionCols:_col0
+ Please refer to the previous Select Operator [SEL_184]
+ <-Map 20 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_208]
+ PartitionCols:_col0
+ Select Operator [SEL_207] (rows=144002668 width=135)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_206] (rows=144002668 width=135)
+ predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null)
+ TableScan [TS_19] (rows=144002668 width=135)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"]
+ <-Reducer 16 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_205]
+ Group By Operator [GBY_204] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_195]
+ Group By Operator [GBY_192] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_188] (rows=4058 width=1119)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_184]
+ <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized
+ FORWARD [RS_219]
+ PartitionCols:_col0
+ Select Operator [SEL_218] (rows=158394413 width=135)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_217] (rows=158394413 width=135)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 17 [SIMPLE_EDGE]
+ SHUFFLE [RS_44]
PartitionCols:_col0
- Select Operator [SEL_201] (rows=575995635 width=88)
- Output:["_col0","_col1"]
- Filter Operator [FIL_200] (rows=575995635 width=88)
- predicate:((ss_customer_sk BETWEEN DynamicValue(RS_55_c_c_customer_sk_min) AND DynamicValue(RS_55_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_55_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null)
- TableScan [TS_9] (rows=575995635 width=88)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"]
- <-Reducer 13 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_197]
- Group By Operator [GBY_196] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_193]
- Group By Operator [GBY_190] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_185] (rows=4058 width=1119)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_183]
- <-Reducer 7 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_199]
- Group By Operator [GBY_198] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=96800000)"]
- <-Reducer 3 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_135]
- Group By Operator [GBY_134] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=96800000)"]
- Select Operator [SEL_133] (rows=96800003 width=860)
- Output:["_col0"]
- Please refer to the previous Merge Join Operator [MERGEJOIN_168]
- <-Reducer 15 [ONE_TO_ONE_EDGE] vectorized
- FORWARD [RS_210]
- PartitionCols:_col0
- Select Operator [SEL_209] (rows=79201469 width=135)
- Output:["_col0","_col1"]
- Group By Operator [GBY_208] (rows=79201469 width=135)
- Output:["_col0"],keys:KEY._col0
- <-Reducer 14 [SIMPLE_EDGE]
- SHUFFLE [RS_30]
- PartitionCols:_col0
- Group By Operator [GBY_29] (rows=158402938 width=135)
- Output:["_col0"],keys:_col1
- Merge Join Operator [MERGEJOIN_170] (rows=158402938 width=135)
- Conds:RS_207._col0=RS_186._col0(Inner),Output:["_col1"]
- <-Map 12 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_186]
- PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_183]
- <-Map 20 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_207]
- PartitionCols:_col0
- Select Operator [SEL_206] (rows=144002668 width=135)
- Output:["_col0","_col1"]
- Filter Operator [FIL_205] (rows=144002668 width=135)
- predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null)
- TableScan [TS_19] (rows=144002668 width=135)
- default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"]
- <-Reducer 16 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_204]
- Group By Operator [GBY_203] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_194]
- Group By Operator [GBY_191] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_187] (rows=4058 width=1119)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_183]
- <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized
- FORWARD [RS_218]
- PartitionCols:_col0
- Select Operator [SEL_217] (rows=158394413 width=135)
- Output:["_col0","_col1"]
- Group By Operator [GBY_216] (rows=158394413 width=135)
- Output:["_col0"],keys:KEY._col0
- <-Reducer 17 [SIMPLE_EDGE]
- SHUFFLE [RS_44]
- PartitionCols:_col0
- Group By Operator [GBY_43] (rows=316788826 width=135)
- Output:["_col0"],keys:_col1
- Merge Join Operator [MERGEJOIN_171] (rows=316788826 width=135)
- Conds:RS_215._col0=RS_188._col0(Inner),Output:["_col1"]
- <-Map 12 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_188]
- PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_183]
- <-Map 21 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_215]
- PartitionCols:_col0
- Select Operator [SEL_214] (rows=287989836 width=135)
- Output:["_col0","_col1"]
- Filter Operator [FIL_213] (rows=287989836 width=135)
- predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null)
- TableScan [TS_33] (rows=287989836 width=135)
- default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"]
- <-Reducer 19 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_212]
- Group By Operator [GBY_211] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_195]
- Group By Operator [GBY_192] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_189] (rows=4058 width=1119)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_183]
+ Group By Operator [GBY_43] (rows=316788826 width=135)
+ Output:["_col0"],keys:_col1
+ Merge Join Operator [MERGEJOIN_172] (rows=316788826 width=135)
+ Conds:RS_216._col0=RS_189._col0(Inner),Output:["_col1"]
+ <-Map 12 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_189]
+ PartitionCols:_col0
+ Please refer to the previous Select Operator [SEL_184]
+ <-Map 21 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_216]
+ PartitionCols:_col0
+ Select Operator [SEL_215] (rows=287989836 width=135)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_214] (rows=287989836 width=135)
+ predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null)
+ TableScan [TS_33] (rows=287989836 width=135)
+ default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"]
+ <-Reducer 19 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_213]
+ Group By Operator [GBY_212] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_196]
+ Group By Operator [GBY_193] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_190] (rows=4058 width=1119)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_184]