You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2018/07/19 21:44:54 UTC
[16/54] [abbrv] hive git commit: HIVE-17896: TopNKey: Create a
standalone vectorizable TopNKey operator (Teddy Choi,
reviewed by Jesus Camacho Rodriguez)
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out
index 6f65061..b278ecc 100644
--- a/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out
@@ -230,31 +230,40 @@ STAGE PLANS:
projectedOutputColumnNums: [8, 9, 10]
selectExpressions: VectorUDFMapIndexStringScalar(col 1:map<string,string>, key: k1) -> 8:string, VectorUDFMapIndexLongScalar(col 2:map<int,int>, key: 123) -> 9:int, VectorUDFMapIndexDoubleScalar(col 3:map<double,double>, key: 123.123) -> 10:double
Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(_col1), sum(_col2)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 9:int) -> bigint, VectorUDAFSumDouble(col 10:double) -> double
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 8:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1]
+ Top N Key Operator
+ sort order: +
keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ top n: 10
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 8:string
+ native: true
+ Group By Operator
+ aggregations: sum(_col1), sum(_col2)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 9:int) -> bigint, VectorUDAFSumDouble(col 10:double) -> double
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 8:string
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0, 1]
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint), _col2 (type: double)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint), _col2 (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Map Vectorization:
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out
index affb27e..fec8093 100644
--- a/ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out
@@ -238,31 +238,40 @@ STAGE PLANS:
projectedOutputColumnNums: [4]
selectExpressions: VectorUDFStructField(col 1:struct<f1:int,f2:string>, col 0:int) -> 4:int
Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: sum(_col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 4:int) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 4:int
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
+ Top N Key Operator
+ sort order: +
keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ top n: 10
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 4:int
+ native: true
+ Group By Operator
+ aggregations: sum(_col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 4:int) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 4:int
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Map Vectorization:
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/topnkey.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/topnkey.q.out b/ql/src/test/results/clientpositive/llap/topnkey.q.out
new file mode 100644
index 0000000..c1d8874
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/topnkey.q.out
@@ -0,0 +1,318 @@
+PREHOOK: query: EXPLAIN
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Top N Key Operator
+ sort order: +
+ keys: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 5
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 5
+ Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 5
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0
+10 10
+100 200
+103 206
+104 208
+PREHOOK: query: EXPLAIN
+SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Top N Key Operator
+ sort order: +
+ keys: key (type: string)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 5
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 5
+ Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 5
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0
+10
+100
+103
+104
+PREHOOK: query: explain vectorization detail
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: src2
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col2
+ Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 5
+ Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 5
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
index f801856..8c74a92 100644
--- a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
@@ -141,31 +141,40 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [2]
Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(50), count(), sum(50.0D), count(50.0D), sum(50), count(50)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 50) -> 12:int) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> double, VectorUDAFCount(ConstantVectorExpression(val 50.0) -> 14:double) -> bigint, VectorUDAFSumDecimal(ConstantVectorExpression(val 50) -> 15:decimal(10,0)) -> decimal(20,0), VectorUDAFCount(ConstantVectorExpression(val 50) -> 16:decimal(10,0)) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 2:int
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
+ Top N Key Operator
+ sort order: +
keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 257 Data size: 40092 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 10
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 2:int
+ native: true
+ Group By Operator
+ aggregations: sum(50), count(), sum(50.0D), count(50.0D), sum(50), count(50)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 50) -> 12:int) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> double, VectorUDAFCount(ConstantVectorExpression(val 50.0) -> 14:double) -> bigint, VectorUDAFSumDecimal(ConstantVectorExpression(val 50) -> 15:decimal(10,0)) -> decimal(20,0), VectorUDAFCount(ConstantVectorExpression(val 50) -> 16:decimal(10,0)) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 2:int
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 257 Data size: 40092 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(12,0)), _col6 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 257 Data size: 40092 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(12,0)), _col6 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/vector_char_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_char_2.q.out b/ql/src/test/results/clientpositive/llap/vector_char_2.q.out
index 73e8060..b58de03 100644
--- a/ql/src/test/results/clientpositive/llap/vector_char_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_char_2.q.out
@@ -102,31 +102,40 @@ STAGE PLANS:
projectedOutputColumnNums: [1, 3]
selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int
Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(_col1), count()
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 3:int) -> bigint, VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 1:char(20)
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1]
+ Top N Key Operator
+ sort order: +
keys: _col0 (type: char(20))
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: char(20))
- sort order: +
- Map-reduce partition columns: _col0 (type: char(20))
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 5
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 1:char(20)
+ native: true
+ Group By Operator
+ aggregations: sum(_col1), count()
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 3:int) -> bigint, VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 1:char(20)
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0, 1]
+ keys: _col0 (type: char(20))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: char(20))
+ sort order: +
+ Map-reduce partition columns: _col0 (type: char(20))
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -298,31 +307,40 @@ STAGE PLANS:
projectedOutputColumnNums: [1, 3]
selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int
Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: sum(_col1), count()
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 3:int) -> bigint, VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 1:char(20)
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1]
+ Top N Key Operator
+ sort order: -
keys: _col0 (type: char(20))
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: char(20))
- sort order: -
- Map-reduce partition columns: _col0 (type: char(20))
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 5
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 1:char(20)
+ native: true
+ Group By Operator
+ aggregations: sum(_col1), count()
+ Group By Vectorization:
+ aggregators: VectorUDAFSumLong(col 3:int) -> bigint, VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 1:char(20)
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0, 1]
+ keys: _col0 (type: char(20))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: char(20))
+ sort order: -
+ Map-reduce partition columns: _col0 (type: char(20))
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out
index bddde5f..1f49804 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out
@@ -68,33 +68,42 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- Group By Vectorization:
- aggregators: VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
+ Top N Key Operator
+ sort order: +++
keys: a (type: string), b (type: string), 0L (type: bigint)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- sort order: +++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- Reduce Sink Vectorization:
- className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [3]
+ Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ top n: 10
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint
+ native: true
+ Group By Operator
+ aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.1
- value expressions: _col3 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [3]
+ Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -111,7 +120,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: a:string, b:string, c:string
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint]
+ scratchColumnTypeNames: [bigint, bigint]
Reducer 2
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -269,33 +278,42 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- Group By Vectorization:
- aggregators: VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
+ Top N Key Operator
+ sort order: +++
keys: a (type: string), b (type: string), 0L (type: bigint)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- sort order: +++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- Reduce Sink Vectorization:
- className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [3]
+ Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ top n: 10
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint
+ native: true
+ Group By Operator
+ aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.1
- value expressions: _col3 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [3]
+ Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -312,7 +330,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: a:string, b:string, c:string
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint]
+ scratchColumnTypeNames: [bigint, bigint]
Reducer 2
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -470,33 +488,42 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- Group By Vectorization:
- aggregators: VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
+ Top N Key Operator
+ sort order: +++
keys: a (type: string), b (type: string), 0L (type: bigint)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- sort order: +++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
- Reduce Sink Vectorization:
- className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [3]
+ Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ top n: 10
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint
+ native: true
+ Group By Operator
+ aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ keys: a (type: string), b (type: string), 0L (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.1
- value expressions: _col3 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0, 1, 2]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [3]
+ Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -513,7 +540,7 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: a:string, b:string, c:string
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint]
+ scratchColumnTypeNames: [bigint, bigint]
Reducer 2
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -671,30 +698,39 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 1, 2]
Statistics: Num rows: 6 Data size: 3312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 4:bigint
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: []
+ Top N Key Operator
+ sort order: ++++
keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint)
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 18 Data size: 9936 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
- sort order: ++++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
- Reduce Sink Vectorization:
- className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2, 3]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
+ Statistics: Num rows: 6 Data size: 3312 Basic stats: COMPLETE Column stats: NONE
+ top n: 10
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 4:bigint
+ native: true
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 5:bigint
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 18 Data size: 9936 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.1
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0, 1, 2, 3]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 18 Data size: 9936 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -711,7 +747,7 @@ STAGE PLANS:
includeColumns: [0, 1, 2]
dataColumns: a:string, b:string, c:string
partitionColumnCount: 0
- scratchColumnTypeNames: [bigint]
+ scratchColumnTypeNames: [bigint, bigint]
Reducer 2
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -866,30 +902,39 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0]
Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: []
+ Top N Key Operator
+ sort order: +
keys: a (type: string)
- mode: hash
- outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- keyColumnNums: [0]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
+ top n: 10
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 0:string
+ native: true
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:string
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: a (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: []
+ Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -1048,33 +1093,42 @@ STAGE PLANS:
projectedOutputColumnNums: [6]
selectExpressions: DoubleColAddDoubleColumn(col 4:double, col 5:double)(children: CastStringToDouble(col 0:string) -> 4:double, CastStringToDouble(col 1:string) -> 5:double) -> 6:double
Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- Group By Vectorization:
- aggregators: VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 6:double
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
+ Top N Key Operator
+ sort order: +
keys: _col0 (type: double)
- mode: hash
- outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: double)
- sort order: +
- Map-reduce partition columns: _col0 (type: double)
- Reduce Sink Vectorization:
- className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0]
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1]
+ top n: 10
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 6:double
+ native: true
+ Group By Operator
+ aggregations: count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 6:double
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ keys: _col0 (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ keyColumnNums: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumnNums: [1]
+ Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
index 1235bda..bdcc286 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
@@ -266,28 +266,37 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [9]
Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 9:int
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: []
+ Top N Key Operator
+ sort order: +
keys: ss_ticket_number (type: int)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 20
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 9:int
+ native: true
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 9:int
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: ss_ticket_number (type: int)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out b/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
index fee5a5f..e81d7df 100644
--- a/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
@@ -365,19 +365,24 @@ STAGE PLANS:
1 _col0 (type: int)
outputColumnNames: _col4
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
+ Top N Key Operator
+ sort order: +
keys: _col4 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ top n: 100
+ Group By Operator
+ aggregations: count()
+ keys: _col4 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint)
Reducer 4
Execution mode: vectorized, llap
Reduce Vectorization:
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out b/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out
index 983c71d..f65712a 100644
--- a/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out
@@ -60,31 +60,40 @@ STAGE PLANS:
predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 2:decimal(20,10)), SelectColumnIsNotNull(col 3:decimal(23,14)))
predicate: (cdecimal1 is not null and cdecimal2 is not null) (type: boolean)
Statistics: Num rows: 5492 Data size: 1231540 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(cdecimal1)
- Group By Vectorization:
- aggregators: VectorUDAFMinDecimal(col 2:decimal(20,10)) -> decimal(20,10)
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 0:int, col 1:double, col 2:decimal(20,10), col 3:decimal(23,14)
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
+ Top N Key Operator
+ sort order: ++++
keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
- mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 5492 Data size: 1231540 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14))
- sort order: ++++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14))
- Reduce Sink Vectorization:
- className: VectorReduceSinkMultiKeyOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ top n: 50
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 0:int, col 1:double, col 2:decimal(20,10), col 3:decimal(23,14)
+ native: true
+ Group By Operator
+ aggregations: min(cdecimal1)
+ Group By Vectorization:
+ aggregators: VectorUDAFMinDecimal(col 2:decimal(20,10)) -> decimal(20,10)
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 0:int, col 1:double, col 2:decimal(20,10), col 3:decimal(23,14)
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 5492 Data size: 1231540 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.1
- value expressions: _col4 (type: decimal(20,10))
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14))
+ sort order: ++++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14))
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 5492 Data size: 1231540 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col4 (type: decimal(20,10))
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
http://git-wip-us.apache.org/repos/asf/hive/blob/851c8aba/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out b/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
index 38d9172..c6b3dcc 100644
--- a/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out
@@ -352,28 +352,37 @@ STAGE PLANS:
projectedOutputColumnNums: [20]
selectExpressions: StringGroupConcatColCol(col 18:string, col 19:string)(children: StringGroupColConcatStringScalar(col 19:string, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 18:string)(children: CastLongToString(col 14:int)(children: CastDoubleToLong(col 16:double)(children: DoubleColAddDoubleScalar(col 17:double, val 1.0)(children: DoubleColDivideDoubleScalar(col 16:double, val 3.0)(children: CastLongToDouble(col 15:int)(children: LongColSubtractLongScalar(col 14:int, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 14:int) -> 15:int) -> 16:double) -> 17:double) -> 16:double) -> 14:int) -> 18:string) -> 19:string) -> 18:string, CastLongToString(col 14:int)(children: VectorUDFYearDate(col 12, field YEAR) -> 14:int) -> 19:string) -> 20:string
Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 20:string
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: []
+ Top N Key Operator
+ sort order: +
keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ top n: 50
+ Top N Key Vectorization:
+ className: VectorTopNKeyOperator
+ keyExpressions: col 20:string
+ native: true
+ Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ keyExpressions: col 20:string
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: []
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0
Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE
- TopN Hash Memory Usage: 0.1
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 2000 Data size: 106456 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization: