You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/08/16 00:20:58 UTC
[24/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization:
Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions
(Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query54.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query54.q.out b/ql/src/test/results/clientpositive/perf/spark/query54.q.out
index aa43c3d..241d6d8 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query54.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query54.q.out
@@ -2,7 +2,7 @@ Warning: Shuffle Join JOIN[84][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hd
Warning: Shuffle Join JOIN[115][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 5' is a cross product
Warning: Map Join MAPJOIN[145][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
Warning: Map Join MAPJOIN[144][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
with my_customers as (
select distinct c_customer_sk
, c_current_addr_sk
@@ -57,7 +57,7 @@ with my_customers as (
order by segment, num_customers
limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
with my_customers as (
select distinct c_customer_sk
, c_current_addr_sk
@@ -112,10 +112,6 @@ with my_customers as (
order by segment, num_customers
limit 100
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-3 depends on stages: Stage-2
@@ -137,32 +133,14 @@ STAGE PLANS:
alias: date_dim
filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColEqualLongScalar(col 8:int, val 3))
predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean)
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (d_month_seq + 3) (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [29]
- selectExpressions: LongColAddLongScalar(col 3:int, val 3) -> 29:int
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 29:int
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: []
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
@@ -171,108 +149,43 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 28
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: []
Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
- Group By Vectorization:
- aggregators: VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
Reducer 29
Execution mode: vectorized
Local Work:
Map Reduce Local Work
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0]
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterLongColLessEqualLongScalar(col 1:bigint, val 1)(children: FuncAbsLongToLong(col 0:bigint) -> 1:bigint)
predicate: (sq_count_check(_col0) <= 1) (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: []
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
- Spark Hash Table Sink Vectorization:
- className: VectorSparkHashTableSinkOperator
- native: true
keys:
0
1
@@ -290,32 +203,14 @@ STAGE PLANS:
alias: date_dim
filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColEqualLongScalar(col 8:int, val 3))
predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean)
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (d_month_seq + 1) (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [29]
- selectExpressions: LongColAddLongScalar(col 3:int, val 1) -> 29:int
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 29:int
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: []
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
@@ -324,108 +219,43 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 23
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: []
Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
- Group By Vectorization:
- aggregators: VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
mode: hash
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: bigint)
Reducer 24
Execution mode: vectorized
Local Work:
Map Reduce Local Work
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0]
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterLongColLessEqualLongScalar(col 1:bigint, val 1)(children: FuncAbsLongToLong(col 0:bigint) -> 1:bigint)
predicate: (sq_count_check(_col0) <= 1) (type: boolean)
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: []
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
- Spark Hash Table Sink Vectorization:
- className: VectorSparkHashTableSinkOperator
- native: true
keys:
0
1
@@ -440,40 +270,18 @@ STAGE PLANS:
alias: store
filterExpr: (s_county is not null and s_state is not null) (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 23:string), SelectColumnIsNotNull(col 24:string))
predicate: (s_county is not null and s_state is not null) (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_county (type: string), s_state (type: string)
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [23, 24]
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
- Spark Hash Table Sink Vectorization:
- className: VectorSparkHashTableSinkOperator
- native: true
keys:
0 _col1 (type: string), _col2 (type: string)
1 _col0 (type: string), _col1 (type: string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Local Work:
Map Reduce Local Work
@@ -502,65 +310,32 @@ STAGE PLANS:
alias: store_sales
filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 0:int))
predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 3, 15]
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 10
Map Operator Tree:
TableScan
alias: customer_address
filterExpr: (ca_address_sk is not null and ca_county is not null and ca_state is not null) (type: boolean)
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:string), SelectColumnIsNotNull(col 8:string))
predicate: (ca_address_sk is not null and ca_county is not null and ca_state is not null) (type: boolean)
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ca_address_sk (type: int), ca_county (type: string), ca_state (type: string)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 7, 8]
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -568,10 +343,6 @@ STAGE PLANS:
keys:
0 _col1 (type: string), _col2 (type: string)
1 _col0 (type: string), _col1 (type: string)
- Map Join Vectorization:
- className: VectorMapJoinInnerBigOnlyMultiKeyOperator
- native: true
- nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
outputColumnNames: _col0
input vertices:
1 Map 12
@@ -580,21 +351,8 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 44000000 Data size: 44654715780 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Local Work:
Map Reduce Local Work
Map 13
@@ -603,245 +361,112 @@ STAGE PLANS:
alias: catalog_sales
filterExpr: (cs_item_sk is not null and cs_sold_date_sk is not null and cs_bill_customer_sk is not null) (type: boolean)
Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 15:int), SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 3:int))
predicate: (cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cs_sold_date_sk (type: int), cs_bill_customer_sk (type: int), cs_item_sk (type: int)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 3, 15]
Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 18
Map Operator Tree:
TableScan
alias: web_sales
filterExpr: (ws_item_sk is not null and ws_sold_date_sk is not null and ws_bill_customer_sk is not null) (type: boolean)
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 4:int))
predicate: (ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_item_sk (type: int)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 4, 3]
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 19
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: ((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 8:int, val 3), FilterLongColEqualLongScalar(col 6:int, val 1999), SelectColumnIsNotNull(col 0:int))
predicate: ((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 20
Map Operator Tree:
TableScan
alias: item
filterExpr: ((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 12:string, val Jewelry), FilterStringGroupColEqualStringScalar(col 10:string, val consignment), SelectColumnIsNotNull(col 0:int))
predicate: ((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 21
Map Operator Tree:
TableScan
alias: customer
filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean)
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 4:int))
predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c_customer_sk (type: int), c_current_addr_sk (type: int)
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 4]
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 25
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColEqualLongScalar(col 8:int, val 3))
predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean)
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (d_month_seq + 1) (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [29]
- selectExpressions: LongColAddLongScalar(col 3:int, val 1) -> 29:int
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 29:int
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: []
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
@@ -850,53 +475,22 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Map 30
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColEqualLongScalar(col 8:int, val 3))
predicate: ((d_moy = 3) and (d_year = 1999)) (type: boolean)
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (d_month_seq + 3) (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [29]
- selectExpressions: LongColAddLongScalar(col 3:int, val 3) -> 29:int
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 29:int
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: []
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0
@@ -905,70 +499,29 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Map 9
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: d_date_sk is not null (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: d_date_sk is not null (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), d_month_seq (type: int)
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 3]
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 11
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -984,11 +537,6 @@ STAGE PLANS:
Map-reduce partition columns: _col5 (type: int)
Statistics: Num rows: 316240138 Data size: 42883351551 Basic stats: COMPLETE Column stats: NONE
Reducer 14
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -1005,11 +553,6 @@ STAGE PLANS:
Statistics: Num rows: 475191764 Data size: 64437789277 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int)
Reducer 15
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -1025,11 +568,6 @@ STAGE PLANS:
Map-reduce partition columns: _col1 (type: int)
Statistics: Num rows: 522710951 Data size: 70881569741 Basic stats: COMPLETE Column stats: NONE
Reducer 16
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -1051,21 +589,8 @@ STAGE PLANS:
Statistics: Num rows: 574982058 Data size: 77969728405 Basic stats: COMPLETE Column stats: NONE
Reducer 17
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int, col 1:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: []
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
@@ -1073,27 +598,14 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: int), _col0 (type: int)
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 0]
Statistics: Num rows: 287491029 Data size: 38984864202 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 287491029 Data size: 38984864202 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int)
Reducer 2
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -1111,41 +623,19 @@ STAGE PLANS:
value expressions: _col2 (type: decimal(7,2)), _col4 (type: int)
Reducer 26
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int)
Reducer 3
Local Work:
Map Reduce Local Work
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -1171,41 +661,19 @@ STAGE PLANS:
value expressions: _col2 (type: decimal(7,2)), _col4 (type: int), _col10 (type: int)
Reducer 31
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkEmptyKeyOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 9131 Data size: 10217589 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int)
Reducer 4
Local Work:
Map Reduce Local Work
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -1238,11 +706,6 @@ STAGE PLANS:
Statistics: Num rows: 6363893803988 Data size: 7803535707732365 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: decimal(7,2)), _col2 (type: int), _col3 (type: int)
Reducer 5
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -1273,23 +736,9 @@ STAGE PLANS:
value expressions: _col1 (type: decimal(17,2))
Reducer 6
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(17,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
@@ -1297,22 +746,9 @@ STAGE PLANS:
Select Operator
expressions: UDFToInteger((_col1 / 50)) (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [3]
- selectExpressions: CastDecimalToLong(col 2:decimal(21,6))(children: DecimalColDivideDecimalScalar(col 1:decimal(17,2), val 50) -> 2:decimal(21,6)) -> 3:int
Statistics: Num rows: 3228261906900801 Data size: 512409557603043008 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
- Group By Vectorization:
- aggregators: VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- groupByMode: HASH
- keyExpressions: col 3:int
- native: false
- vectorProcessingMode: HASH
- projectedOutputColumnNums: [0]
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
@@ -1321,31 +757,13 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 3228261906900801 Data size: 512409557603043008 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reducer 7
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
@@ -1353,50 +771,25 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col1 (type: bigint), (_col0 * 50) (type: int)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2]
- selectExpressions: LongColMultiplyLongScalar(col 0:int, val 50) -> 2:int
Statistics: Num rows: 1614130953450400 Data size: 256204778801521408 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: bigint)
sort order: ++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 1614130953450400 Data size: 256204778801521408 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col2 (type: int)
Reducer 8
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: int)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 2]
Statistics: Num rows: 1614130953450400 Data size: 256204778801521408 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
- Limit Vectorization:
- className: VectorLimitOperator
- native: true
Statistics: Num rows: 100 Data size: 15800 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 100 Data size: 15800 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query55.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query55.q.out b/ql/src/test/results/clientpositive/perf/spark/query55.q.out
index 18baea7..afcd67c 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query55.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query55.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
select i_brand_id brand_id, i_brand brand,
sum(ss_ext_sales_price) ext_price
from date_dim, store_sales, item
@@ -11,7 +11,7 @@ select i_brand_id brand_id, i_brand brand,
order by ext_price desc, i_brand_id
limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
select i_brand_id brand_id, i_brand brand,
sum(ss_ext_sales_price) ext_price
from date_dim, store_sales, item
@@ -24,10 +24,6 @@ select i_brand_id brand_id, i_brand brand,
order by ext_price desc, i_brand_id
limit 100
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -48,134 +44,60 @@ STAGE PLANS:
alias: store_sales
filterExpr: (ss_sold_date_sk is not null and ss_item_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 2:int))
predicate: (ss_item_sk is not null and ss_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 2, 15]
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 6
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: ((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 8:int, val 12), FilterLongColEqualLongScalar(col 6:int, val 2001), SelectColumnIsNotNull(col 0:int))
predicate: ((d_moy = 12) and (d_year = 2001) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 7
Map Operator Tree:
TableScan
alias: item
filterExpr: ((i_manager_id = 36) and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 20:int, val 36), SelectColumnIsNotNull(col 0:int))
predicate: ((i_manager_id = 36) and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 7, 8]
Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 2
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -192,11 +114,6 @@ STAGE PLANS:
Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: decimal(7,2))
Reducer 3
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -220,23 +137,9 @@ STAGE PLANS:
value expressions: _col2 (type: decimal(17,2))
Reducer 4
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int, col 1:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: int), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
@@ -244,49 +147,25 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: string), _col2 (type: decimal(17,2)), _col0 (type: int)
outputColumnNames: _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 2, 0]
Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: decimal(17,2)), _col3 (type: int)
sort order: -+
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: string)
Reducer 5
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: decimal(17,2))
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 2, 0]
Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
- Limit Vectorization:
- className: VectorLimitOperator
- native: true
Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat