You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/08/16 00:21:02 UTC
[28/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization:
Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions
(Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query47.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query47.q.out b/ql/src/test/results/clientpositive/perf/spark/query47.q.out
index a2387e8..690b105 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query47.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query47.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
with v1 as(
select i_category, i_brand,
s_store_name, s_company_name,
@@ -48,7 +48,7 @@ with v1 as(
order by sum_sales - avg_monthly_sales, 3
limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
with v1 as(
select i_category, i_brand,
s_store_name, s_company_name,
@@ -98,10 +98,6 @@ with v1 as(
order by sum_sales - avg_monthly_sales, 3
limit 100
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-3 depends on stages: Stage-2
@@ -120,40 +116,18 @@ STAGE PLANS:
alias: store
filterExpr: (s_store_sk is not null and s_store_name is not null and s_company_name is not null) (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 5:string), SelectColumnIsNotNull(col 17:string))
predicate: (s_company_name is not null and s_store_name is not null and s_store_sk is not null) (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int), s_store_name (type: string), s_company_name (type: string)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 5, 17]
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
- Spark Hash Table Sink Vectorization:
- className: VectorSparkHashTableSinkOperator
- native: true
keys:
0 _col2 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Local Work:
Map Reduce Local Work
@@ -167,40 +141,18 @@ STAGE PLANS:
alias: store
filterExpr: (s_store_sk is not null and s_store_name is not null and s_company_name is not null) (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 5:string), SelectColumnIsNotNull(col 17:string))
predicate: (s_company_name is not null and s_store_name is not null and s_store_sk is not null) (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int), s_store_name (type: string), s_company_name (type: string)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 5, 17]
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
- Spark Hash Table Sink Vectorization:
- className: VectorSparkHashTableSinkOperator
- native: true
keys:
0 _col2 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Local Work:
Map Reduce Local Work
@@ -214,40 +166,18 @@ STAGE PLANS:
alias: store
filterExpr: (s_store_sk is not null and s_store_name is not null and s_company_name is not null) (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 5:string), SelectColumnIsNotNull(col 17:string))
predicate: (s_company_name is not null and s_store_name is not null and s_store_sk is not null) (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int), s_store_name (type: string), s_company_name (type: string)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 5, 17]
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
- Spark Hash Table Sink Vectorization:
- className: VectorSparkHashTableSinkOperator
- native: true
keys:
0 _col2 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Local Work:
Map Reduce Local Work
@@ -277,393 +207,181 @@ STAGE PLANS:
alias: store_sales
filterExpr: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:int))
predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 2, 7, 13]
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 11
Map Operator Tree:
TableScan
alias: store_sales
filterExpr: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:int))
predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 2, 7, 13]
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 17
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000), FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColEqualLongScalar(col 8:int, val 12)), FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2001), FilterLongColEqualLongScalar(col 8:int, val 1))), SelectColumnIsNotNull(col 0:int))
predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 8]
Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 18
Map Operator Tree:
TableScan
alias: item
filterExpr: (i_item_sk is not null and i_category is not null and i_brand is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 12:string), SelectColumnIsNotNull(col 8:string))
predicate: (i_brand is not null and i_category is not null and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 8, 12]
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 20
Map Operator Tree:
TableScan
alias: store_sales
filterExpr: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:int))
predicate: (ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_store_sk (type: int), ss_sales_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 2, 7, 13]
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 25
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000), FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColEqualLongScalar(col 8:int, val 12)), FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2001), FilterLongColEqualLongScalar(col 8:int, val 1))), SelectColumnIsNotNull(col 0:int))
predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 8]
Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 26
Map Operator Tree:
TableScan
alias: item
filterExpr: (i_item_sk is not null and i_category is not null and i_brand is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 12:string), SelectColumnIsNotNull(col 8:string))
predicate: (i_brand is not null and i_category is not null and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 8, 12]
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 8
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000), FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColEqualLongScalar(col 8:int, val 12)), FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2001), FilterLongColEqualLongScalar(col 8:int, val 1))), SelectColumnIsNotNull(col 0:int))
predicate: (((d_year = 2000) or ((d_year = 1999) and (d_moy = 12)) or ((d_year = 2001) and (d_moy = 1))) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), d_year (type: int), d_moy (type: int)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 6, 8]
Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 73048 Data size: 81740712 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 9
Map Operator Tree:
TableScan
alias: item
filterExpr: (i_item_sk is not null and i_category is not null and i_brand is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 12:string), SelectColumnIsNotNull(col 8:string))
predicate: (i_brand is not null and i_category is not null and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_brand (type: string), i_category (type: string)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 8, 12]
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 12
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -682,11 +400,6 @@ STAGE PLANS:
Reducer 13
Local Work:
Map Reduce Local Work
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -720,23 +433,9 @@ STAGE PLANS:
value expressions: _col6 (type: decimal(17,2))
Reducer 14
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 6:decimal(17,2)) -> decimal(17,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int, col 1:int, col 2:string, col 3:string, col 4:string, col 5:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -745,28 +444,14 @@ STAGE PLANS:
key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int)
sort order: +++++
Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkMultiKeyOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col6 (type: decimal(17,2))
Reducer 15
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey4 (type: int), VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: decimal(17,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [4, 5, 1, 0, 2, 3, 6]
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
@@ -787,38 +472,18 @@ STAGE PLANS:
name: avg
window function: GenericUDAFAverageEvaluatorDecimal
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- PTF Vectorization:
- className: VectorPTFOperator
- evaluatorClasses: [VectorPTFEvaluatorDecimalAvg]
- functionInputExpressions: [col 6:decimal(17,2)]
- functionNames: [avg]
- native: true
- orderExpressions: [col 0:string, col 1:string, col 2:string, col 3:string, col 4:int]
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2))
outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [7, 4, 5, 1, 0, 2, 3, 6]
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int)
sort order: ++++++
Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
value expressions: avg_window_0 (type: decimal(21,6)), _col6 (type: decimal(17,2))
Reducer 16
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
- vectorized: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: decimal(17,2))
@@ -866,11 +531,6 @@ STAGE PLANS:
Statistics: Num rows: 31943759 Data size: 2818087693 Basic stats: COMPLETE Column stats: NONE
value expressions: _col4 (type: int), _col5 (type: int), _col6 (type: decimal(17,2)), _col7 (type: decimal(21,6))
Reducer 2
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -887,11 +547,6 @@ STAGE PLANS:
Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: int), _col6 (type: int)
Reducer 21
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -910,11 +565,6 @@ STAGE PLANS:
Reducer 22
Local Work:
Map Reduce Local Work
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -948,23 +598,9 @@ STAGE PLANS:
value expressions: _col6 (type: decimal(17,2))
Reducer 23
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 6:decimal(17,2)) -> decimal(17,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int, col 1:int, col 2:string, col 3:string, col 4:string, col 5:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -973,18 +609,9 @@ STAGE PLANS:
key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int)
sort order: ++++++
Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
value expressions: _col6 (type: decimal(17,2))
Reducer 24
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
- vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2))
@@ -1027,11 +654,6 @@ STAGE PLANS:
Reducer 3
Local Work:
Map Reduce Local Work
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -1065,23 +687,9 @@ STAGE PLANS:
value expressions: _col6 (type: decimal(17,2))
Reducer 4
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 6:decimal(17,2)) -> decimal(17,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:int, col 1:int, col 2:string, col 3:string, col 4:string, col 5:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -1090,18 +698,9 @@ STAGE PLANS:
key expressions: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col1 (type: int)
sort order: ++++++
Map-reduce partition columns: _col3 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
value expressions: _col6 (type: decimal(17,2))
Reducer 5
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
- vectorized: false
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2))
@@ -1142,11 +741,6 @@ STAGE PLANS:
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
value expressions: _col4 (type: decimal(17,2))
Reducer 6
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -1170,32 +764,16 @@ STAGE PLANS:
value expressions: _col0 (type: string), _col1 (type: int), _col3 (type: decimal(21,6)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2))
Reducer 7
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col2 (type: decimal(21,6)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(17,2)), VALUE._col5 (type: decimal(17,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [2, 3, 1, 4, 5, 6, 7]
Statistics: Num rows: 843315280 Data size: 74397518857 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
- Limit Vectorization:
- className: VectorLimitOperator
- native: true
Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query48.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query48.q.out b/ql/src/test/results/clientpositive/perf/spark/query48.q.out
index ba3a819..38ccff2 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query48.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query48.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
select sum (ss_quantity)
from store_sales, store, customer_demographics, customer_address, date_dim
where s_store_sk = ss_store_sk
@@ -63,7 +63,7 @@ select sum (ss_quantity)
)
)
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
select sum (ss_quantity)
from store_sales, store, customer_demographics, customer_address, date_dim
where s_store_sk = ss_store_sk
@@ -128,10 +128,6 @@ select sum (ss_quantity)
)
)
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-1 depends on stages: Stage-2
@@ -148,40 +144,18 @@ STAGE PLANS:
alias: store
filterExpr: s_store_sk is not null (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: s_store_sk is not null (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
- Spark Hash Table Sink Vectorization:
- className: VectorSparkHashTableSinkOperator
- native: true
keys:
0 _col3 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Local Work:
Map Reduce Local Work
@@ -200,176 +174,79 @@ STAGE PLANS:
alias: store_sales
filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterDecimal64ColumnBetween(col 13:decimal(7,2)/DECIMAL_64, decimal64LeftVal 10000, decimalLeftVal 10000, decimal64RightVal 15000, decimalRightVal 15000), FilterDecimal64ColumnBetween(col 13:decimal(7,2)/DECIMAL_64, decimal64LeftVal 5000, decimalLeftVal 5000, decimal64RightVal 10000, decimalRightVal 10000), FilterDecimal64ColumnBetween(col 13:decimal(7,2)/DECIMAL_64, decimal64LeftVal 15000, decimalLeftVal 15000, decimal64RightVal 20000, decimalRightVal 20000)), SelectColumnIsNotNull(col 7:int), SelectColumnIsNotNull(col 4:int), SelectColumnIsNotNull(col 6:int), SelectColumnIsNotNull(col 0:int))
predicate: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_net_profit (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 4, 6, 7, 10, 22]
Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 6
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1998), SelectColumnIsNotNull(col 0:int))
predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 7
Map Operator Tree:
TableScan
alias: customer_demographics
filterExpr: ((cd_marital_status = 'M') and (cd_education_status = '4 yr Degree') and cd_demo_sk is not null) (type: boolean)
Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 2:string, val M), FilterStringGroupColEqualStringScalar(col 3:string, val 4 yr Degree), SelectColumnIsNotNull(col 0:int))
predicate: ((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) (type: boolean)
Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cd_demo_sk (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 8
Map Operator Tree:
TableScan
alias: customer_address
filterExpr: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean)
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 10:string, val United States), SelectColumnIsNotNull(col 0:int))
predicate: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean)
Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ca_address_sk (type: int), ca_state (type: string)
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 8]
Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 2
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -386,11 +263,6 @@ STAGE PLANS:
Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2))
Reducer 3
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -409,11 +281,6 @@ STAGE PLANS:
Reducer 4
Local Work:
Map Reduce Local Work
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -447,30 +314,14 @@ STAGE PLANS:
value expressions: _col0 (type: bigint)
Reducer 5
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- native: false
- vectorProcessingMode: GLOBAL
- projectedOutputColumnNums: [0]
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat