You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/08/16 00:21:20 UTC
[46/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization:
Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions
(Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query11.q.out b/ql/src/test/results/clientpositive/perf/spark/query11.q.out
index 9a19fdf..87a0cc0 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query11.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query11.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
with year_total as (
select c_customer_id customer_id
,c_first_name customer_first_name
@@ -72,7 +72,7 @@ with year_total as (
order by t_s_secyear.c_preferred_cust_flag
limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
with year_total as (
select c_customer_id customer_id
,c_first_name customer_first_name
@@ -146,10 +146,6 @@ with year_total as (
order by t_s_secyear.c_preferred_cust_flag
limit 100
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -180,518 +176,237 @@ STAGE PLANS:
alias: web_sales
filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4:int), SelectColumnIsNotNull(col 0:int))
predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 4, 22, 25]
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 13
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2001), SelectColumnIsNotNull(col 0:int))
predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 14
Map Operator Tree:
TableScan
alias: customer
filterExpr: (c_customer_sk is not null and c_customer_id is not null) (type: boolean)
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string))
predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 8, 9, 10, 14, 15, 16]
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 15
Map Operator Tree:
TableScan
alias: web_sales
filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4:int), SelectColumnIsNotNull(col 0:int))
predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 4, 22, 25]
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 19
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2001), SelectColumnIsNotNull(col 0:int))
predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 20
Map Operator Tree:
TableScan
alias: customer
filterExpr: (c_customer_sk is not null and c_customer_id is not null) (type: boolean)
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string))
predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 8, 9, 10, 14, 15, 16]
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 21
Map Operator Tree:
TableScan
alias: store_sales
filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 0:int))
predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_discount_amt (type: decimal(7,2)), ss_ext_list_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 3, 14, 17]
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 25
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2002), SelectColumnIsNotNull(col 0:int))
predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 26
Map Operator Tree:
TableScan
alias: customer
filterExpr: (c_customer_sk is not null and c_customer_id is not null) (type: boolean)
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string))
predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 8, 9, 10, 14, 15, 16]
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 7
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: ((d_year = 2002) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2002), SelectColumnIsNotNull(col 0:int))
predicate: ((d_year = 2002) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 8
Map Operator Tree:
TableScan
alias: customer
filterExpr: (c_customer_sk is not null and c_customer_id is not null) (type: boolean)
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string))
predicate: (c_customer_id is not null and c_customer_sk is not null) (type: boolean)
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c_customer_sk (type: int), c_customer_id (type: string), c_first_name (type: string), c_last_name (type: string), c_preferred_cust_flag (type: string), c_birth_country (type: string), c_login (type: string), c_email_address (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 8, 9, 10, 14, 15, 16]
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 9
Map Operator Tree:
TableScan
alias: store_sales
filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 0:int))
predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_ext_discount_amt (type: decimal(7,2)), ss_ext_list_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 3, 14, 17]
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 10
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -708,11 +423,6 @@ STAGE PLANS:
Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
Reducer 11
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -740,23 +450,9 @@ STAGE PLANS:
value expressions: _col7 (type: decimal(18,2))
Reducer 12
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 7:decimal(18,2)) -> decimal(18,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string, col 4:string, col 5:string, col 6:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
@@ -764,42 +460,21 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col7 (type: decimal(18,2))
outputColumnNames: _col0, _col7
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 7]
Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterDecimalColGreaterDecimalScalar(col 7:decimal(18,2), val 0)
predicate: (_col7 > 0) (type: boolean)
Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col7 (type: decimal(18,2))
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 7]
Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 116159124 Data size: 10247591639 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: decimal(18,2))
Reducer 16
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -816,11 +491,6 @@ STAGE PLANS:
Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
Reducer 17
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -848,23 +518,9 @@ STAGE PLANS:
value expressions: _col7 (type: decimal(18,2))
Reducer 18
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 7:decimal(18,2)) -> decimal(18,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string, col 4:string, col 5:string, col 6:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
@@ -872,42 +528,21 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col7 (type: decimal(18,2))
outputColumnNames: _col0, _col7
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 7]
Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterDecimalColGreaterDecimalScalar(col 7:decimal(18,2), val 0)
predicate: (_col7 > 0) (type: boolean)
Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col7 (type: decimal(18,2))
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 7]
Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 29040539 Data size: 3948673454 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: decimal(18,2))
Reducer 2
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -924,11 +559,6 @@ STAGE PLANS:
Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
Reducer 22
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -945,11 +575,6 @@ STAGE PLANS:
Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2))
Reducer 23
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -977,23 +602,9 @@ STAGE PLANS:
value expressions: _col7 (type: decimal(18,2))
Reducer 24
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 7:decimal(18,2)) -> decimal(18,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string, col 4:string, col 5:string, col 6:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
@@ -1001,27 +612,14 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col3 (type: string), _col7 (type: decimal(18,2))
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 3, 7]
Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: decimal(18,2))
Reducer 3
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -1049,23 +647,9 @@ STAGE PLANS:
value expressions: _col7 (type: decimal(18,2))
Reducer 4
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 7:decimal(18,2)) -> decimal(18,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string, col 4:string, col 5:string, col 6:string
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string), KEY._col6 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
@@ -1073,27 +657,14 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: string), _col7 (type: decimal(18,2))
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 7]
Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: decimal(18,2))
Reducer 5
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -1121,32 +692,16 @@ STAGE PLANS:
TopN Hash Memory Usage: 0.1
Reducer 6
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 574987679 Data size: 50725579961 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
- Limit Vectorization:
- className: VectorLimitOperator
- native: true
Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query12.q.out b/ql/src/test/results/clientpositive/perf/spark/query12.q.out
index b025865..413930c 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query12.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query12.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
select i_item_desc
,i_category
,i_class
@@ -30,7 +30,7 @@ order by
,revenueratio
limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
select i_item_desc
,i_category
,i_class
@@ -62,10 +62,6 @@ order by
,revenueratio
limit 100
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-1 depends on stages: Stage-2
@@ -82,40 +78,18 @@ STAGE PLANS:
alias: date_dim
filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 2001-01-11 16:00:00.0, right 2001-02-10 16:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int))
predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0]
Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
- Spark Hash Table Sink Vectorization:
- className: VectorSparkHashTableSinkOperator
- native: true
keys:
0 _col0 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Local Work:
Map Reduce Local Work
@@ -134,22 +108,12 @@ STAGE PLANS:
alias: web_sales
filterExpr: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 0:int))
predicate: (ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 3, 23]
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -157,10 +121,6 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col0 (type: int)
- Map Join Vectorization:
- className: VectorMapJoinInnerBigOnlyLongOperator
- native: true
- nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
outputColumnNames: _col1, _col2
input vertices:
1 Map 6
@@ -169,22 +129,9 @@ STAGE PLANS:
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: decimal(7,2))
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Local Work:
Map Reduce Local Work
Map 7
@@ -193,49 +140,21 @@ STAGE PLANS:
alias: item
filterExpr: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprAndExpr(children: FilterStringColumnInList(col 12, values Jewelry, Sports, Books), SelectColumnIsNotNull(col 0:int))
predicate: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)), i_class (type: string), i_category (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [0, 1, 4, 5, 10, 12]
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string)
Execution mode: vectorized
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFormatFeatureSupport: [DECIMAL_64]
- featureSupportInUse: [DECIMAL_64]
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 2
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Tagging not supported
- vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -259,23 +178,9 @@ STAGE PLANS:
value expressions: _col5 (type: decimal(17,2))
Reducer 3
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
- Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 5:decimal(17,2)) -> decimal(17,2)
- className: VectorGroupByOperator
- groupByMode: MERGEPARTIAL
- keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string, col 4:decimal(7,2)
- native: false
- vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2))
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -284,28 +189,14 @@ STAGE PLANS:
key expressions: _col1 (type: string)
sort order: +
Map-reduce partition columns: _col1 (type: string)
- Reduce Sink Vectorization:
- className: VectorReduceSinkStringOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2))
Reducer 4
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: decimal(7,2)), VALUE._col4 (type: decimal(17,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [1, 0, 2, 3, 4, 5]
Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
@@ -326,61 +217,29 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumHiveDecimal
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
- PTF Vectorization:
- className: VectorPTFOperator
- evaluatorClasses: [VectorPTFEvaluatorDecimalSum]
- functionInputExpressions: [col 5:decimal(17,2)]
- functionNames: [sum]
- native: true
- orderExpressions: [col 0:string]
Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)), ((_col5 * 100) / sum_window_0) (type: decimal(38,17)), _col2 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [3, 1, 0, 4, 5, 8, 2]
- selectExpressions: DecimalColDivideDecimalColumn(col 7:decimal(21,2), col 6:decimal(27,2))(children: DecimalColMultiplyDecimalScalar(col 5:decimal(17,2), val 100) -> 7:decimal(21,2)) -> 8:decimal(38,17)
Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: string), _col2 (type: string), _col6 (type: string), _col0 (type: string), _col5 (type: decimal(38,17))
sort order: +++++
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col3 (type: decimal(7,2)), _col4 (type: decimal(17,2))
Reducer 5
Execution mode: vectorized
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(7,2)), VALUE._col1 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(38,17))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumnNums: [3, 0, 1, 5, 6, 4]
Statistics: Num rows: 87121617 Data size: 11846020363 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
- Limit Vectorization:
- className: VectorLimitOperator
- native: true
Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat