You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/08/08 07:38:06 UTC
[43/51] [partial] hive git commit: HIVE-20315: Vectorization: Fix
more NULL / Wrong Results issues and avoid unnecessary casts/conversions
(Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/470ba3e2/ql/src/test/results/clientpositive/perf/spark/query19.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query19.q.out b/ql/src/test/results/clientpositive/perf/spark/query19.q.out
index d2994e6..51a403a 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query19.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query19.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact,
sum(ss_ext_sales_price) ext_price
from date_dim, store_sales, item,customer,customer_address,store
@@ -22,7 +22,7 @@ select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact,
,i_manufact
limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact,
sum(ss_ext_sales_price) ext_price
from date_dim, store_sales, item,customer,customer_address,store
@@ -46,6 +46,10 @@ select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact,
,i_manufact
limit 100
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-1 depends on stages: Stage-2
@@ -62,18 +66,40 @@ STAGE PLANS:
alias: store
filterExpr: s_store_sk is not null (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: s_store_sk is not null (type: boolean)
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: s_store_sk (type: int), s_zip (type: string)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 25]
Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
+ Spark Hash Table Sink Vectorization:
+ className: VectorSparkHashTableSinkOperator
+ native: true
keys:
0 _col7 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Local Work:
Map Reduce Local Work
@@ -94,100 +120,220 @@ STAGE PLANS:
alias: customer
filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean)
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 4:int))
predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean)
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c_customer_sk (type: int), c_current_addr_sk (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 4]
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 10
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 8:int, val 11), FilterLongColEqualLongScalar(col 6:int, val 1999), SelectColumnIsNotNull(col 0:int))
predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 11
Map Operator Tree:
TableScan
alias: item
filterExpr: ((i_manager_id = 7) and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 20:int, val 7), SelectColumnIsNotNull(col 0:int))
predicate: ((i_manager_id = 7) and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_brand_id (type: int), i_brand (type: string), i_manufact_id (type: int), i_manufact (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 7, 8, 13, 14]
Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: string)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 6
Map Operator Tree:
TableScan
alias: customer_address
filterExpr: ca_address_sk is not null (type: boolean)
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: ca_address_sk is not null (type: boolean)
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ca_address_sk (type: int), ca_zip (type: string)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 9]
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 7
Map Operator Tree:
TableScan
alias: store_sales
filterExpr: (ss_sold_date_sk is not null and ss_item_sk is not null and ss_customer_sk is not null and ss_store_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 7:int))
predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ext_sales_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 2, 3, 7, 15]
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ notVectorizedReason: Tagging not supported
+ vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -206,6 +352,11 @@ STAGE PLANS:
Reducer 3
Local Work:
Map Reduce Local Work
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ notVectorizedReason: Tagging not supported
+ vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -246,9 +397,23 @@ STAGE PLANS:
value expressions: _col4 (type: decimal(17,2))
Reducer 4
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDecimal(col 4:decimal(17,2)) -> decimal(17,2)
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string, col 1:int, col 2:int, col 3:string
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -256,30 +421,59 @@ STAGE PLANS:
Select Operator
expressions: _col2 (type: int), _col3 (type: string), _col4 (type: decimal(17,2)), _col0 (type: string), _col1 (type: int)
outputColumnNames: _col2, _col3, _col4, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [2, 3, 4, 0, 1]
Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col4 (type: decimal(17,2)), _col5 (type: string), _col6 (type: int), _col2 (type: int), _col3 (type: string)
sort order: -++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Reducer 5
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey2 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey0 (type: decimal(17,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [2, 1, 3, 4, 0]
Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 8
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ notVectorizedReason: Tagging not supported
+ vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -296,6 +490,11 @@ STAGE PLANS:
Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2))
Reducer 9
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ notVectorizedReason: Tagging not supported
+ vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
http://git-wip-us.apache.org/repos/asf/hive/blob/470ba3e2/ql/src/test/results/clientpositive/perf/spark/query2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query2.q.out b/ql/src/test/results/clientpositive/perf/spark/query2.q.out
index f615661..499ecab 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query2.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query2.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
with wscs as
(select sold_date_sk
,sales_price
@@ -57,7 +57,7 @@ with wscs as
where d_week_seq1=d_week_seq2-53
order by d_week_seq1
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
with wscs as
(select sold_date_sk
,sales_price
@@ -116,6 +116,10 @@ with wscs as
where d_week_seq1=d_week_seq2-53
order by d_week_seq1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -139,159 +143,348 @@ STAGE PLANS:
alias: web_sales
filterExpr: ws_sold_date_sk is not null (type: boolean)
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: ws_sold_date_sk is not null (type: boolean)
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ws_sold_date_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 23]
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: decimal(7,2))
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 13
Map Operator Tree:
TableScan
alias: catalog_sales
filterExpr: cs_sold_date_sk is not null (type: boolean)
Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: cs_sold_date_sk is not null (type: boolean)
Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cs_sold_date_sk (type: int), cs_ext_sales_price (type: decimal(7,2))
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 23]
Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: decimal(7,2))
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 14
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: (d_date_sk is not null and d_week_seq is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 4:int))
predicate: (d_date_sk is not null and d_week_seq is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), d_week_seq (type: int), d_day_name (type: string)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 4, 14]
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: string)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 15
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: ((d_year = 2002) and d_week_seq is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2002), SelectColumnIsNotNull(col 4:int))
predicate: ((d_year = 2002) and d_week_seq is not null) (type: boolean)
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_week_seq (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [4]
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 6
Map Operator Tree:
TableScan
alias: catalog_sales
filterExpr: cs_sold_date_sk is not null (type: boolean)
Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: cs_sold_date_sk is not null (type: boolean)
Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cs_sold_date_sk (type: int), cs_ext_sales_price (type: decimal(7,2))
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 23]
Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: decimal(7,2))
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 7
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: (d_date_sk is not null and d_week_seq is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 4:int))
predicate: (d_date_sk is not null and d_week_seq is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int), d_week_seq (type: int), d_day_name (type: string)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 4, 14]
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: string)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 8
Map Operator Tree:
TableScan
alias: date_dim
filterExpr: ((d_year = 2001) and d_week_seq is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2001), SelectColumnIsNotNull(col 4:int))
predicate: ((d_year = 2001) and d_week_seq is not null) (type: boolean)
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_week_seq (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [4]
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 9
Map Operator Tree:
TableScan
alias: web_sales
filterExpr: ws_sold_date_sk is not null (type: boolean)
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 0:int)
predicate: ws_sold_date_sk is not null (type: boolean)
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ws_sold_date_sk (type: int), ws_ext_sales_price (type: decimal(7,2))
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 23]
Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 431992504 Data size: 58579807164 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: decimal(7,2))
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 10
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ notVectorizedReason: Tagging not supported
+ vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -319,9 +512,23 @@ STAGE PLANS:
value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
Reducer 11
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 3:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 4:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 5:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 6:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 7:decimal(17,2)) -> decimal(17,2)
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
@@ -330,9 +537,18 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 237595882 Data size: 32218894638 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
Reducer 12
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ notVectorizedReason: Tagging not supported
+ vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -349,6 +565,11 @@ STAGE PLANS:
Statistics: Num rows: 261355475 Data size: 35440784869 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
Reducer 2
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ notVectorizedReason: Tagging not supported
+ vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -376,9 +597,23 @@ STAGE PLANS:
value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
Reducer 3
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDecimal(col 1:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 2:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 3:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 4:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 5:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 6:decimal(17,2)) -> decimal(17,2), VectorUDAFSumDecimal(col 7:decimal(17,2)) -> decimal(17,2)
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
@@ -387,9 +622,18 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 237595882 Data size: 32218894638 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)), _col5 (type: decimal(17,2)), _col6 (type: decimal(17,2)), _col7 (type: decimal(17,2))
Reducer 4
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ notVectorizedReason: Tagging not supported
+ vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -412,13 +656,26 @@ STAGE PLANS:
value expressions: _col1 (type: decimal(20,2)), _col2 (type: decimal(20,2)), _col3 (type: decimal(20,2)), _col4 (type: decimal(20,2)), _col5 (type: decimal(20,2)), _col6 (type: decimal(20,2)), _col7 (type: decimal(20,2))
Reducer 5
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(20,2)), VALUE._col1 (type: decimal(20,2)), VALUE._col2 (type: decimal(20,2)), VALUE._col3 (type: decimal(20,2)), VALUE._col4 (type: decimal(20,2)), VALUE._col5 (type: decimal(20,2)), VALUE._col6 (type: decimal(20,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7]
Statistics: Num rows: 574982057 Data size: 77969728401 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 574982057 Data size: 77969728401 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/470ba3e2/ql/src/test/results/clientpositive/perf/spark/query20.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query20.q.out b/ql/src/test/results/clientpositive/perf/spark/query20.q.out
index 86e0e72..17e20a4 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query20.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query20.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select i_item_desc
,i_category
,i_class
@@ -26,7 +26,7 @@ select i_item_desc
,revenueratio
limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select i_item_desc
,i_category
,i_class
@@ -54,6 +54,10 @@ select i_item_desc
,revenueratio
limit 100
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-1 depends on stages: Stage-2
@@ -70,18 +74,40 @@ STAGE PLANS:
alias: date_dim
filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 2001-01-11 16:00:00.0, right 2001-02-10 16:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int))
predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-01-12 00:00:00' AND TIMESTAMP'2001-02-11 00:00:00' and d_date_sk is not null) (type: boolean)
Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: d_date_sk (type: int)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
+ Spark Hash Table Sink Vectorization:
+ className: VectorSparkHashTableSinkOperator
+ native: true
keys:
0 _col0 (type: int)
1 _col0 (type: int)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Local Work:
Map Reduce Local Work
@@ -100,12 +126,22 @@ STAGE PLANS:
alias: catalog_sales
filterExpr: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 15:int), SelectColumnIsNotNull(col 0:int))
predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean)
Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_ext_sales_price (type: decimal(7,2))
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 15, 23]
Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -113,6 +149,10 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col0 (type: int)
+ Map Join Vectorization:
+ className: VectorMapJoinInnerBigOnlyLongOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
outputColumnNames: _col1, _col2
input vertices:
1 Map 6
@@ -121,9 +161,22 @@ STAGE PLANS:
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: decimal(7,2))
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Local Work:
Map Reduce Local Work
Map 7
@@ -132,21 +185,49 @@ STAGE PLANS:
alias: item
filterExpr: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterStringColumnInList(col 12, values Jewelry, Sports, Books), SelectColumnIsNotNull(col 0:int))
predicate: ((i_category) IN ('Jewelry', 'Sports', 'Books') and i_item_sk is not null) (type: boolean)
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: i_item_sk (type: int), i_item_id (type: string), i_item_desc (type: string), i_current_price (type: decimal(7,2)), i_class (type: string), i_category (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 4, 5, 10, 12]
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: decimal(7,2)), _col4 (type: string), _col5 (type: string)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ notVectorizedReason: Tagging not supported
+ vectorized: false
Reduce Operator Tree:
Join Operator
condition map:
@@ -170,9 +251,23 @@ STAGE PLANS:
value expressions: _col5 (type: decimal(17,2))
Reducer 3
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDecimal(col 5:decimal(17,2)) -> decimal(17,2)
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:string, col 1:string, col 2:string, col 3:string, col 4:decimal(7,2)
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: decimal(7,2))
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -181,14 +276,28 @@ STAGE PLANS:
key expressions: _col1 (type: string)
sort order: +
Map-reduce partition columns: _col1 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkStringOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2))
Reducer 4
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: decimal(7,2)), VALUE._col4 (type: decimal(17,2))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 0, 2, 3, 4, 5]
Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
@@ -209,29 +318,61 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumHiveDecimal
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorDecimalSum]
+ functionInputExpressions: [col 5:decimal(17,2)]
+ functionNames: [sum]
+ native: true
+ orderExpressions: [col 0:string]
Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col3 (type: string), _col0 (type: string), _col1 (type: string), _col4 (type: decimal(7,2)), _col5 (type: decimal(17,2)), ((_col5 * 100) / sum_window_0) (type: decimal(38,17)), _col2 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [3, 1, 0, 4, 5, 8, 2]
+ selectExpressions: DecimalColDivideDecimalColumn(col 7:decimal(21,2), col 6:decimal(27,2))(children: DecimalColMultiplyDecimalScalar(col 5:decimal(17,2), val 100) -> 7:decimal(21,2)) -> 8:decimal(38,17)
Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: string), _col2 (type: string), _col6 (type: string), _col0 (type: string), _col5 (type: decimal(38,17))
sort order: +++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col3 (type: decimal(7,2)), _col4 (type: decimal(17,2))
Reducer 5
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(7,2)), VALUE._col1 (type: decimal(17,2)), KEY.reducesinkkey4 (type: decimal(38,17))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [3, 0, 1, 5, 6, 4]
Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 100 Data size: 13500 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat