You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2017/02/03 20:03:44 UTC
[19/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN
display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
index d0efdb0..dfae461 100644
--- a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
@@ -1,15 +1,19 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -32,6 +36,10 @@ STAGE PLANS:
predicate: l_partkey is not null (type: boolean)
Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
+ Group By Vectorization:
+ vectorOutput: false
+ native: false
+ projectedOutputColumns: null
keys: l_partkey (type: int)
mode: hash
outputColumnNames: _col0
@@ -43,6 +51,10 @@ STAGE PLANS:
Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Map 3
Map Operator Tree:
TableScan
@@ -63,6 +75,10 @@ STAGE PLANS:
value expressions: _col0 (type: int), _col2 (type: int)
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Map 4
Map Operator Tree:
TableScan
@@ -76,6 +92,10 @@ STAGE PLANS:
outputColumnNames: l_orderkey
Statistics: Num rows: 14 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
+ Group By Vectorization:
+ vectorOutput: false
+ native: false
+ projectedOutputColumns: null
keys: l_orderkey (type: int)
mode: hash
outputColumnNames: _col0
@@ -87,10 +107,27 @@ STAGE PLANS:
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
@@ -101,6 +138,10 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col1 (type: int)
+ Map Join Vectorization:
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
outputColumnNames: _col0, _col1, _col3
input vertices:
1 Map 3
@@ -111,6 +152,10 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
+ Map Join Vectorization:
+ className: VectorMapJoinInnerBigOnlyLongOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
outputColumnNames: _col0, _col3
input vertices:
1 Reducer 5
@@ -118,9 +163,16 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col3 (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 2]
Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -128,8 +180,21 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 5
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
@@ -138,6 +203,10 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
@@ -170,18 +239,22 @@ POSTHOOK: Input: default@lineitem
61336 8855
64128 9141
82704 7721
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -207,6 +280,10 @@ STAGE PLANS:
predicate: l_partkey is not null (type: boolean)
Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
+ Group By Vectorization:
+ vectorOutput: false
+ native: false
+ projectedOutputColumns: null
keys: l_partkey (type: int)
mode: hash
outputColumnNames: _col0
@@ -218,6 +295,10 @@ STAGE PLANS:
Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Map 3
Map Operator Tree:
TableScan
@@ -238,6 +319,10 @@ STAGE PLANS:
value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int)
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Map 4
Map Operator Tree:
TableScan
@@ -258,6 +343,10 @@ STAGE PLANS:
value expressions: _col0 (type: int)
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Map 5
Map Operator Tree:
TableScan
@@ -267,6 +356,10 @@ STAGE PLANS:
predicate: l_partkey is not null (type: boolean)
Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
+ Group By Vectorization:
+ vectorOutput: false
+ native: false
+ projectedOutputColumns: null
keys: l_partkey (type: int)
mode: hash
outputColumnNames: _col0
@@ -278,6 +371,10 @@ STAGE PLANS:
Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Map 7
Map Operator Tree:
TableScan
@@ -301,6 +398,10 @@ STAGE PLANS:
0 Reducer 6
Statistics: Num rows: 34 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
+ Group By Vectorization:
+ vectorOutput: false
+ native: false
+ projectedOutputColumns: null
keys: _col2 (type: int)
mode: hash
outputColumnNames: _col0
@@ -312,10 +413,27 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
@@ -326,6 +444,10 @@ STAGE PLANS:
keys:
0 _col0 (type: int)
1 _col1 (type: int)
+ Map Join Vectorization:
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
outputColumnNames: _col0, _col1, _col3, _col4
input vertices:
1 Map 3
@@ -334,12 +456,29 @@ STAGE PLANS:
key expressions: _col1 (type: int), _col4 (type: int)
sort order: ++
Map-reduce partition columns: _col1 (type: int), _col4 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 5 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col3 (type: int)
Reducer 6
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
@@ -348,11 +487,28 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 8
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
@@ -363,11 +519,21 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
+ Map Join Vectorization:
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
outputColumnNames: _col0, _col3
input vertices:
0 Map 4
Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 1, col 0
+ native: false
+ projectedOutputColumns: []
keys: _col0 (type: int), _col3 (type: int)
mode: hash
outputColumnNames: _col0, _col1
@@ -376,11 +542,28 @@ STAGE PLANS:
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 9
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0, col 1
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
@@ -391,6 +574,10 @@ STAGE PLANS:
keys:
0 _col1 (type: int), _col4 (type: int)
1 _col0 (type: int), _col1 (type: int)
+ Map Join Vectorization:
+ className: VectorMapJoinInnerMultiKeyOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
outputColumnNames: _col0, _col3
input vertices:
0 Reducer 2
@@ -398,9 +585,16 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: int), _col3 (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 3]
Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out b/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
index 1960c0c..f66a0c4 100644
--- a/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
@@ -208,7 +208,7 @@ stored as orc
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@store
-PREHOOK: query: explain select
+PREHOOK: query: explain vectorization select
s_state, count(1)
from store_sales,
store,
@@ -220,7 +220,7 @@ PREHOOK: query: explain select
order by s_state
limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain select
+POSTHOOK: query: explain vectorization select
s_state, count(1)
from store_sales,
store,
@@ -232,6 +232,10 @@ POSTHOOK: query: explain select
order by s_state
limit 100
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -267,6 +271,12 @@ STAGE PLANS:
value expressions: _col0 (type: int)
Execution mode: vectorized, llap
LLAP IO: unknown
+ Map Vectorization:
+ enabled: true
+ groupByVectorOutput: true
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 6
Map Operator Tree:
TableScan
@@ -287,6 +297,14 @@ STAGE PLANS:
value expressions: _col1 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 7
Map Operator Tree:
TableScan
@@ -306,6 +324,14 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -353,6 +379,13 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Reducer 4
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -368,6 +401,13 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Reducer 5
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out b/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
index 469c702..d537297 100644
--- a/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
@@ -65,16 +65,20 @@ POSTHOOK: query: analyze table orc1 compute statistics
POSTHOOK: type: QUERY
POSTHOOK: Input: default@orc1
POSTHOOK: Output: default@orc1
-PREHOOK: query: explain from orc1 a
+PREHOOK: query: explain vectorization from orc1 a
insert overwrite table orc_rn1 select a.* where a.rn < 100
insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
insert overwrite table orc_rn3 select a.* where a.rn >= 1000
PREHOOK: type: QUERY
-POSTHOOK: query: explain from orc1 a
+POSTHOOK: query: explain vectorization from orc1 a
insert overwrite table orc_rn1 select a.* where a.rn < 100
insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
insert overwrite table orc_rn3 select a.* where a.rn >= 1000
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-3 is a root stage
Stage-4 depends on stages: Stage-3
@@ -142,6 +146,14 @@ STAGE PLANS:
name: default.orc_rn3
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Stage: Stage-4
Dependency Collection
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out b/ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out
new file mode 100644
index 0000000..6edc474
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_non_constant_in_expr.q.out
@@ -0,0 +1,51 @@
+PREHOOK: query: explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization SELECT * FROM alltypesorc WHERE cint in (ctinyint, cbigint)
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (cint) IN (ctinyint, cbigint) (type: boolean)
+ Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6144 Data size: 1546640 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Predicate expression for FILTER operator: Cannot vectorize IN() - casting a column is not supported. Column type is int but the common type is bigint
+ vectorized: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out b/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
index a4a36e0..84266a2 100644
--- a/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
@@ -28,12 +28,16 @@ POSTHOOK: query: insert into table b values('aaa')
POSTHOOK: type: QUERY
POSTHOOK: Output: default@b
POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select NULL from a
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select NULL from a
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -61,6 +65,12 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported
+ vectorized: false
Stage: Stage-0
Fetch Operator
@@ -77,12 +87,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@a
#### A masked pattern was here ####
NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select NULL as x from a union distinct select NULL as x from b
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select NULL as x from a union distinct select NULL as x from b
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -118,6 +132,12 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported
+ vectorized: false
Map 4
Map Operator Tree:
TableScan
@@ -139,8 +159,19 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported
+ vectorized: false
Reducer 3
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Key expression for GROUPBY operator: Data type void of Column[KEY._col0] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: void)