You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/10/13 10:50:40 UTC
[21/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN
display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
index a075662..1fde0a9 100644
--- a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
@@ -4,7 +4,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS
-- Query copied from subquery_in.q
-- non agg, non corr, with join in Parent Query
-explain
+explain vectorization expression
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
@@ -16,12 +16,16 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS
-- Query copied from subquery_in.q
-- non agg, non corr, with join in Parent Query
-explain
+explain vectorization expression
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -65,6 +69,10 @@ STAGE PLANS:
value expressions: _col2 (type: int)
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Map 2
Map Operator Tree:
TableScan
@@ -89,6 +97,10 @@ STAGE PLANS:
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Map 3
Map Operator Tree:
TableScan
@@ -109,10 +121,27 @@ STAGE PLANS:
Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Reducer 4
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
@@ -123,6 +152,10 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
+ Map Join Vectorization:
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
outputColumnNames: _col2, _col4
input vertices:
0 Map 1
@@ -130,9 +163,16 @@ STAGE PLANS:
Select Operator
expressions: _col4 (type: int), _col2 (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -170,19 +210,23 @@ POSTHOOK: Input: default@lineitem
64128 9141
82704 7721
PREHOOK: query: -- non agg, corr, with join in Parent Query
-explain
+explain vectorization expression
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
PREHOOK: type: QUERY
POSTHOOK: query: -- non agg, corr, with join in Parent Query
-explain
+explain vectorization expression
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -226,6 +270,10 @@ STAGE PLANS:
value expressions: _col2 (type: int)
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Map 2
Map Operator Tree:
TableScan
@@ -250,6 +298,10 @@ STAGE PLANS:
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Map 3
Map Operator Tree:
TableScan
@@ -270,10 +322,27 @@ STAGE PLANS:
Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
+ Map Vectorization:
+ enabled: false
+ enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Reducer 4
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
@@ -284,6 +353,10 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
+ Map Join Vectorization:
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
outputColumnNames: _col2, _col4
input vertices:
0 Map 1
@@ -291,9 +364,16 @@ STAGE PLANS:
Select Operator
expressions: _col4 (type: int), _col2 (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1]
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out b/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
index b6a3b9a..d7ebd2b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
@@ -211,7 +211,7 @@ POSTHOOK: Output: default@store
PREHOOK: query: -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with
-- the 2 TableScanOperators that have different schema.
-explain select
+explain vectorization select
s_state, count(1)
from store_sales,
store,
@@ -226,7 +226,7 @@ PREHOOK: type: QUERY
POSTHOOK: query: -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with
-- the 2 TableScanOperators that have different schema.
-explain select
+explain vectorization select
s_state, count(1)
from store_sales,
store,
@@ -238,6 +238,10 @@ explain select
order by s_state
limit 100
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -273,6 +277,12 @@ STAGE PLANS:
value expressions: _col0 (type: int)
Execution mode: vectorized, llap
LLAP IO: unknown
+ Map Vectorization:
+ enabled: true
+ groupByVectorOutput: true
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 6
Map Operator Tree:
TableScan
@@ -293,6 +303,14 @@ STAGE PLANS:
value expressions: _col1 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Map 7
Map Operator Tree:
TableScan
@@ -312,6 +330,14 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -359,6 +385,13 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Reducer 4
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -374,6 +407,13 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Reducer 5
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out b/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
index 469c702..d537297 100644
--- a/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
@@ -65,16 +65,20 @@ POSTHOOK: query: analyze table orc1 compute statistics
POSTHOOK: type: QUERY
POSTHOOK: Input: default@orc1
POSTHOOK: Output: default@orc1
-PREHOOK: query: explain from orc1 a
+PREHOOK: query: explain vectorization from orc1 a
insert overwrite table orc_rn1 select a.* where a.rn < 100
insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
insert overwrite table orc_rn3 select a.* where a.rn >= 1000
PREHOOK: type: QUERY
-POSTHOOK: query: explain from orc1 a
+POSTHOOK: query: explain vectorization from orc1 a
insert overwrite table orc_rn1 select a.* where a.rn < 100
insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
insert overwrite table orc_rn3 select a.* where a.rn >= 1000
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-3 is a root stage
Stage-4 depends on stages: Stage-3
@@ -142,6 +146,14 @@ STAGE PLANS:
name: default.orc_rn3
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Stage: Stage-4
Dependency Collection
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out b/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
index 4bfe41a..45520e2 100644
--- a/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
@@ -37,13 +37,17 @@ POSTHOOK: Input: default@values__tmp__table__2
POSTHOOK: Output: default@b
POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
PREHOOK: query: -- We expect no vectorization due to NULL (void) projection type.
-explain
+explain vectorization expression
select NULL from a
PREHOOK: type: QUERY
POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type.
-explain
+explain vectorization expression
select NULL from a
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -71,6 +75,12 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported
+ vectorized: false
Stage: Stage-0
Fetch Operator
@@ -87,12 +97,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@a
#### A masked pattern was here ####
NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select NULL as x from a union distinct select NULL as x from b
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select NULL as x from a union distinct select NULL as x from b
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -128,6 +142,12 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported
+ vectorized: false
Map 4
Map Operator Tree:
TableScan
@@ -149,8 +169,19 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported
+ vectorized: false
Reducer 3
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Key expression for GROUPBY operator: Data type void of Column[KEY._col0] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: void)