You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by we...@apache.org on 2016/10/19 00:35:38 UTC
[21/62] [partial] hive git commit: Revert "Revert "Revert
"HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline,
reviewed by Gopal Vijayaraghavan)"""
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
index 1fde0a9..a075662 100644
--- a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
@@ -4,7 +4,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS
-- Query copied from subquery_in.q
-- non agg, non corr, with join in Parent Query
-explain vectorization expression
+explain
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
@@ -16,16 +16,12 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS
-- Query copied from subquery_in.q
-- non agg, non corr, with join in Parent Query
-explain vectorization expression
+explain
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -69,10 +65,6 @@ STAGE PLANS:
value expressions: _col2 (type: int)
Execution mode: llap
LLAP IO: no inputs
- Map Vectorization:
- enabled: false
- enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Map 2
Map Operator Tree:
TableScan
@@ -97,10 +89,6 @@ STAGE PLANS:
Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map Vectorization:
- enabled: false
- enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Map 3
Map Operator Tree:
TableScan
@@ -121,27 +109,10 @@ STAGE PLANS:
Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map Vectorization:
- enabled: false
- enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Reducer 4
Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- groupByVectorOutput: true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- vectorOutput: true
- keyExpressions: col 0
- native: false
- projectedOutputColumns: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
@@ -152,10 +123,6 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
- Map Join Vectorization:
- className: VectorMapJoinInnerLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
outputColumnNames: _col2, _col4
input vertices:
0 Map 1
@@ -163,16 +130,9 @@ STAGE PLANS:
Select Operator
expressions: _col4 (type: int), _col2 (type: int)
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [0, 1]
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -210,23 +170,19 @@ POSTHOOK: Input: default@lineitem
64128 9141
82704 7721
PREHOOK: query: -- non agg, corr, with join in Parent Query
-explain vectorization expression
+explain
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
PREHOOK: type: QUERY
POSTHOOK: query: -- non agg, corr, with join in Parent Query
-explain vectorization expression
+explain
select p.p_partkey, li.l_suppkey
from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -270,10 +226,6 @@ STAGE PLANS:
value expressions: _col2 (type: int)
Execution mode: llap
LLAP IO: no inputs
- Map Vectorization:
- enabled: false
- enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Map 2
Map Operator Tree:
TableScan
@@ -298,10 +250,6 @@ STAGE PLANS:
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map Vectorization:
- enabled: false
- enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Map 3
Map Operator Tree:
TableScan
@@ -322,27 +270,10 @@ STAGE PLANS:
Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
- Map Vectorization:
- enabled: false
- enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Reducer 4
Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- groupByVectorOutput: true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
- Group By Vectorization:
- className: VectorGroupByOperator
- vectorOutput: true
- keyExpressions: col 0
- native: false
- projectedOutputColumns: []
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0
@@ -353,10 +284,6 @@ STAGE PLANS:
keys:
0 _col1 (type: int)
1 _col0 (type: int)
- Map Join Vectorization:
- className: VectorMapJoinInnerLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
outputColumnNames: _col2, _col4
input vertices:
0 Map 1
@@ -364,16 +291,9 @@ STAGE PLANS:
Select Operator
expressions: _col4 (type: int), _col2 (type: int)
outputColumnNames: _col0, _col1
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [0, 1]
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out b/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
index d7ebd2b..b6a3b9a 100644
--- a/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
@@ -211,7 +211,7 @@ POSTHOOK: Output: default@store
PREHOOK: query: -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with
-- the 2 TableScanOperators that have different schema.
-explain vectorization select
+explain select
s_state, count(1)
from store_sales,
store,
@@ -226,7 +226,7 @@ PREHOOK: type: QUERY
POSTHOOK: query: -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with
-- the 2 TableScanOperators that have different schema.
-explain vectorization select
+explain select
s_state, count(1)
from store_sales,
store,
@@ -238,10 +238,6 @@ explain vectorization select
order by s_state
limit 100
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -277,12 +273,6 @@ STAGE PLANS:
value expressions: _col0 (type: int)
Execution mode: vectorized, llap
LLAP IO: unknown
- Map Vectorization:
- enabled: true
- groupByVectorOutput: true
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 6
Map Operator Tree:
TableScan
@@ -303,14 +293,6 @@ STAGE PLANS:
value expressions: _col1 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 7
Map Operator Tree:
TableScan
@@ -330,14 +312,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -385,13 +359,6 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Reducer 4
Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- groupByVectorOutput: true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -407,13 +374,6 @@ STAGE PLANS:
value expressions: _col1 (type: bigint)
Reducer 5
Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- groupByVectorOutput: true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out b/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
index d537297..469c702 100644
--- a/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
@@ -65,20 +65,16 @@ POSTHOOK: query: analyze table orc1 compute statistics
POSTHOOK: type: QUERY
POSTHOOK: Input: default@orc1
POSTHOOK: Output: default@orc1
-PREHOOK: query: explain vectorization from orc1 a
+PREHOOK: query: explain from orc1 a
insert overwrite table orc_rn1 select a.* where a.rn < 100
insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
insert overwrite table orc_rn3 select a.* where a.rn >= 1000
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization from orc1 a
+POSTHOOK: query: explain from orc1 a
insert overwrite table orc_rn1 select a.* where a.rn < 100
insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
insert overwrite table orc_rn3 select a.* where a.rn >= 1000
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-3 is a root stage
Stage-4 depends on stages: Stage-3
@@ -146,14 +142,6 @@ STAGE PLANS:
name: default.orc_rn3
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Stage: Stage-4
Dependency Collection
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out b/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
index 45520e2..4bfe41a 100644
--- a/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
@@ -37,17 +37,13 @@ POSTHOOK: Input: default@values__tmp__table__2
POSTHOOK: Output: default@b
POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
PREHOOK: query: -- We expect no vectorization due to NULL (void) projection type.
-explain vectorization expression
+explain
select NULL from a
PREHOOK: type: QUERY
POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type.
-explain vectorization expression
+explain
select NULL from a
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -75,12 +71,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported
- vectorized: false
Stage: Stage-0
Fetch Operator
@@ -97,16 +87,12 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@a
#### A masked pattern was here ####
NULL
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
select NULL as x from a union distinct select NULL as x from b
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
select NULL as x from a union distinct select NULL as x from b
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -142,12 +128,6 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported
- vectorized: false
Map 4
Map Operator Tree:
TableScan
@@ -169,19 +149,8 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported
- vectorized: false
Reducer 3
Execution mode: llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: Key expression for GROUPBY operator: Data type void of Column[KEY._col0] not supported
- vectorized: false
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: void)