You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/10/13 10:50:40 UTC
[21/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
index a075662..1fde0a9 100644
--- a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out
@@ -4,7 +4,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS
 -- Query copied from subquery_in.q
 
 -- non agg, non corr, with join in Parent Query
-explain
+explain vectorization expression
 select p.p_partkey, li.l_suppkey 
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey 
 where li.l_linenumber = 1 and
@@ -16,12 +16,16 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS
 -- Query copied from subquery_in.q
 
 -- non agg, non corr, with join in Parent Query
-explain
+explain vectorization expression
 select p.p_partkey, li.l_suppkey 
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey 
 where li.l_linenumber = 1 and
  li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR')
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -65,6 +69,10 @@ STAGE PLANS:
                           value expressions: _col2 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 2 
             Map Operator Tree:
                 TableScan
@@ -89,6 +97,10 @@ STAGE PLANS:
                           Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -109,10 +121,27 @@ STAGE PLANS:
                         Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Reducer 4 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: []
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0
@@ -123,6 +152,10 @@ STAGE PLANS:
                   keys:
                     0 _col1 (type: int)
                     1 _col0 (type: int)
+                  Map Join Vectorization:
+                      className: VectorMapJoinInnerLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
                   outputColumnNames: _col2, _col4
                   input vertices:
                     0 Map 1
@@ -130,9 +163,16 @@ STAGE PLANS:
                   Select Operator
                     expressions: _col4 (type: int), _col2 (type: int)
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1]
                     Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -170,19 +210,23 @@ POSTHOOK: Input: default@lineitem
 64128	9141
 82704	7721
 PREHOOK: query: -- non agg, corr, with join in Parent Query
-explain
+explain vectorization expression
 select p.p_partkey, li.l_suppkey 
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey 
 where li.l_linenumber = 1 and
  li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- non agg, corr, with join in Parent Query
-explain
+explain vectorization expression
 select p.p_partkey, li.l_suppkey 
 from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey 
 where li.l_linenumber = 1 and
  li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -226,6 +270,10 @@ STAGE PLANS:
                           value expressions: _col2 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 2 
             Map Operator Tree:
                 TableScan
@@ -250,6 +298,10 @@ STAGE PLANS:
                           Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Map 3 
             Map Operator Tree:
                 TableScan
@@ -270,10 +322,27 @@ STAGE PLANS:
                         Statistics: Num rows: 50 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
+            Map Vectorization:
+                enabled: false
+                enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
         Reducer 4 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
+                Group By Vectorization:
+                    className: VectorGroupByOperator
+                    vectorOutput: true
+                    keyExpressions: col 0
+                    native: false
+                    projectedOutputColumns: []
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0
@@ -284,6 +353,10 @@ STAGE PLANS:
                   keys:
                     0 _col1 (type: int)
                     1 _col0 (type: int)
+                  Map Join Vectorization:
+                      className: VectorMapJoinInnerLongOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Supports Key Types IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
                   outputColumnNames: _col2, _col4
                   input vertices:
                     0 Map 1
@@ -291,9 +364,16 @@ STAGE PLANS:
                   Select Operator
                     expressions: _col4 (type: int), _col2 (type: int)
                     outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumns: [0, 1]
                     Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                     File Output Operator
                       compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
                       Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out b/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
index b6a3b9a..d7ebd2b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out
@@ -211,7 +211,7 @@ POSTHOOK: Output: default@store
 PREHOOK: query: -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with
 -- the 2 TableScanOperators that have different schema.
 
-explain select
+explain vectorization select
         s_state, count(1)
  from store_sales,
  store,
@@ -226,7 +226,7 @@ PREHOOK: type: QUERY
 POSTHOOK: query: -- For MR, we are verifying this query DOES NOT vectorize the Map vertex with
 -- the 2 TableScanOperators that have different schema.
 
-explain select
+explain vectorization select
         s_state, count(1)
  from store_sales,
  store,
@@ -238,6 +238,10 @@ explain select
  order by s_state
  limit 100
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -273,6 +277,12 @@ STAGE PLANS:
                         value expressions: _col0 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: unknown
+            Map Vectorization:
+                enabled: true
+                groupByVectorOutput: true
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 6 
             Map Operator Tree:
                 TableScan
@@ -293,6 +303,14 @@ STAGE PLANS:
                         value expressions: _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Map 7 
             Map Operator Tree:
                 TableScan
@@ -312,6 +330,14 @@ STAGE PLANS:
                         Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -359,6 +385,13 @@ STAGE PLANS:
                       value expressions: _col1 (type: bigint)
         Reducer 4 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: count(VALUE._col0)
@@ -374,6 +407,13 @@ STAGE PLANS:
                   value expressions: _col1 (type: bigint)
         Reducer 5 
             Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out b/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
index 469c702..d537297 100644
--- a/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out
@@ -65,16 +65,20 @@ POSTHOOK: query: analyze table orc1 compute statistics
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@orc1
 POSTHOOK: Output: default@orc1
-PREHOOK: query: explain from orc1 a
+PREHOOK: query: explain vectorization from orc1 a
 insert overwrite table orc_rn1 select a.* where a.rn < 100
 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
 insert overwrite table orc_rn3 select a.* where a.rn >= 1000
 PREHOOK: type: QUERY
-POSTHOOK: query: explain from orc1 a
+POSTHOOK: query: explain vectorization from orc1 a
 insert overwrite table orc_rn1 select a.* where a.rn < 100
 insert overwrite table orc_rn2 select a.* where a.rn >= 100 and a.rn < 1000
 insert overwrite table orc_rn3 select a.* where a.rn >= 1000
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-3 is a root stage
   Stage-4 depends on stages: Stage-3
@@ -142,6 +146,14 @@ STAGE PLANS:
                             name: default.orc_rn3
             Execution mode: vectorized, llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
 
   Stage: Stage-4
     Dependency Collection

http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out b/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
index 4bfe41a..45520e2 100644
--- a/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_null_projection.q.out
@@ -37,13 +37,17 @@ POSTHOOK: Input: default@values__tmp__table__2
 POSTHOOK: Output: default@b
 POSTHOOK: Lineage: b.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 PREHOOK: query: -- We expect no vectorization due to NULL (void) projection type.
-explain
+explain vectorization expression
 select NULL from a
 PREHOOK: type: QUERY
 POSTHOOK: query: -- We expect no vectorization due to NULL (void) projection type.
-explain
+explain vectorization expression
 select NULL from a
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -71,6 +75,12 @@ STAGE PLANS:
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported
+                vectorized: false
 
   Stage: Stage-0
     Fetch Operator
@@ -87,12 +97,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@a
 #### A masked pattern was here ####
 NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
 select NULL as x from a union distinct select NULL as x from b
 PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
 select NULL as x from a union distinct select NULL as x from b
 POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -128,6 +142,12 @@ STAGE PLANS:
                           Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported
+                vectorized: false
         Map 4 
             Map Operator Tree:
                 TableScan
@@ -149,8 +169,19 @@ STAGE PLANS:
                           Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported
+                vectorized: false
         Reducer 3 
             Execution mode: llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                notVectorizedReason: Key expression for GROUPBY operator: Data type void of Column[KEY._col0] not supported
+                vectorized: false
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: void)