You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by we...@apache.org on 2016/10/19 00:35:28 UTC
[11/62] [partial] hive git commit: Revert "Revert "Revert
"HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline,
reviewed by Gopal Vijayaraghavan)"""
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
index 14606ed..6c6c6d6 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
@@ -94,16 +94,12 @@ POSTHOOK: Input: default@alltypesorc
POSTHOOK: Output: default@vsmb_bucket_txt
POSTHOOK: Lineage: vsmb_bucket_txt.key SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
POSTHOOK: Lineage: vsmb_bucket_txt.value SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
select /*+MAPJOIN(a)*/ * from vsmb_bucket_1 a join vsmb_bucket_2 b on a.key = b.key
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -121,71 +117,33 @@ STAGE PLANS:
TableScan
alias: a
Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedOutputColumns: [0, 1]
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: key is not null (type: boolean)
Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
value expressions: value (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 3
Map Operator Tree:
TableScan
alias: b
Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedOutputColumns: [0, 1]
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: key is not null (type: boolean)
Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
value expressions: value (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -229,16 +187,12 @@ POSTHOOK: Input: default@vsmb_bucket_2
528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
528534767 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_RC b on a.key = b.key
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -256,36 +210,17 @@ STAGE PLANS:
TableScan
alias: a
Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedOutputColumns: [0, 1]
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: key is not null (type: boolean)
Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
value expressions: value (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 3
Map Operator Tree:
TableScan
@@ -302,10 +237,6 @@ STAGE PLANS:
value expressions: value (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map Vectorization:
- enabled: false
- enabledConditionsNotMet: hive.vectorized.use.row.serde.deserialize IS false
- inputFileFormats: org.apache.hadoop.hive.ql.io.RCFileInputFormat
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -354,7 +285,7 @@ PREHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileforma
-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
-explain vectorization expression
+explain
select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileformat out-of-the-box
@@ -362,13 +293,9 @@ POSTHOOK: query: -- RC file does not yet provide the vectorized CommonRCFileform
-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
-- select /*+MAPJOIN(b)*/ * from vsmb_bucket_RC a join vsmb_bucket_2 b on a.key = b.key;
-explain vectorization expression
+explain
select /*+MAPJOIN(b)*/ * from vsmb_bucket_1 a join vsmb_bucket_TXT b on a.key = b.key
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -386,36 +313,17 @@ STAGE PLANS:
TableScan
alias: a
Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedOutputColumns: [0, 1]
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: key is not null (type: boolean)
Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Reduce Sink Vectorization:
- className: VectorReduceSinkLongOperator
- native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, Uniform Hash IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
value expressions: value (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 3
Map Operator Tree:
TableScan
@@ -432,10 +340,6 @@ STAGE PLANS:
value expressions: value (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map Vectorization:
- enabled: false
- enabledConditionsNotMet: hive.vectorized.use.vector.serde.deserialize IS false
- inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
Reducer 2
Execution mode: llap
Reduce Operator Tree:
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
index 127c2c3..6e13369 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
select
csmallint,
case
@@ -16,7 +16,7 @@ where csmallint = 418
or csmallint = 12205
or csmallint = 10583
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
select
csmallint,
case
@@ -34,10 +34,6 @@ where csmallint = 418
or csmallint = 12205
or csmallint = 10583
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -52,30 +48,15 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean
predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [1, 14, 15]
- selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:String, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:String
Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -83,14 +64,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: true
- vectorized: true
Stage: Stage-0
Fetch Operator
@@ -140,7 +113,7 @@ POSTHOOK: Input: default@alltypesorc
10583 c c
418 a a
12205 b b
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain
select
csmallint,
case
@@ -158,7 +131,7 @@ where csmallint = 418
or csmallint = 12205
or csmallint = 10583
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain
select
csmallint,
case
@@ -176,10 +149,6 @@ where csmallint = 418
or csmallint = 12205
or csmallint = 10583
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -194,30 +163,15 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE
- TableScan Vectorization:
- native: true
- projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
- Filter Vectorization:
- className: VectorFilterOperator
- native: true
- predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean
predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string)
outputColumnNames: _col0, _col1, _col2
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [1, 14, 15]
- selectExpressions: VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:String, VectorUDFAdaptor(CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 15:String
Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -225,14 +179,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: true
- vectorized: true
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
index 0fb8552..a95702d 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
@@ -3,7 +3,7 @@ PREHOOK: query: -- SORT_QUERY_RESULTS
-- Currently, vectorization is not supported in fetch task (hive.fetch.task.conversion=none)
-- Test type casting in vectorized mode to verify end-to-end functionality.
-explain vectorization
+explain
select
-- to boolean
cast (ctinyint as boolean)
@@ -82,7 +82,7 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS
-- Currently, vectorization is not supported in fetch task (hive.fetch.task.conversion=none)
-- Test type casting in vectorized mode to verify end-to-end functionality.
-explain vectorization
+explain
select
-- to boolean
cast (ctinyint as boolean)
@@ -156,10 +156,6 @@ from alltypesorc
-- limit output to a reasonably small number of rows
where cbigint % 250 = 0
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -190,14 +186,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: true
- vectorized: true
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vectorized_context.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_context.q.out b/ql/src/test/results/clientpositive/llap/vectorized_context.q.out
index 855a50f..1f70a01 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_context.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_context.q.out
@@ -82,24 +82,20 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
POSTHOOK: Output: default@household_demographics
POSTHOOK: Lineage: household_demographics.hd_demo_sk SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
-PREHOOK: query: explain vectorization
+PREHOOK: query: explain
select store.s_city, ss_net_profit
from store_sales
JOIN store ON store_sales.ss_store_sk = store.s_store_sk
JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization
+POSTHOOK: query: explain
select store.s_city, ss_net_profit
from store_sales
JOIN store ON store_sales.ss_store_sk = store.s_store_sk
JOIN household_demographics ON store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
limit 100
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -132,14 +128,6 @@ STAGE PLANS:
value expressions: _col1 (type: int), _col2 (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Map 2
Map Operator Tree:
TableScan
@@ -188,14 +176,6 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Map 3
Map Operator Tree:
TableScan
@@ -215,14 +195,6 @@ STAGE PLANS:
Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
index e2999a5..f45e730 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out
@@ -203,7 +203,7 @@ POSTHOOK: Input: default@date_udf_flight_orc
2010-10-31 2010-10-31 07:00:00
2010-10-31 2010-10-31 07:00:00
2010-10-31 2010-10-31 07:00:00
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
+PREHOOK: query: EXPLAIN SELECT
to_unix_timestamp(fl_time),
year(fl_time),
month(fl_time),
@@ -218,7 +218,7 @@ PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
datediff(fl_time, "2000-01-01")
FROM date_udf_flight_orc
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
+POSTHOOK: query: EXPLAIN SELECT
to_unix_timestamp(fl_time),
year(fl_time),
month(fl_time),
@@ -233,62 +233,20 @@ POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
datediff(fl_time, "2000-01-01")
FROM date_udf_flight_orc
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: date_udf_flight_orc
- Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedOutputColumns: [0, 1]
- Select Operator
- expressions: to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), dayofweek(fl_time) (type: int), weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), to_date(fl_time) (type: date), date_add(fl_time, 2) (type: date), date_sub(fl_time, 2) (type: date), datediff(fl_time, '2000-01-01') (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
- selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1) -> 2:long, VectorUDFYearTimestamp(col 1, field YEAR) -> 3:long, VectorUDFMonthTimestamp(col 1, field MONTH) -> 4:long, VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 6:long, VectorUDFDayOfWeekTimestamp(col 1, field DAY_OF_WEEK) -> 7:long, VectorUDFWeekOfYearTimestamp(col 1, field WEEK_OF_YEAR) -> 8:long, CastTimestampToDate(col 1) -> 9:date, VectorUDFDateTimestamp(col 1) -> 10:date, VectorUDFDateAddColScalar(col 1, val 2) -> 11:date, VectorUDFDateSubColScalar(col 1, val 2) -> 12:date, VectorUDFDateDiffColScalar(col 1, val 2000-01-01) -> 13:long
- Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized, llap
- LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: date_udf_flight_orc
+ Select Operator
+ expressions: to_unix_timestamp(fl_time) (type: bigint), year(fl_time) (type: int), month(fl_time) (type: int), day(fl_time) (type: int), dayofmonth(fl_time) (type: int), dayofweek(fl_time) (type: int), weekofyear(fl_time) (type: int), CAST( fl_time AS DATE) (type: date), to_date(fl_time) (type: date), date_add(fl_time, 2) (type: date), date_sub(fl_time, 2) (type: date), datediff(fl_time, '2000-01-01') (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ ListSink
PREHOOK: query: SELECT
to_unix_timestamp(fl_time),
@@ -461,7 +419,7 @@ POSTHOOK: Input: default@date_udf_flight_orc
1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956
1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956
1288533600 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
+PREHOOK: query: EXPLAIN SELECT
to_unix_timestamp(fl_date),
year(fl_date),
month(fl_date),
@@ -476,7 +434,7 @@ PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
datediff(fl_date, "2000-01-01")
FROM date_udf_flight_orc
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
+POSTHOOK: query: EXPLAIN SELECT
to_unix_timestamp(fl_date),
year(fl_date),
month(fl_date),
@@ -491,62 +449,20 @@ POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
datediff(fl_date, "2000-01-01")
FROM date_udf_flight_orc
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: date_udf_flight_orc
- Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedOutputColumns: [0, 1]
- Select Operator
- expressions: to_unix_timestamp(fl_date) (type: bigint), year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), dayofweek(fl_date) (type: int), weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: date), date_add(fl_date, 2) (type: date), date_sub(fl_date, 2) (type: date), datediff(fl_date, '2000-01-01') (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 0, 9, 10, 11, 12]
- selectExpressions: VectorUDFUnixTimeStampDate(col 0) -> 2:long, VectorUDFYearDate(col 0, field YEAR) -> 3:long, VectorUDFMonthDate(col 0, field MONTH) -> 4:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 6:long, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 7:long, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 8:long, VectorUDFDateLong(col 0) -> 9:date, VectorUDFDateAddColScalar(col 0, val 2) -> 10:date, VectorUDFDateSubColScalar(col 0, val 2) -> 11:date, VectorUDFDateDiffColScalar(col 0, val 2000-01-01) -> 12:long
- Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized, llap
- LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: date_udf_flight_orc
+ Select Operator
+ expressions: to_unix_timestamp(fl_date) (type: bigint), year(fl_date) (type: int), month(fl_date) (type: int), day(fl_date) (type: int), dayofmonth(fl_date) (type: int), dayofweek(fl_date) (type: int), weekofyear(fl_date) (type: int), fl_date (type: date), to_date(fl_date) (type: date), date_add(fl_date, 2) (type: date), date_sub(fl_date, 2) (type: date), datediff(fl_date, '2000-01-01') (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ ListSink
PREHOOK: query: SELECT
to_unix_timestamp(fl_date),
@@ -719,7 +635,7 @@ POSTHOOK: Input: default@date_udf_flight_orc
1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956
1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956
1288508400 2010 10 31 31 1 43 2010-10-31 2010-10-31 2010-11-02 2010-10-29 3956
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
+PREHOOK: query: EXPLAIN SELECT
year(fl_time) = year(fl_date),
month(fl_time) = month(fl_date),
day(fl_time) = day(fl_date),
@@ -733,7 +649,7 @@ PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01")
FROM date_udf_flight_orc
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
+POSTHOOK: query: EXPLAIN SELECT
year(fl_time) = year(fl_date),
month(fl_time) = month(fl_date),
day(fl_time) = day(fl_date),
@@ -747,62 +663,20 @@ POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
datediff(fl_time, "2000-01-01") = datediff(fl_date, "2000-01-01")
FROM date_udf_flight_orc
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: date_udf_flight_orc
- Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedOutputColumns: [0, 1]
- Select Operator
- expressions: (year(fl_time) = year(fl_date)) (type: boolean), (month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) (type: boolean), (dayofweek(fl_time) = dayofweek(fl_date)) (type: boolean), (weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean), (CAST( fl_time AS DATE) = fl_date) (type: boolean), (to_date(fl_time) = to_date(fl_date)) (type: boolean), (date_add(fl_time, 2) = date_add(fl_date, 2)) (type: boolean), (date_sub(fl_time, 2) = date_sub(fl_date, 2)) (type: boolean), (datediff(fl_time, '2000-01-01') = datediff(fl_date, '2000-01-01')) (type: boolean)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [4, 5, 6, 7, 8, 9, 2, 3, 12, 13, 16]
- selectExpressions: LongColEqualLongColumn(col 2, col 3)(children: VectorUDFYearTimestamp(col 1, field YEAR) -> 2:long, VectorUDFYearDate(col 0, field YEAR) -> 3:long) -> 4:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMonthTimestamp(col 1, field MONTH) -> 2:long, VectorUDFMonthDate(col 0, field MONTH) -> 3:long) -> 5:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 3:long) -> 6:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 1, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthDate(col 0, field DAY_OF_MONTH) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfWeekTimestamp(col 1, field DAY_OF_WEEK) -> 2:long, VectorUDFDayOfWeekDate(col 0, field DAY_OF_WEEK) -> 3:long) -> 8:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFWeekOfYearTimestamp
(col 1, field WEEK_OF_YEAR) -> 2:long, VectorUDFWeekOfYearDate(col 0, field WEEK_OF_YEAR) -> 3:long) -> 9:long, LongColEqualLongColumn(col 10, col 0)(children: CastTimestampToDate(col 1) -> 10:date) -> 2:long, LongColEqualLongColumn(col 10, col 11)(children: VectorUDFDateTimestamp(col 1) -> 10:date, VectorUDFDateLong(col 0) -> 11:date) -> 3:long, LongColEqualLongColumn(col 10, col 11)(children: VectorUDFDateAddColScalar(col 1, val 2) -> 10:date, VectorUDFDateAddColScalar(col 0, val 2) -> 11:date) -> 12:long, LongColEqualLongColumn(col 10, col 11)(children: VectorUDFDateSubColScalar(col 1, val 2) -> 10:date, VectorUDFDateSubColScalar(col 0, val 2) -> 11:date) -> 13:long, LongColEqualLongColumn(col 14, col 15)(children: VectorUDFDateDiffColScalar(col 1, val 2000-01-01) -> 14:long, VectorUDFDateDiffColScalar(col 0, val 2000-01-01) -> 15:long) -> 16:long
- Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized, llap
- LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
-
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- ListSink
+ TableScan
+ alias: date_udf_flight_orc
+ Select Operator
+ expressions: (year(fl_time) = year(fl_date)) (type: boolean), (month(fl_time) = month(fl_date)) (type: boolean), (day(fl_time) = day(fl_date)) (type: boolean), (dayofmonth(fl_time) = dayofmonth(fl_date)) (type: boolean), (dayofweek(fl_time) = dayofweek(fl_date)) (type: boolean), (weekofyear(fl_time) = weekofyear(fl_date)) (type: boolean), (CAST( fl_time AS DATE) = fl_date) (type: boolean), (to_date(fl_time) = to_date(fl_date)) (type: boolean), (date_add(fl_time, 2) = date_add(fl_date, 2)) (type: boolean), (date_sub(fl_time, 2) = date_sub(fl_date, 2)) (type: boolean), (datediff(fl_time, '2000-01-01') = datediff(fl_date, '2000-01-01')) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ ListSink
PREHOOK: query: -- Should all be true or NULL
SELECT
@@ -975,7 +849,7 @@ true true true true true true true true true true true
true true true true true true true true true true true
true true true true true true true true true true true
true true true true true true true true true true true
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
+PREHOOK: query: EXPLAIN SELECT
fl_date,
to_date(date_add(fl_date, 2)),
to_date(date_sub(fl_date, 2)),
@@ -984,7 +858,7 @@ PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
datediff(date_add(fl_date, 2), date_sub(fl_date, 2))
FROM date_udf_flight_orc LIMIT 10
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
+POSTHOOK: query: EXPLAIN SELECT
fl_date,
to_date(date_add(fl_date, 2)),
to_date(date_sub(fl_date, 2)),
@@ -993,68 +867,22 @@ POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
datediff(date_add(fl_date, 2), date_sub(fl_date, 2))
FROM date_udf_flight_orc LIMIT 10
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: date_udf_flight_orc
- Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedOutputColumns: [0, 1]
- Select Operator
- expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: date), to_date(date_sub(fl_date, 2)) (type: date), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [0, 3, 4, 5, 6, 8]
- selectExpressions: VectorUDFDateLong(col 2)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date) -> 3:date, VectorUDFDateLong(col 2)(children: VectorUDFDateSubColScalar(col 0, val 2) -> 2:date) -> 4:date, VectorUDFDateDiffColCol(col 0, col 2)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date) -> 5:long, VectorUDFDateDiffColCol(col 0, col 2)(children: VectorUDFDateSubColScalar(col 0, val 2) -> 2:date) -> 6:long, VectorUDFDateDiffColCol(col 2, col 7)(children: VectorUDFDateAddColScalar(col 0, val 2) -> 2:date, VectorUDFDateSubColScalar(col 0, val 2) -> 7:date) -> 8:long
- Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
- Limit Vectorization:
- className: VectorLimitOperator
- native: true
- Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized, llap
- LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
-
Stage: Stage-0
Fetch Operator
limit: 10
Processor Tree:
- ListSink
+ TableScan
+ alias: date_udf_flight_orc
+ Select Operator
+ expressions: fl_date (type: date), to_date(date_add(fl_date, 2)) (type: date), to_date(date_sub(fl_date, 2)) (type: date), datediff(fl_date, date_add(fl_date, 2)) (type: int), datediff(fl_date, date_sub(fl_date, 2)) (type: int), datediff(date_add(fl_date, 2), date_sub(fl_date, 2)) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Limit
+ Number of rows: 10
+ ListSink
PREHOOK: query: SELECT
fl_date,
@@ -1099,7 +927,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@date_udf_flight_orc
#### A masked pattern was here ####
2009-07-30
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
+PREHOOK: query: EXPLAIN SELECT
min(fl_date) AS c1,
max(fl_date),
count(fl_date),
@@ -1107,7 +935,7 @@ PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
FROM date_udf_flight_orc
ORDER BY c1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
+POSTHOOK: query: EXPLAIN SELECT
min(fl_date) AS c1,
max(fl_date),
count(fl_date),
@@ -1115,10 +943,6 @@ POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
FROM date_udf_flight_orc
ORDER BY c1
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1137,101 +961,43 @@ STAGE PLANS:
TableScan
alias: date_udf_flight_orc
Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedOutputColumns: [0, 1]
Select Operator
expressions: fl_date (type: date)
outputColumnNames: fl_date
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [0]
Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: min(fl_date), max(fl_date), count(fl_date), count()
- Group By Vectorization:
- aggregators: VectorUDAFMinLong(col 0) -> date, VectorUDAFMaxLong(col 0) -> date, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint
- className: VectorGroupByOperator
- vectorOutput: true
- native: false
- projectedOutputColumns: [0, 1, 2, 3]
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 2
Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- groupByVectorOutput: true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
- Group By Vectorization:
- aggregators: VectorUDAFMinLong(col 0) -> date, VectorUDAFMaxLong(col 1) -> date, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint
- className: VectorGroupByOperator
- vectorOutput: true
- native: false
- projectedOutputColumns: [0, 1, 2, 3]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint)
Reducer 3
Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- groupByVectorOutput: true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [0, 1, 2, 3]
Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/ad6ce078/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
index 0b5d516..ced9795 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
@@ -16,14 +16,10 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dtest
POSTHOOK: Lineage: dtest.a SCRIPT []
POSTHOOK: Lineage: dtest.b SIMPLE []
-PREHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest
+PREHOOK: query: explain select sum(distinct a), count(distinct a) from dtest
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest
+POSTHOOK: query: explain select sum(distinct a), count(distinct a) from dtest
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -61,23 +57,8 @@ STAGE PLANS:
value expressions: _col0 (type: bigint), _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 2
Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- groupByVectorOutput: true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0), count(VALUE._col1)
@@ -107,14 +88,10 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@dtest
#### A masked pattern was here ####
300 1
-PREHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc
+PREHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc
+POSTHOOK: query: explain select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc
POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
- enabled: true
- enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -149,23 +126,8 @@ STAGE PLANS:
Statistics: Num rows: 5775 Data size: 17248 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized, llap
LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reducer 2
Execution mode: vectorized, llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- groupByVectorOutput: false
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: int)
@@ -183,11 +145,6 @@ STAGE PLANS:
value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: struct<count:bigint,sum:double,input:int>), _col3 (type: struct<count:bigint,sum:double,variance:double>)
Reducer 3
Execution mode: llap
- Reduce Vectorization:
- enabled: true
- enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,input:int> of Column[VALUE._col2] not supported
- vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2), std(VALUE._col3)