You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2017/02/03 20:03:50 UTC
[25/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out b/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
index 207e9bb..6da29e0 100644
--- a/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_interval_1.q.out
@@ -38,20 +38,24 @@ POSTHOOK: Lineage: vector_interval_1.dt EXPRESSION []
POSTHOOK: Lineage: vector_interval_1.str1 EXPRESSION []
POSTHOOK: Lineage: vector_interval_1.str2 EXPRESSION []
POSTHOOK: Lineage: vector_interval_1.ts EXPRESSION []
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
str1,
interval '1-2' year to month, interval_year_month(str1),
interval '1 2:3:4' day to second, interval_day_time(str2)
from vector_interval_1 order by str1
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
str1,
interval '1-2' year to month, interval_year_month(str1),
interval '1 2:3:4' day to second, interval_day_time(str2)
from vector_interval_1 order by str1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -69,26 +73,62 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: str1 (type: string), CAST( str1 AS INTERVAL YEAR TO MONTH) (type: interval_year_month), CAST( str2 AS INTERVAL DAY TO SECOND) (type: interval_day_time)
outputColumnNames: _col0, _col2, _col4
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 4, 5]
+ selectExpressions: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalDayTime(col 3) -> 5:interval_day_time
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: interval_year_month), _col4 (type: interval_day_time)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), 1-2 (type: interval_year_month), VALUE._col0 (type: interval_year_month), 1 02:03:04.000000000 (type: interval_day_time), VALUE._col1 (type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 3, 1, 4, 2]
+ selectExpressions: ConstantVectorExpression(val 14) -> 3:long, ConstantVectorExpression(val 1 02:03:04.000000000) -> 4:interval_day_time
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -119,7 +159,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL 1-2 NULL 1 02:03:04.000000000 NULL
1-2 1-2 1-2 1 02:03:04.000000000 1 02:03:04.000000000
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
dt,
interval '1-2' year to month + interval '1-2' year to month,
@@ -130,7 +170,7 @@ select
interval '1-2' year to month - interval_year_month(str1)
from vector_interval_1 order by dt
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
dt,
interval '1-2' year to month + interval '1-2' year to month,
@@ -141,6 +181,10 @@ select
interval '1-2' year to month - interval_year_month(str1)
from vector_interval_1 order by dt
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -158,26 +202,62 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: dt (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (CAST( str1 AS INTERVAL YEAR TO MONTH) - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month), (1-2 - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: interval_year_month)
outputColumnNames: _col0, _col2, _col3, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 6, 5, 8, 7]
+ selectExpressions: IntervalYearMonthColAddIntervalYearMonthColumn(col 4, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:long, IntervalYearMonthScalarAddIntervalYearMonthColumn(val 14, col 4)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month) -> 5:long, IntervalYearMonthColSubtractIntervalYearMonthColumn(col 4, col 7)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month, CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, IntervalYearMonthScalarSubtractIntervalYearMonthColumn(val 14, col 4)(children: CastStringToIntervalYearMonth(col 2) -> 4:interval_year_month) -> 7:long
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: interval_year_month), _col3 (type: interval_year_month), _col5 (type: interval_year_month), _col6 (type: interval_year_month)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: date), 2-4 (type: interval_year_month), VALUE._col0 (type: interval_year_month), VALUE._col1 (type: interval_year_month), 0-0 (type: interval_year_month), VALUE._col2 (type: interval_year_month), VALUE._col3 (type: interval_year_month)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 5, 1, 2, 6, 3, 4]
+ selectExpressions: ConstantVectorExpression(val 28) -> 5:long, ConstantVectorExpression(val 0) -> 6:long
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -216,7 +296,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL 2-4 NULL NULL 0-0 NULL NULL
2001-01-01 2-4 2-4 2-4 0-0 0-0 0-0
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
dt,
interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -227,7 +307,7 @@ select
interval '1 2:3:4' day to second - interval_day_time(str2)
from vector_interval_1 order by dt
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
dt,
interval '1 2:3:4' day to second + interval '1 2:3:4' day to second,
@@ -238,6 +318,10 @@ select
interval '1 2:3:4' day to second - interval_day_time(str2)
from vector_interval_1 order by dt
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -255,26 +339,62 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: dt (type: date), (CAST( str2 AS INTERVAL DAY TO SECOND) + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (CAST( str2 AS INTERVAL DAY TO SECOND) - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time), (1 02:03:04.000000000 - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: interval_day_time)
outputColumnNames: _col0, _col2, _col3, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 6, 5, 8, 7]
+ selectExpressions: IntervalDayTimeColAddIntervalDayTimeColumn(col 4, col 5)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time, CastStringToIntervalDayTime(col 3) -> 5:interval_day_time) -> 6:interval_day_time, IntervalDayTimeScalarAddIntervalDayTimeColumn(val 1 02:03:04.000000000, col 4)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time) -> 5:timestamp, IntervalDayTimeColSubtractIntervalDayTimeColumn(col 4, col 7)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time, CastStringToIntervalDayTime(col 3) -> 7:interval_day_time) -> 8:interval_day_time, IntervalDayTimeScalarSubtractIntervalDayTimeColumn(val 1 02:03:04.000000000, col 4)(children: CastStringToIntervalDayTime(col 3) -> 4:interval_day_time) -> 7:timestamp
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: date), 2 04:06:08.000000000 (type: interval_day_time), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), 0 00:00:00.000000000 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 5, 1, 2, 6, 3, 4]
+ selectExpressions: ConstantVectorExpression(val 2 04:06:08.000000000) -> 5:interval_day_time, ConstantVectorExpression(val 0 00:00:00.000000000) -> 6:interval_day_time
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -313,7 +433,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL 2 04:06:08.000000000 NULL NULL 0 00:00:00.000000000 NULL NULL
2001-01-01 2 04:06:08.000000000 2 04:06:08.000000000 2 04:06:08.000000000 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
dt,
dt + interval '1-2' year to month,
@@ -330,7 +450,7 @@ select
dt - interval_day_time(str2)
from vector_interval_1 order by dt
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
dt,
dt + interval '1-2' year to month,
@@ -347,6 +467,10 @@ select
dt - interval_day_time(str2)
from vector_interval_1 order by dt
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -364,26 +488,61 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: dt (type: date), (dt + 1-2) (type: date), (dt + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (1-2 + dt) (type: date), (CAST( str1 AS INTERVAL YEAR TO MONTH) + dt) (type: date), (dt - 1-2) (type: date), (dt - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: date), (dt + 1 02:03:04.000000000) (type: timestamp), (dt + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + dt) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + dt) (type: timestamp), (dt - 1 02:03:04.000000000) (type: timestamp), (dt - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 4, 6, 5, 8, 7, 10, 11, 13, 14, 15, 16, 17]
+ selectExpressions: DateColAddIntervalYearMonthScalar(col 1, val 1-2) -> 4:long, DateColAddIntervalYearMonthColumn(col 1, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:long, IntervalYearMonthScalarAddDateColumn(val 1-2, col 1) -> 5:long, IntervalYearMonthColAddDateColumn(col 7, col 1)(children: CastStringToIntervalYearMonth(col 2) -> 7:interval_year_month) -> 8:long, DateColSubtractIntervalYearMonthScalar(col 1, val 1-2) -> 7:long, DateColSubtractIntervalYearMonthColumn(col 1, col 9)(children: CastStringToIntervalYearMonth(col 2) -> 9:interval_year_month) -> 10:long, DateColAddIntervalDayTimeScalar(col 1, val 1 02:03:04.000000000) -> 11:timestamp, DateColAddIntervalDayTimeColumn(col 1, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 13:timestamp, IntervalDayTimeScalarAddDateColumn(val 1 02:03:04.000000000, col 1) -> 14:timestamp, IntervalDayTimeColAddDateColumn(col 12, col 1)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 15:interval_day_time, DateColSubtractIntervalDayTimeScalar(col 1, val 1 02:03:04.000000000) -> 16:timestamp, DateColSubtractIntervalDayTimeColumn(col 1, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 17:timestamp
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: date), _col2 (type: date), _col3 (type: date), _col4 (type: date), _col5 (type: date), _col6 (type: date), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: date), VALUE._col2 (type: date), VALUE._col3 (type: date), VALUE._col4 (type: date), VALUE._col5 (type: date), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -434,7 +593,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
2001-01-01 2002-03-01 2002-03-01 2002-03-01 2002-03-01 1999-11-01 1999-11-01 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2001-01-02 02:03:04 2000-12-30 21:56:56 2000-12-30 21:56:56
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
ts,
ts + interval '1-2' year to month,
@@ -451,7 +610,7 @@ select
ts - interval_day_time(str2)
from vector_interval_1 order by ts
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
ts,
ts + interval '1-2' year to month,
@@ -468,6 +627,10 @@ select
ts - interval_day_time(str2)
from vector_interval_1 order by ts
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -485,26 +648,61 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: ts (type: timestamp), (ts + 1-2) (type: timestamp), (ts + CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (1-2 + ts) (type: timestamp), (CAST( str1 AS INTERVAL YEAR TO MONTH) + ts) (type: timestamp), (ts - 1-2) (type: timestamp), (ts - CAST( str1 AS INTERVAL YEAR TO MONTH)) (type: timestamp), (ts + 1 02:03:04.000000000) (type: timestamp), (ts + CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp), (1 02:03:04.000000000 + ts) (type: timestamp), (CAST( str2 AS INTERVAL DAY TO SECOND) + ts) (type: timestamp), (ts - 1 02:03:04.000000000) (type: timestamp), (ts - CAST( str2 AS INTERVAL DAY TO SECOND)) (type: timestamp)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 4, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]
+ selectExpressions: TimestampColAddIntervalYearMonthScalar(col 0, val 1-2) -> 4:timestamp, TimestampColAddIntervalYearMonthColumn(col 0, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 6:timestamp, IntervalYearMonthScalarAddTimestampColumn(val 1-2, col 0) -> 7:timestamp, IntervalYearMonthColAddTimestampColumn(col 5, col 0)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 8:timestamp, TimestampColSubtractIntervalYearMonthScalar(col 0, val 1-2) -> 9:timestamp, TimestampColSubtractIntervalYearMonthColumn(col 0, col 5)(children: CastStringToIntervalYearMonth(col 2) -> 5:interval_year_month) -> 10:timestamp, TimestampColAddIntervalDayTimeScalar(col 0, val 1 02:03:04.000000000) -> 11:timestamp, TimestampColAddIntervalDayTimeColumn(col 0, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 13:timestamp, IntervalDayTimeScalarAddTimestampColumn(val 1 02:03:04.000000000, col 0) -> 14:timestamp, IntervalDayTimeColAddTimestampColumn(col 12, col 0)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 15:timestamp, TimestampColSubtractIntervalDayTimeScalar(col 0, val 1 02:03:04.000000000) -> 16:timestamp, TimestampColSubtractIntervalDayTimeColumn(col 0, col 12)(children: CastStringToIntervalDayTime(col 3) -> 12:interval_day_time) -> 17:timestamp
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: timestamp)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: timestamp), _col2 (type: timestamp), _col3 (type: timestamp), _col4 (type: timestamp), _col5 (type: timestamp), _col6 (type: timestamp), _col7 (type: timestamp), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: timestamp), VALUE._col2 (type: timestamp), VALUE._col3 (type: timestamp), VALUE._col4 (type: timestamp), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: timestamp), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -555,7 +753,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
2001-01-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 2002-03-01 01:02:03 1999-11-01 01:02:03 1999-11-01 01:02:03 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2001-01-02 03:05:07 2000-12-30 22:58:59 2000-12-30 22:58:59
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
ts,
ts - ts,
@@ -563,7 +761,7 @@ select
ts - timestamp '2001-01-01 01:02:03'
from vector_interval_1 order by ts
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
ts,
ts - ts,
@@ -571,6 +769,10 @@ select
ts - timestamp '2001-01-01 01:02:03'
from vector_interval_1 order by ts
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -588,26 +790,61 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: ts (type: timestamp), (ts - ts) (type: interval_day_time), (2001-01-01 01:02:03.0 - ts) (type: interval_day_time), (ts - 2001-01-01 01:02:03.0) (type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 4, 5, 6]
+ selectExpressions: TimestampColSubtractTimestampColumn(col 0, col 0) -> 4:interval_day_time, TimestampScalarSubtractTimestampColumn(val 2001-01-01 01:02:03.0, col 0) -> 5:timestamp, TimestampColSubtractTimestampScalar(col 0, val 2001-01-01 01:02:03.0) -> 6:interval_day_time
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: timestamp)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -640,7 +877,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL NULL NULL NULL
2001-01-01 01:02:03 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
dt,
dt - dt,
@@ -648,7 +885,7 @@ select
dt - date '2001-01-01'
from vector_interval_1 order by dt
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
dt,
dt - dt,
@@ -656,6 +893,10 @@ select
dt - date '2001-01-01'
from vector_interval_1 order by dt
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -673,26 +914,61 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: dt (type: date), (dt - dt) (type: interval_day_time), (2001-01-01 - dt) (type: interval_day_time), (dt - 2001-01-01) (type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 4, 5, 6]
+ selectExpressions: DateColSubtractDateColumn(col 1, col 1) -> 4:timestamp, DateScalarSubtractDateColumn(val 2001-01-01 00:00:00.0, col 1) -> 5:timestamp, DateColSubtractDateScalar(col 1, val 2001-01-01 00:00:00.0) -> 6:timestamp
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -725,7 +1001,7 @@ POSTHOOK: Input: default@vector_interval_1
#### A masked pattern was here ####
NULL NULL NULL NULL
2001-01-01 0 00:00:00.000000000 0 00:00:00.000000000 0 00:00:00.000000000
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
dt,
ts - dt,
@@ -736,7 +1012,7 @@ select
date '2001-01-01' - ts
from vector_interval_1 order by dt
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
dt,
ts - dt,
@@ -747,6 +1023,10 @@ select
date '2001-01-01' - ts
from vector_interval_1 order by dt
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -764,26 +1044,61 @@ STAGE PLANS:
TableScan
alias: vector_interval_1
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: dt (type: date), (ts - dt) (type: interval_day_time), (2001-01-01 01:02:03.0 - dt) (type: interval_day_time), (ts - 2001-01-01) (type: interval_day_time), (dt - ts) (type: interval_day_time), (dt - 2001-01-01 01:02:03.0) (type: interval_day_time), (2001-01-01 - ts) (type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 4, 5, 6, 7, 8, 9]
+ selectExpressions: TimestampColSubtractDateColumn(col 0, col 1) -> 4:interval_day_time, TimestampScalarSubtractDateColumn(val 2001-01-01 01:02:03.0, col 1) -> 5:interval_day_time, TimestampColSubtractDateScalar(col 0, val 2001-01-01 00:00:00.0) -> 6:interval_day_time, DateColSubtractTimestampColumn(col 1, col 0) -> 7:interval_day_time, DateColSubtractTimestampScalar(col 1, val 2001-01-01 01:02:03.0) -> 8:interval_day_time, DateScalarSubtractTimestampColumn(val 2001-01-01 00:00:00.0, col 0) -> 9:interval_day_time
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: date)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: interval_day_time), _col2 (type: interval_day_time), _col3 (type: interval_day_time), _col4 (type: interval_day_time), _col5 (type: interval_day_time), _col6 (type: interval_day_time)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: interval_day_time), VALUE._col1 (type: interval_day_time), VALUE._col2 (type: interval_day_time), VALUE._col3 (type: interval_day_time), VALUE._col4 (type: interval_day_time), VALUE._col5 (type: interval_day_time)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6]
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2 Data size: 442 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat