You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2017/02/03 20:03:27 UTC
[02/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN
display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out
index 560235d..8e7e313 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization
select
substr(cstring1, 1, 2)
,substr(cstring1, 2)
@@ -20,7 +20,7 @@ where cbigint % 237 = 0
and length(substr(cstring1, 1, 2)) <= 2
and cstring1 like '%'
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization
select
substr(cstring1, 1, 2)
,substr(cstring1, 2)
@@ -42,22 +42,54 @@ where cbigint % 237 = 0
and length(substr(cstring1, 1, 2)) <= 2
and cstring1 like '%'
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 1816546 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean)
+ Statistics: Num rows: 1024 Data size: 151470 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 1024 Data size: 2265088 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1024 Data size: 2265088 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: alltypesorc
- Filter Operator
- predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean)
- Select Operator
- expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- ListSink
+ ListSink
PREHOOK: query: select
substr(cstring1, 1, 2)
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
index 5193d20..63fff72 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out
@@ -17,10 +17,10 @@ POSTHOOK: query: INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000')
POSTHOOK: type: QUERY
POSTHOOK: Output: default@test
POSTHOOK: Lineage: test.ts EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT ts FROM test
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT ts FROM test
POSTHOOK: type: QUERY
Plan optimized by CBO.
@@ -46,10 +46,10 @@ POSTHOOK: Input: default@test
#### A masked pattern was here ####
0001-01-01 00:00:00
9999-12-31 23:59:59.999999999
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
POSTHOOK: type: QUERY
Plan optimized by CBO.
@@ -85,10 +85,10 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@test
#### A masked pattern was here ####
0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 3652060 23:59:59.999999999
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT ts FROM test
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT ts FROM test
POSTHOOK: type: QUERY
Plan optimized by CBO.
@@ -114,10 +114,10 @@ POSTHOOK: Input: default@test
#### A masked pattern was here ####
0001-01-01 00:00:00
9999-12-31 23:59:59.999999999
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
POSTHOOK: type: QUERY
Plan optimized by CBO.
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
index 0044841..d2f95f0 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
@@ -63,7 +63,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
POSTHOOK: Output: default@alltypesorc_wrong
POSTHOOK: Lineage: alltypesorc_wrong.stimestamp1 SIMPLE []
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
to_unix_timestamp(ctimestamp1) AS c1,
year(ctimestamp1),
month(ctimestamp1),
@@ -76,7 +76,7 @@ PREHOOK: query: EXPLAIN SELECT
FROM alltypesorc_string
ORDER BY c1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
to_unix_timestamp(ctimestamp1) AS c1,
year(ctimestamp1),
month(ctimestamp1),
@@ -89,6 +89,10 @@ POSTHOOK: query: EXPLAIN SELECT
FROM alltypesorc_string
ORDER BY c1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -106,26 +110,61 @@ STAGE PLANS:
TableScan
alias: alltypesorc_string
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Select Operator
expressions: to_unix_timestamp(ctimestamp1) (type: bigint), year(ctimestamp1) (type: int), month(ctimestamp1) (type: int), day(ctimestamp1) (type: int), dayofmonth(ctimestamp1) (type: int), weekofyear(ctimestamp1) (type: int), hour(ctimestamp1) (type: int), minute(ctimestamp1) (type: int), second(ctimestamp1) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10]
+ selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFYearTimestamp(col 0, field YEAR) -> 3:long, VectorUDFMonthTimestamp(col 0, field MONTH) -> 4:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 6:long, VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 7:long, VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 8:long, VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 9:long, VectorUDFSecondTimestamp(col 0, field SECOND) -> 10:long
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: bigint)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -208,7 +247,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
to_unix_timestamp(stimestamp1) AS c1,
year(stimestamp1),
month(stimestamp1),
@@ -221,7 +260,7 @@ PREHOOK: query: EXPLAIN SELECT
FROM alltypesorc_string
ORDER BY c1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
to_unix_timestamp(stimestamp1) AS c1,
year(stimestamp1),
month(stimestamp1),
@@ -234,6 +273,10 @@ POSTHOOK: query: EXPLAIN SELECT
FROM alltypesorc_string
ORDER BY c1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -251,26 +294,61 @@ STAGE PLANS:
TableScan
alias: alltypesorc_string
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Select Operator
expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10]
+ selectExpressions: VectorUDFUnixTimeStampString(col 1) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 6:long, VectorUDFWeekOfYearString(col 1) -> 7:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 8:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 9:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 10:long
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: bigint)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -353,7 +431,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1,
year(ctimestamp1) = year(stimestamp1),
month(ctimestamp1) = month(stimestamp1),
@@ -366,7 +444,7 @@ PREHOOK: query: EXPLAIN SELECT
FROM alltypesorc_string
ORDER BY c1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1) AS c1,
year(ctimestamp1) = year(stimestamp1),
month(ctimestamp1) = month(stimestamp1),
@@ -379,6 +457,10 @@ POSTHOOK: query: EXPLAIN SELECT
FROM alltypesorc_string
ORDER BY c1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -396,26 +478,61 @@ STAGE PLANS:
TableScan
alias: alltypesorc_string
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Select Operator
expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [4, 5, 6, 7, 8, 9, 10, 11, 12]
+ selectExpressions: LongColEqualLongColumn(col 2, col 3)(children: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFUnixTimeStampString(col 1) -> 3:long) -> 4:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFYearTimestamp(col 0, field YEAR) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long) -> 5:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMonthTimestamp(col 0, field MONTH) -> 2:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 3:long) -> 6:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 8:long, LongColEqualLongColumn(col 2, col 3)(childre
n: VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 2:long, VectorUDFWeekOfYearString(col 1) -> 3:long) -> 9:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 2:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 3:long) -> 10:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 2:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 3:long) -> 11:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFSecondTimestamp(col 0, field SECOND) -> 2:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 3:long) -> 12:long
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: boolean), VALUE._col1 (type: boolean), VALUE._col2 (type: boolean), VALUE._col3 (type: boolean), VALUE._col4 (type: boolean), VALUE._col5 (type: boolean), VALUE._col6 (type: boolean), VALUE._col7 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -498,7 +615,7 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
to_unix_timestamp(stimestamp1) AS c1,
year(stimestamp1),
month(stimestamp1),
@@ -511,7 +628,7 @@ PREHOOK: query: EXPLAIN SELECT
FROM alltypesorc_wrong
ORDER BY c1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
to_unix_timestamp(stimestamp1) AS c1,
year(stimestamp1),
month(stimestamp1),
@@ -524,6 +641,10 @@ POSTHOOK: query: EXPLAIN SELECT
FROM alltypesorc_wrong
ORDER BY c1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -541,26 +662,61 @@ STAGE PLANS:
TableScan
alias: alltypesorc_wrong
Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0]
Select Operator
expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9]
+ selectExpressions: VectorUDFUnixTimeStampString(col 0) -> 1:long, VectorUDFYearString(col 0, fieldStart 0, fieldLength 4) -> 2:long, VectorUDFMonthString(col 0, fieldStart 5, fieldLength 2) -> 3:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFWeekOfYearString(col 0) -> 6:long, VectorUDFHourString(col 0, fieldStart 11, fieldLength 2) -> 7:long, VectorUDFMinuteString(col 0, fieldStart 14, fieldLength 2) -> 8:long, VectorUDFSecondString(col 0, fieldStart 17, fieldLength 2) -> 9:long
Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: bigint)
sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -606,20 +762,24 @@ POSTHOOK: Input: default@alltypesorc_wrong
NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
min(ctimestamp1),
max(ctimestamp1),
count(ctimestamp1),
count(*)
FROM alltypesorc_string
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
min(ctimestamp1),
max(ctimestamp1),
count(ctimestamp1),
count(*)
FROM alltypesorc_string
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -637,31 +797,73 @@ STAGE PLANS:
TableScan
alias: alltypesorc_string
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Select Operator
expressions: ctimestamp1 (type: timestamp)
outputColumnNames: ctimestamp1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count()
+ Group By Vectorization:
+ aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 0) -> timestamp, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0, 1, 2, 3]
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
+ Group By Vectorization:
+ aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 1) -> timestamp, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0, 1, 2, 3]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -693,14 +895,18 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc_string
#### A masked pattern was here ####
NULL NULL 0 40
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
round(sum(ctimestamp1), 3)
FROM alltypesorc_string
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
round(sum(ctimestamp1), 3)
FROM alltypesorc_string
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -733,20 +939,47 @@ STAGE PLANS:
value expressions: _col0 (type: double)
Execution mode: llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: Vectorization of aggreation should have succeeded org.apache.hadoop.hive.ql.metadata.HiveException: Vector aggregate not implemented: "sum" for type: "TIMESTAMP (UDAF evaluator mode = PARTIAL1)
+ vectorized: false
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDouble(col 0) -> double
+ className: VectorGroupByOperator
+ vectorOutput: true
+ native: false
+ projectedOutputColumns: [0]
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: round(_col0, 3) (type: double)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1]
+ selectExpressions: RoundWithNumDigitsDoubleToDouble(col 0, decimalPlaces 3) -> 1:double
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -772,7 +1005,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc_string
#### A masked pattern was here ####
NULL
-PREHOOK: query: EXPLAIN SELECT
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
round(avg(ctimestamp1), 0),
variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19,
var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19,
@@ -783,7 +1016,7 @@ PREHOOK: query: EXPLAIN SELECT
round(stddev_samp(ctimestamp1), 3)
FROM alltypesorc_string
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT
round(avg(ctimestamp1), 0),
variance(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19,
var_pop(ctimestamp1) between 8.97077295279421E19 and 8.97077295279422E19,
@@ -794,6 +1027,10 @@ POSTHOOK: query: EXPLAIN SELECT
round(stddev_samp(ctimestamp1), 3)
FROM alltypesorc_string
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -811,12 +1048,26 @@ STAGE PLANS:
TableScan
alias: alltypesorc_string
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
Select Operator
expressions: ctimestamp1 (type: timestamp)
outputColumnNames: ctimestamp1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1)
+ Group By Vectorization:
+ aggregators: VectorUDAFAvgTimestamp(col 0) -> struct<count:bigint,sum:double>, VectorUDAFVarPopTimestamp(col 0) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFVarPopTimestamp(col 0) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFVarSampTimestamp(col 0) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFStdPopTimestamp(col 0) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFStdPopTimestamp(col 0) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFStdPopTimestamp(col 0) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFStdSampTimestamp(col 0) -> struct<count:bigint,sum:double,variance:double>
+ className: VectorGroupByOperator
+ vectorOutput: false
+ native: false
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7]
+ vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgTimestamp(col 0) -> struct<count:bigint,sum:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampTimestamp(col 0) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct<count
:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampTimestamp(col 0) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
@@ -826,8 +1077,21 @@ STAGE PLANS:
value expressions: _col0 (type: struct<count:bigint,sum:double,input:timestamp>), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,input:timestamp> of Column[VALUE._col0] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out
index dbfe45e..9053c9b 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
cast (ctinyint as timestamp)
@@ -16,7 +16,7 @@ from alltypesorc
where cbigint % 250 = 0
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
cast (ctinyint as timestamp)
@@ -34,22 +34,69 @@ from alltypesorc
where cbigint % 250 = 0
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 1684250 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 250) -> 12:long) -> boolean
+ predicate: ((cbigint % 250) = 0) (type: boolean)
+ Statistics: Num rows: 6144 Data size: 842180 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [14, 16, 18, 20, 21, 22, 24, 26, 8, 27, 29]
+ selectExpressions: CastMillisecondsLongToTimestamp(col 0) -> 14:timestamp, CastMillisecondsLongToTimestamp(col 1) -> 16:timestamp, CastMillisecondsLongToTimestamp(col 2) -> 18:timestamp, CastMillisecondsLongToTimestamp(col 3) -> 20:timestamp, CastDoubleToTimestamp(col 4) -> 21:timestamp, CastDoubleToTimestamp(col 5) -> 22:timestamp, CastMillisecondsLongToTimestamp(col 10) -> 24:timestamp, CastMillisecondsLongToTimestamp(col 12)(children: LongColMultiplyLongScalar(col 3, val 0) -> 12:long) -> 26:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 27:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6, start 0, length 1) -> 28:string) -> 29:timestamp
+ Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: alltypesorc
- Filter Operator
- predicate: ((cbigint % 250) = 0) (type: boolean)
- Select Operator
- expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- ListSink
+ ListSink
PREHOOK: query: select
@@ -115,7 +162,7 @@ POSTHOOK: Input: default@alltypesorc
1969-12-31 15:59:59.95 1969-12-31 15:59:52.804 NULL 1969-12-19 17:33:32.75 1969-12-31 15:59:10 1969-12-31 14:00:04 NULL 1969-12-31 16:00:00 1969-12-31 15:59:54.733 NULL NULL
1969-12-31 16:00:00.011 NULL 1969-12-30 22:03:04.018 1970-01-21 12:50:53.75 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL
1969-12-31 16:00:00.011 NULL 1969-12-27 18:49:09.583 1970-01-14 22:35:27 1969-12-31 16:00:11 NULL 1969-12-31 16:00:00.001 1969-12-31 16:00:00 1969-12-31 16:00:02.351 NULL NULL
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select
cast (ctinyint as timestamp)
@@ -133,7 +180,7 @@ from alltypesorc
where cbigint % 250 = 0
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select
cast (ctinyint as timestamp)
@@ -151,22 +198,69 @@ from alltypesorc
where cbigint % 250 = 0
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 1684250 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 250) -> 12:long) -> boolean
+ predicate: ((cbigint % 250) = 0) (type: boolean)
+ Statistics: Num rows: 6144 Data size: 842180 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19, 20, 8, 21, 23]
+ selectExpressions: CastLongToTimestamp(col 0) -> 13:timestamp, CastLongToTimestamp(col 1) -> 14:timestamp, CastLongToTimestamp(col 2) -> 15:timestamp, CastLongToTimestamp(col 3) -> 16:timestamp, CastDoubleToTimestamp(col 4) -> 17:timestamp, CastDoubleToTimestamp(col 5) -> 18:timestamp, CastLongToTimestamp(col 10) -> 19:timestamp, CastLongToTimestamp(col 12)(children: LongColMultiplyLongScalar(col 3, val 0) -> 12:long) -> 20:timestamp, VectorUDFAdaptor(CAST( cstring1 AS TIMESTAMP)) -> 21:timestamp, VectorUDFAdaptor(CAST( substr(cstring1, 1, 1) AS TIMESTAMP))(children: StringSubstrColStartLen(col 6, start 0, length 1) -> 22:string) -> 23:timestamp
+ Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+
Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
- TableScan
- alias: alltypesorc
- Filter Operator
- predicate: ((cbigint % 250) = 0) (type: boolean)
- Select Operator
- expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- ListSink
+ ListSink
PREHOOK: query: select