You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/10/17 20:41:57 UTC
[05/67] [abbrv] [partial] hive git commit: Revert "Revert
"HIVE-11394: Enhance EXPLAIN display for vectorization (Matt McCline,
reviewed by Gopal Vijayaraghavan)""
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_data_types.q.out b/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
index dbaf14d..79638c1 100644
--- a/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_data_types.q.out
@@ -97,10 +97,14 @@ POSTHOOK: Lineage: over1korc.s SIMPLE [(over1k)over1k.FieldSchema(name:s, type:s
POSTHOOK: Lineage: over1korc.si SIMPLE [(over1k)over1k.FieldSchema(name:si, type:smallint, comment:null), ]
POSTHOOK: Lineage: over1korc.t SIMPLE [(over1k)over1k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: over1korc.ts SIMPLE [(over1k)over1k.FieldSchema(name:ts, type:timestamp, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: false
+ enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -189,10 +193,14 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@over1korc
#### A masked pattern was here ####
-17045922556
-PREHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION select t, si, i, b, f, d, bo, s, ts, dec, bin FROM over1korc ORDER BY t, si, i LIMIT 20
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -209,29 +217,66 @@ STAGE PLANS:
TableScan
alias: over1korc
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Select Operator
expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int)
sort order: +++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
index cfdfce1..919e290 100644
--- a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
@@ -23,7 +23,7 @@ POSTHOOK: Lineage: decimal_vgby.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchem
PREHOOK: query: -- SORT_QUERY_RESULTS
-- First only do simple aggregations that output primitives only
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1),
COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2)
FROM decimal_vgby
@@ -33,13 +33,17 @@ PREHOOK: type: QUERY
POSTHOOK: query: -- SORT_QUERY_RESULTS
-- First only do simple aggregations that output primitives only
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1),
COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2)
FROM decimal_vgby
GROUP BY cint
HAVING COUNT(*) > 1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -56,12 +60,26 @@ STAGE PLANS:
TableScan
alias: decimal_vgby
Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
outputColumnNames: cint, cdecimal1, cdecimal2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [3, 1, 2]
Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 3
+ native: false
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
keys: cint (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
@@ -70,27 +88,65 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), max(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), count(VALUE._col8)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(20,10), VectorUDAFMinDecimal(col 3) -> decimal(20,10), VectorUDAFSumDecimal(col 4) -> decimal(38,18), VectorUDAFCountMerge(col 5) -> bigint, VectorUDAFMaxDecimal(col 6) -> decimal(23,14), VectorUDAFMinDecimal(col 7) -> decimal(23,14), VectorUDAFSumDecimal(col 8) -> decimal(38,18), VectorUDAFCountMerge(col 9) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColGreaterLongScalar(col 9, val 1) -> boolean
predicate: (_col9 > 1) (type: boolean)
Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -130,7 +186,7 @@ POSTHOOK: Input: default@decimal_vgby
762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250
NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360
PREHOOK: query: -- Now add the others...
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
FROM decimal_vgby
@@ -138,13 +194,17 @@ EXPLAIN SELECT cint,
HAVING COUNT(*) > 1
PREHOOK: type: QUERY
POSTHOOK: query: -- Now add the others...
-EXPLAIN SELECT cint,
+EXPLAIN VECTORIZATION EXPRESSION SELECT cint,
COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
COUNT(cdecimal2), MAX(cdecimal2), MIN(cdecimal2), SUM(cdecimal2), AVG(cdecimal2), STDDEV_POP(cdecimal2), STDDEV_SAMP(cdecimal2)
FROM decimal_vgby
GROUP BY cint
HAVING COUNT(*) > 1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -161,12 +221,27 @@ STAGE PLANS:
TableScan
alias: decimal_vgby
Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
Select Operator
expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14))
outputColumnNames: cint, cdecimal1, cdecimal2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [3, 1, 2]
Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count()
+ Group By Vectorization:
+ aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFAvgDecimal(col 1) -> struct<count:bigint,sum:decimal(30,10)>, VectorUDAFStdPopDecimal(col 1) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFStdSampDecimal(col 1) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFAvgDecimal(col 2) -> struct<count:bigint,sum:decimal(33,14)>, VectorUDAFStdPopDecimal(col 2) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFStdSampDecimal(col 2) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: false
+ keyExpressions: col 3
+ native: false
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
+ vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 1) -> struct<count:bigint,sum:decimal(30,10)> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 1) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 1) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(col 2) -> struct<count:bigint,sum:decimal(33,14)> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 2) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 2) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false
keys: cint (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
@@ -178,7 +253,20 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct<count:bigint,sum:decimal(33,14),input:decimal(23,14)>), _col13 (type: struct<count:bigint,sum:double,variance:double>), _col14 (type: struct<count:bigint,sum:double,variance:double>), _col15 (type: bigint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:decimal(30,10),input:decimal(20,10)> of Column[VALUE._col4] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out
index 0493994..11d7609 100644
--- a/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out
@@ -72,12 +72,16 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@over1k
POSTHOOK: Output: default@t2
POSTHOOK: Lineage: t2.dec EXPRESSION [(over1k)over1k.FieldSchema(name:dec, type:decimal(4,2), comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec)
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec)
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-1 depends on stages: Stage-2
@@ -93,18 +97,40 @@ STAGE PLANS:
TableScan
alias: t2
Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:long) -> boolean
predicate: dec is not null (type: boolean)
Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: dec (type: decimal(4,0))
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
+ Spark Hash Table Sink Vectorization:
+ className: VectorSparkHashTableSinkOperator
+ native: true
keys:
0 _col0 (type: decimal(6,2))
1 _col0 (type: decimal(6,2))
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
Local Work:
Map Reduce Local Work
@@ -117,12 +143,23 @@ STAGE PLANS:
TableScan
alias: t1
Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 1)(children: CastDecimalToBoolean(col 0) -> 1:long) -> boolean
predicate: dec is not null (type: boolean)
Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: dec (type: decimal(4,2))
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
@@ -130,18 +167,35 @@ STAGE PLANS:
keys:
0 _col0 (type: decimal(6,2))
1 _col0 (type: decimal(6,2))
+ Map Join Vectorization:
+ className: VectorMapJoinOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Not empty key IS true, When Fast Hash Table, then requires no Hybrid Hash Join IS true, Small table vectorizes IS true
+ nativeConditionsNotMet: Supports Key Types IS false
+ nativeNotSupportedKeyTypes: DECIMAL
outputColumnNames: _col0, _col1
input vertices:
1 Map 2
Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Local Work:
Map Reduce Local Work
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out b/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out
index 803a53b..af9ec87 100644
--- a/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out
@@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem
POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ]
POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select distinct s, t from vectortab2korc
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select distinct s, t from vectortab2korc
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -127,11 +131,24 @@ STAGE PLANS:
TableScan
alias: vectortab2korc
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Select Operator
expressions: t (type: tinyint), s (type: string)
outputColumnNames: t, s
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 8]
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0, col 8
+ native: false
+ projectedOutputColumns: []
keys: t (type: tinyint), s (type: string)
mode: hash
outputColumnNames: _col0, _col1
@@ -140,12 +157,38 @@ STAGE PLANS:
key expressions: _col0 (type: tinyint), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
+ Group By Vectorization:
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0, col 1
+ native: false
+ projectedOutputColumns: []
keys: KEY._col0 (type: tinyint), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
@@ -153,9 +196,16 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: string), _col0 (type: tinyint)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 0]
Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/spark/vector_elt.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_elt.q.out b/ql/src/test/results/clientpositive/spark/vector_elt.q.out
index bb66867..b49462a 100644
--- a/ql/src/test/results/clientpositive/spark/vector_elt.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_elt.q.out
@@ -1,29 +1,79 @@
-PREHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint)
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint)
FROM alltypesorc
WHERE ctinyint > 0 LIMIT 10
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint)
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint)
FROM alltypesorc
WHERE ctinyint > 0 LIMIT 10
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean
+ predicate: (ctinyint > 0) (type: boolean)
+ Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [13, 6, 2, 16]
+ selectExpressions: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 13:long, VectorElt(columns [14, 6, 15])(children: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 14:long, col 6, CastLongToString(col 2) -> 15:String) -> 16:string
+ Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
Stage: Stage-0
Fetch Operator
limit: 10
Processor Tree:
- TableScan
- alias: alltypesorc
- Filter Operator
- predicate: (ctinyint > 0) (type: boolean)
- Select Operator
- expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Limit
- Number of rows: 10
- ListSink
+ ListSink
PREHOOK: query: SELECT (ctinyint % 2) + 1, cstring1, cint, elt((ctinyint % 2) + 1, cstring1, cint)
FROM alltypesorc
@@ -47,7 +97,7 @@ POSTHOOK: Input: default@alltypesorc
1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
2 cvLH6Eat2yFsyy7p 528534767 528534767
1 cvLH6Eat2yFsyy7p 528534767 cvLH6Eat2yFsyy7p
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT elt(2, 'abc', 'defg'),
elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'),
elt('1', 'abc', 'defg'),
@@ -60,7 +110,7 @@ SELECT elt(2, 'abc', 'defg'),
elt(3, 'abc', 'defg')
FROM alltypesorc LIMIT 1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT elt(2, 'abc', 'defg'),
elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'),
elt('1', 'abc', 'defg'),
@@ -73,22 +123,67 @@ SELECT elt(2, 'abc', 'defg'),
elt(3, 'abc', 'defg')
FROM alltypesorc LIMIT 1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc
+ Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+ Select Operator
+ expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+ selectExpressions: ConstantVectorExpression(val defg) -> 12:string, ConstantVectorExpression(val cc) -> 13:string, ConstantVectorExpression(val abc) -> 14:string, ConstantVectorExpression(val 2) -> 15:string, ConstantVectorExpression(val 12345) -> 16:string, ConstantVectorExpression(val 123456789012) -> 17:string, ConstantVectorExpression(val 1.25) -> 18:string, ConstantVectorExpression(val 16.0) -> 19:string, ConstantVectorExpression(val null) -> 20:string, ConstantVectorExpression(val null) -> 21:string
+ Statistics: Num rows: 12288 Data size: 8687784 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 1
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 875 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
Stage: Stage-0
Fetch Operator
limit: 1
Processor Tree:
- TableScan
- alias: alltypesorc
- Select Operator
- expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: string), null (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Limit
- Number of rows: 1
- ListSink
+ ListSink
PREHOOK: query: SELECT elt(2, 'abc', 'defg'),
elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'),
http://git-wip-us.apache.org/repos/asf/hive/blob/16d28b34/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out b/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out
index e13c311..a99dfd8 100644
--- a/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out
@@ -105,12 +105,16 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem
POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ]
POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ]
POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ]
-PREHOOK: query: explain
+PREHOOK: query: explain vectorization expression
select s, t, max(b) from vectortab2korc group by s, t
PREHOOK: type: QUERY
-POSTHOOK: query: explain
+POSTHOOK: query: explain vectorization expression
select s, t, max(b) from vectortab2korc group by s, t
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -127,12 +131,26 @@ STAGE PLANS:
TableScan
alias: vectortab2korc
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Select Operator
expressions: t (type: tinyint), s (type: string), b (type: bigint)
outputColumnNames: t, s, b
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 8, 3]
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: max(b)
+ Group By Vectorization:
+ aggregators: VectorUDAFMaxLong(col 3) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0, col 8
+ native: false
+ projectedOutputColumns: [0]
keys: t (type: tinyint), s (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
@@ -141,14 +159,41 @@ STAGE PLANS:
key expressions: _col0 (type: tinyint), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: Uniform Hash IS false
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: bigint)
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFMaxLong(col 2) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: true
+ keyExpressions: col 0, col 1
+ native: false
+ projectedOutputColumns: [0]
keys: KEY._col0 (type: tinyint), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
@@ -156,9 +201,16 @@ STAGE PLANS:
Select Operator
expressions: _col1 (type: string), _col0 (type: tinyint), _col2 (type: bigint)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 0, 2]
Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat