You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2017/02/03 20:03:33 UTC
[08/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN
display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)
http://git-wip-us.apache.org/repos/asf/hive/blob/fcb57100/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
index 908e8ab..fe257aa 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
@@ -1,4 +1,5 @@
-PREHOOK: query: EXPLAIN SELECT AVG(cint),
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT AVG(cint),
(AVG(cint) + -3728),
(-((AVG(cint) + -3728))),
(-((-((AVG(cint) + -3728))))),
@@ -34,7 +35,8 @@ WHERE ((762 = cbigint)
AND ((79.553 != cint)
AND (cboolean2 != cboolean1)))))
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT AVG(cint),
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT AVG(cint),
(AVG(cint) + -3728),
(-((AVG(cint) + -3728))),
(-((-((AVG(cint) + -3728))))),
@@ -70,6 +72,10 @@ WHERE ((762 = cbigint)
AND ((79.553 != cint)
AND (cboolean2 != cboolean1)))))
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -87,15 +93,33 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val 762, col 3) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 1) -> 12:double) -> boolean, FilterDoubleColGreaterDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val a) -> boolean, FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13, val -1.389)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterStringGroupColNotEqualStringScalar(col 7, val a) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 2) -> 14:decimal(13,3)) -> boolean, FilterLongColNotEqualLongColumn(col 11, col 10) -> boolean)
-> boolean) -> boolean
predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1))) (type: boolean)
Statistics: Num rows: 5466 Data size: 1157380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint)
outputColumnNames: cint, cdouble, csmallint, cfloat, ctinyint
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 5, 1, 4, 0]
Statistics: Num rows: 5466 Data size: 1157380 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint)
+ Group By Vectorization:
+ aggregators: VectorUDAFAvgLong(col 2) -> struct<count:bigint,sum:double>, VectorUDAFSumDouble(col 5) -> double, VectorUDAFStdPopLong(col 2) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFStdSampLong(col 1) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFVarSampLong(col 2) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFAvgDouble(col 4) -> struct<count:bigint,sum:double>, VectorUDAFStdSampLong(col 2) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFCount(col 1) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: false
+ native: false
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 2) -> struct<count:bigint,sum:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 2) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct<count:bigint,sum:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE
@@ -105,8 +129,21 @@ STAGE PLANS:
value expressions: _col0 (type: struct<count:bigint,sum:double,input:int>), _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,input:float>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: tinyint), _col8 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,input:int> of Column[VALUE._col0] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
@@ -208,7 +245,8 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
1.6000018929276082E8 1.5999646129276082E8 -1.5999646129276082E8 1.5999646129276082E8 2.5598867626205912E16 -8706342.964000002 -1.6000018929276082E8 5.481251832900256E8 4.095728233294762E24 8549.657499338187 -5.481251832900256E8 3.8812872199726474E8 2.12743126884874112E17 3.0054786945575034E17 -5.700752675298234 -3.0054786945575034E17 3.0054786945575034E17 973579.3664121237 5.48222463472403E8 -973579.3664121237 -18.377427808018613 -64 2044 -6.573680812059066E-5 18.377427808018613
-PREHOOK: query: EXPLAIN SELECT MAX(cint),
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT MAX(cint),
(MAX(cint) / -3728),
(MAX(cint) * -3728),
VAR_POP(cbigint),
@@ -241,7 +279,8 @@ WHERE (((cbigint <= 197)
OR ((cfloat > 79.553)
AND (cstring2 LIKE '10%')))
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT MAX(cint),
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT MAX(cint),
(MAX(cint) / -3728),
(MAX(cint) * -3728),
VAR_POP(cbigint),
@@ -274,6 +313,10 @@ WHERE (((cbigint <= 197)
OR ((cfloat > 79.553)
AND (cstring2 LIKE '10%')))
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -291,15 +334,33 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 2036734 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 3, val 197) -> boolean, FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -26.28) -> boolean, FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 1) -> 12:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss.*) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 4, val 79.5530014038086) -> boolean, FilterStringColLikeStringScalar(col 7, pattern 10%) -> boolean) -> boolean) -> boolean
predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or ((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean)
Statistics: Num rows: 6826 Data size: 1131534 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint)
outputColumnNames: cint, cbigint, csmallint, cdouble, ctinyint
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 3, 1, 5, 0]
Statistics: Num rows: 6826 Data size: 1131534 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint)
+ Group By Vectorization:
+ aggregators: VectorUDAFMaxLong(col 2) -> int, VectorUDAFVarPopLong(col 3) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFStdPopLong(col 1) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFMaxDouble(col 5) -> double, VectorUDAFAvgLong(col 0) -> struct<count:bigint,sum:double>, VectorUDAFMinLong(col 2) -> int, VectorUDAFMinDouble(col 5) -> double, VectorUDAFStdSampLong(col 1) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFVarSampLong(col 2) -> struct<count:bigint,sum:double,variance:double>
+ className: VectorGroupByOperator
+ vectorOutput: false
+ native: false
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+ vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct<count:bigint,sum:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: COMPLETE
@@ -309,8 +370,21 @@ STAGE PLANS:
value expressions: _col0 (type: int), _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: double), _col4 (type: struct<count:bigint,sum:double,input:tinyint>), _col5 (type: int), _col6 (type: double), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: struct<count:bigint,sum:double,variance:double>)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,variance:double> of Column[VALUE._col1] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8)
@@ -406,7 +480,8 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-20301111 5445.576984978541 -1626869520 7.9684972882908944E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 511 5454.512308361625 1626869520 7.2647256545687792E16
-PREHOOK: query: EXPLAIN SELECT VAR_POP(cbigint),
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT VAR_POP(cbigint),
(-(VAR_POP(cbigint))),
(VAR_POP(cbigint) - (-(VAR_POP(cbigint)))),
COUNT(*),
@@ -438,7 +513,8 @@ WHERE ((ctimestamp1 = ctimestamp2)
AND ((ctimestamp2 IS NOT NULL)
AND (cstring2 > 'a'))))
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT VAR_POP(cbigint),
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT VAR_POP(cbigint),
(-(VAR_POP(cbigint))),
(VAR_POP(cbigint) - (-(VAR_POP(cbigint)))),
COUNT(*),
@@ -470,6 +546,10 @@ WHERE ((ctimestamp1 = ctimestamp2)
AND ((ctimestamp2 IS NOT NULL)
AND (cstring2 > 'a'))))
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -487,15 +567,33 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprOrExpr(children: FilterTimestampColEqualTimestampColumn(col 8, col 9) -> boolean, FilterDoubleScalarEqualDoubleColumn(val 762.0, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val ss) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterLongScalarEqualLongColumn(val 1, col 11) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringGroupColGreaterStringScalar(col 7, val a) -> boolean) -> boolean) -> boolean
predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean)
Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double)
outputColumnNames: cbigint, ctinyint, csmallint, cint, cdouble
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [3, 0, 1, 2, 5]
Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint)
+ Group By Vectorization:
+ aggregators: VectorUDAFVarPopLong(col 3) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFCountStar(*) -> bigint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFStdPopLong(col 1) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFMaxLong(col 2) -> int, VectorUDAFStdSampDouble(col 5) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFCount(col 0) -> bigint, VectorUDAFAvgLong(col 0) -> struct<count:bigint,sum:double>
+ className: VectorGroupByOperator
+ vectorOutput: false
+ native: false
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7]
+ vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDouble(col 5) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct<count:bigint,sum:double> output type STRUCT requires PRIMITIVE IS false
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
@@ -505,8 +603,21 @@ STAGE PLANS:
value expressions: _col0 (type: struct<count:bigint,sum:double,variance:double>), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: int), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: bigint), _col7 (type: struct<count:bigint,sum:double,input:tinyint>)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,variance:double> of Column[VALUE._col0] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7)
@@ -601,7 +712,8 @@ POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
2.5109214708345636E18 -2.5109214708345636E18 5.0218429416691272E18 2780 75.198 62 2.5109214708345661E18 2.5109214708345636E18 -1.0 2780 -2780 9460.675803068349 -2.5109214708345636E18 -2118360 1072872630 -2118298 -2.5109214697616911E18 185935.34910862707 0 758 -1.733509234828496 -3728
WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: EXPLAIN SELECT AVG(ctinyint),
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT AVG(ctinyint),
(AVG(ctinyint) + 6981),
((AVG(ctinyint) + 6981) + AVG(ctinyint)),
MAX(cbigint),
@@ -623,7 +735,8 @@ WHERE (((ctimestamp2 <= ctimestamp1)
AND (ctimestamp1 >= 0))
OR (cfloat = 17))
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT AVG(ctinyint),
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT AVG(ctinyint),
(AVG(ctinyint) + 6981),
((AVG(ctinyint) + 6981) + AVG(ctinyint)),
MAX(cbigint),
@@ -645,6 +758,10 @@ WHERE (((ctimestamp2 <= ctimestamp1)
AND (ctimestamp1 >= 0))
OR (cfloat = 17))
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -662,15 +779,33 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 2139070 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessEqualTimestampColumn(col 9, col 8) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterStringScalarLessEqualStringGroupColumn(val ss, col 6) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 4, val 17.0) -> boolean) -> boolean
predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17)) (type: boolean)
Statistics: Num rows: 2835 Data size: 493648 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float)
outputColumnNames: ctinyint, cbigint, cint, cfloat
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 3, 2, 4]
Statistics: Num rows: 2835 Data size: 493648 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat)
+ Group By Vectorization:
+ aggregators: VectorUDAFAvgLong(col 0) -> struct<count:bigint,sum:double>, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFStdSampLong(col 2) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFVarPopLong(col 2) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFVarPopLong(col 3) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFMaxDouble(col 4) -> float
+ className: VectorGroupByOperator
+ vectorOutput: false
+ native: false
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+ vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 0) -> struct<count:bigint,sum:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 2) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
@@ -680,8 +815,21 @@ STAGE PLANS:
value expressions: _col0 (type: struct<count:bigint,sum:double,input:tinyint>), _col1 (type: bigint), _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: float)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,input:tinyint> of Column[VALUE._col0] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5)
@@ -756,7 +904,8 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-0.5934409161894847 6980.406559083811 6979.813118167622 2141851355 -11761.597368421053 -6980.406559083811 1.5852855222071937E8 -0.5934409161894847 2.5099887741860852E16 1.52140608502098816E18 -2141851355 -13.510823917813237 79.553 -3.998255191435157E19
-PREHOOK: query: EXPLAIN SELECT cint,
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
cdouble,
ctimestamp2,
cstring1,
@@ -792,7 +941,8 @@ WHERE (((cstring1 RLIKE 'a.*')
ORDER BY cint, cdouble, ctimestamp2, cstring1, cboolean2, ctinyint, cfloat, ctimestamp1, csmallint, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13
LIMIT 50
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cint,
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
cdouble,
ctimestamp2,
cstring1,
@@ -828,6 +978,10 @@ WHERE (((cstring1 RLIKE 'a.*')
ORDER BY cint, cdouble, ctimestamp2, cstring1, cboolean2, ctinyint, cfloat, ctimestamp1, csmallint, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13
LIMIT 50
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -845,32 +999,74 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 3056470 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterStringColRegExpStringScalar(col 6, pattern a.*) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %ss%) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val 1, col 11) -> boolean, FilterDecimalColLessDecimalScalar(col 12, val 79.553)(children: CastLongToDecimal(col 1) -> 12:decimal(8,3)) -> boolean, FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 0) -> 13:double) -> boolean, FilterDoubleColGreaterEqualDoubleColumn(col 4, col 13)(children: CastLongToFloatViaLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean, FilterLongColGreaterLongColumn(col 0, col 3)(children: col 0) -> boolean
) -> boolean) -> boolean
predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and (CAST( csmallint AS decimal(8,3)) < 79.553) and (-257 <> UDFToInteger(ctinyint))) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))) (type: boolean)
Statistics: Num rows: 9898 Data size: 2462086 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - CAST( cint AS decimal(10,0))) (type: decimal(14,3)), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - CAST( cint AS decimal(10,0))) - -26.28) (type: decimal(15,3)), (- cfloat) (type: float), (cdouble * -89010.0) (type: double), (UDFToDouble(ctinyint) / 988888.0) (type: double), (- ctinyint) (type: tinyint), (79.553 / CAST( ctinyint AS decimal(3,0))) (type: decimal(9,7))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 5, 9, 6, 11, 0, 4, 8, 1, 3, 14, 15, 17, 18, 20, 22, 24, 26, 13, 23, 28, 19, 30]
+ selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 3) -> 14:long, LongColUnaryMinus(col 2) -> 15:long, DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 17:decimal(14,3), LongColUnaryMinus(col 1) -> 18:long, LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 20:long, LongColAddLongColumn(col 21, col 19)(children: LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 21:long, LongColUnaryMinus(col 1) -> 19:long) -> 22:long, DoubleColDivideDoubleColumn(col 13, col 23)(children: CastLongToDouble(col 2) -> 13:double, CastLongToDouble(col 2) -> 23:double) -> 24:double, DecimalColSubtractDecimalScalar(col 25, val -26.28)(children: DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 25:decimal(14,3)) -> 26:decimal(15,3), DoubleColUnaryMinus(col 4) -
> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -89010.0) -> 23:double, DoubleColDivideDoubleScalar(col 27, val 988888.0)(children: CastLongToDouble(col 0) -> 27:double) -> 28:double, LongColUnaryMinus(col 0) -> 19:long, DecimalScalarDivideDecimalColumn(val 79.553, col 29)(children: CastLongToDecimal(col 0) -> 29:decimal(3,0)) -> 30:decimal(9,7)
Statistics: Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: timestamp), _col3 (type: string), _col4 (type: boolean), _col5 (type: tinyint), _col6 (type: float), _col7 (type: timestamp), _col8 (type: smallint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: int), _col12 (type: decimal(14,3)), _col13 (type: smallint), _col14 (type: smallint), _col15 (type: smallint), _col16 (type: double), _col17 (type: decimal(15,3)), _col18 (type: float), _col19 (type: double), _col20 (type: double), _col21 (type: tinyint), _col22 (type: decimal(9,7))
sort order: +++++++++++++++++++++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
Statistics: Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: boolean), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: float), KEY.reducesinkkey7 (type: timestamp), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint), KEY.reducesinkkey11 (type: int), KEY.reducesinkkey12 (type: decimal(14,3)), KEY.reducesinkkey13 (type: smallint), KEY.reducesinkkey14 (type: smallint), KEY.reducesinkkey15 (type: smallint), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: decimal(15,3)), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: tinyint), KEY.reducesinkkey22 (type: decimal(9,7))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
Statistics: Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 50
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 50 Data size: 28540 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 50 Data size: 28540 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1009,7 +1205,8 @@ NULL -7196.0 1969-12-31 15:59:58.174 NULL false -64 -64.0 1969-12-31 15:59:56.04
NULL -7196.0 1969-12-31 15:59:58.174 NULL false -64 -64.0 1969-12-31 16:00:01.785 -7196 -1639157869 6110780535632 NULL NULL 7196 -14392 -7196 NULL NULL 64.0 6.4051596E8 -6.471915929812072E-5 64 -1.2430156
NULL -7196.0 1969-12-31 15:59:58.174 NULL false -64 -64.0 1969-12-31 16:00:11.912 -7196 -1615920595 6024151978160 NULL NULL 7196 -14392 -7196 NULL NULL 64.0 6.4051596E8 -6.471915929812072E-5 64 -1.2430156
NULL -7196.0 1969-12-31 15:59:58.174 NULL false -64 -64.0 1969-12-31 16:00:12.339 -7196 1805860756 -6732248898368 NULL NULL 7196 -14392 -7196 NULL NULL 64.0 6.4051596E8 -6.471915929812072E-5 64 -1.2430156
-PREHOOK: query: EXPLAIN SELECT cint,
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
cbigint,
cstring1,
cboolean1,
@@ -1044,7 +1241,8 @@ WHERE (((197 > ctinyint)
ORDER BY cint, cbigint, cstring1, cboolean1, cfloat, cdouble, ctimestamp2, csmallint, cstring2, cboolean2, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15
LIMIT 25
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cint,
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
cbigint,
cstring1,
cboolean1,
@@ -1079,6 +1277,10 @@ WHERE (((197 > ctinyint)
ORDER BY cint, cbigint, cstring1, cboolean1, cfloat, cdouble, ctimestamp2, csmallint, cstring2, cboolean2, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15
LIMIT 25
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1096,32 +1298,74 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarGreaterLongColumn(val 197, col 0)(children: col 0) -> boolean, FilterLongColEqualLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 3, val 359) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern %ss) -> boolean, FilterDoubleColLessEqualDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean) -> boolean) -> boolean
predicate: (((197 > UDFToInteger(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean)
Statistics: Num rows: 8195 Data size: 1735170 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) (type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), (- csmallint) (type: smallint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 3, 6, 10, 4, 5, 9, 1, 7, 11, 14, 16, 12, 13, 17, 19, 18, 21, 20, 22, 23, 26, 27, 24, 28]
+ selectExpressions: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 14:double, DecimalColModuloDecimalScalar(col 15, val 79.553)(children: CastLongToDecimal(col 3) -> 15:decimal(19,0)) -> 16:decimal(5,3), DoubleColUnaryMinus(col 17)(children: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 17:double) -> 12:double, DoubleScalarModuloDoubleColumn(val 10.175000190734863, col 4) -> 13:double, DoubleColUnaryMinus(col 4) -> 17:double, DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 19:double, DoubleColModuloDoubleScalar(col 20, val -6432.0)(children: DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 20:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 5, col 20)(children: CastLongToDouble(col
1) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColUnaryMinus(col 3) -> 22:long, DoubleColSubtractDoubleColumn(col 4, col 25)(children: col 4, DoubleColDivideDoubleColumn(col 23, col 24)(children: CastLongToDouble(col 2) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double) -> 23:double, LongColUnaryMinus(col 1) -> 26:long, LongScalarModuloLongColumn(val 3569, col 3) -> 27:long, DoubleScalarSubtractDoubleColumn(val 359.0, col 5) -> 24:double, LongColUnaryMinus(col 1) -> 28:long
Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint)
sort order: +++++++++++++++++++++++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: boolean), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: timestamp), KEY.reducesinkkey7 (type: smallint), KEY.reducesinkkey8 (type: string), KEY.reducesinkkey9 (type: boolean), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: decimal(5,3)), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: float), KEY.reducesinkkey15 (type: float), KEY.reducesinkkey16 (type: float), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: bigint), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: bigint), KEY.reducesinkkey23 (type: double), KEY.reducesinkkey21 (type: smallint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 21]
Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 25
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 25 Data size: 10520 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 25 Data size: 10520 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1211,7 +1455,8 @@ POSTHOOK: Input: default@alltypesorc
-462839731 988888 ss false -51.0 NULL NULL NULL Lml5J2QBU77 false -468.04059812638036 44.210 468.04059812638036 10.175 51.0 -102.0 -102.0 NULL NULL -988888 417.04059812638036 NULL 3569 NULL NULL
-635141101 -89010 ss false -51.0 NULL NULL NULL rVWAj4N1MCg8Scyp7wj2C true 7135.6151106617235 -69.746 -7135.6151106617235 10.175 51.0 -102.0 -102.0 NULL NULL 89010 -7186.6151106617235 NULL 3569 NULL NULL
WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: EXPLAIN SELECT cint,
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
cstring1,
cboolean2,
ctimestamp2,
@@ -1245,7 +1490,8 @@ WHERE (((csmallint > -26.28)
ORDER BY cboolean1, cstring1, ctimestamp2, cfloat, cbigint, cstring1, cdouble, cint, csmallint, cdouble, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13
LIMIT 75
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cint,
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT cint,
cstring1,
cboolean2,
ctimestamp2,
@@ -1279,6 +1525,10 @@ WHERE (((csmallint > -26.28)
ORDER BY cboolean1, cstring1, ctimestamp2, cfloat, cbigint, cstring1, cdouble, cint, csmallint, cdouble, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13
LIMIT 75
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1296,33 +1546,75 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 12, val -26.28)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterStringGroupColGreaterEqualStringScalar(col 6, val ss) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 0, val -89010)(children: col 0) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13, col 4)(children: CastLongToFloatViaLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarLessEqualDecimalColumn(val -26.28, col 12)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean) ->
boolean) -> boolean
predicate: (((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss')) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= CAST( csmallint AS decimal(7,2))))) (type: boolean)
Statistics: Num rows: 10922 Data size: 2312410 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(8,7)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 6, 11, 9, 5, 4, 3, 1, 10, 14, 15, 16, 13, 18, 19, 20, 22, 25, 27, 24, 17, 28]
+ selectExpressions: LongColAddLongColumn(col 2, col 1)(children: col 1) -> 14:long, LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 15:long, LongColUnaryMinus(col 3) -> 16:long, DoubleColUnaryMinus(col 4) -> 13:double, LongColAddLongColumn(col 17, col 3)(children: LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 17:long) -> 18:long, DoubleColDivideDoubleColumn(col 5, col 5) -> 19:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColMultiplyLongColumn(col 17, col 21)(children: col 17, LongColUnaryMinus(col 3) -> 21:long) -> 22:long, DoubleColAddDoubleColumn(col 23, col 24)(children: DoubleColUnaryMinus(col 5) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double, DecimalScalarDivideDecimalColumn(val -1.389, col 26)(children: CastLongToDecimal(col 0) -> 26:decimal(3,0)) -> 27:decimal(8,7), DoubleColModuloDoubleColumn(col 23, col 5)(children: CastLongToDouble(col 3) -> 23:double) -> 24:double, LongColUnaryMinus(col 1) -> 17:
long, LongColAddLongColumn(col 1, col 21)(children: col 1, LongColAddLongColumn(col 2, col 1)(children: col 1) -> 21:long) -> 28:long
Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(8,7)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int)
sort order: +++++++++++++++++++++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col2 (type: boolean)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey7 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: bigint), KEY.reducesinkkey13 (type: float), KEY.reducesinkkey14 (type: bigint), KEY.reducesinkkey15 (type: double), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: bigint), KEY.reducesinkkey18 (type: double), KEY.reducesinkkey19 (type: decimal(8,7)), KEY.reducesinkkey20 (type: double), KEY.reducesinkkey21 (type: smallint), KEY.reducesinkkey22 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [7, 1, 23, 2, 6, 3, 4, 8, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 75
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 75 Data size: 24810 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 75 Data size: 24810 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1483,7 +1775,8 @@ NULL NULL true 1969-12-31 15:59:58.456 15601.0 -62.0 667693308 15601 NULL NULL 6
NULL NULL true 1969-12-31 15:59:58.456 15601.0 -63.0 -200542601 15601 NULL NULL -200542538 200542601 63.0 -401085139 1.0 -15601.0 NULL -2.00558202E8 0.0220476 -7347.0 -15601 NULL
NULL NULL true 1969-12-31 15:59:58.456 15601.0 -63.0 -721244708 15601 NULL NULL -721244645 721244708 63.0 -1442489353 1.0 -15601.0 NULL -7.21260309E8 0.0220476 -10478.0 -15601 NULL
NULL NULL true 1969-12-31 15:59:58.456 15601.0 -64.0 -1809291815 15601 NULL NULL -1809291751 1809291815 64.0 -3618583566 1.0 -15601.0 NULL -1.809307416E9 0.0217031 -12643.0 -15601 NULL
-PREHOOK: query: EXPLAIN SELECT ctimestamp1,
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT ctimestamp1,
cstring2,
cdouble,
cfloat,
@@ -1510,7 +1803,8 @@ WHERE (((-1.389 >= cint)
ORDER BY csmallint, cstring2, cdouble, cfloat, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10
LIMIT 45
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT ctimestamp1,
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT ctimestamp1,
cstring2,
cdouble,
cfloat,
@@ -1537,6 +1831,10 @@ WHERE (((-1.389 >= cint)
ORDER BY csmallint, cstring2, cdouble, cfloat, cbigint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10
LIMIT 45
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1554,33 +1852,75 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 2528254 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalScalarGreaterEqualDecimalColumn(val -1.389, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterLongScalarGreaterLongColumn(val -6432, col 1)(children: col 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5, col 4)(children: col 4) -> boolean, FilterStringGroupColLessEqualStringScalar(col 7, val a) -> boolean) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern ss%) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val 10.175, col 13)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean) -> boolean
predicate: (((-1.389 >= CAST( cint AS decimal(13,3))) and (csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint))) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > CAST( cbigint AS decimal(22,3))))) (type: boolean)
Statistics: Num rows: 3868 Data size: 795962 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [8, 7, 5, 4, 3, 1, 15, 16, 14, 17, 18, 20, 19, 21, 22, 24]
+ selectExpressions: DoubleColDivideDoubleScalar(col 14, val 3569.0)(children: CastLongToDouble(col 3) -> 14:double) -> 15:double, LongScalarSubtractLongColumn(val -257, col 1)(children: col 1) -> 16:long, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4) -> 14:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColMultiplyDoubleScalar(col 5, val 10.175) -> 18:double, DoubleColDivideDoubleColumn(col 19, col 4)(children: col 19, col 4) -> 20:double, DoubleColUnaryMinus(col 4) -> 19:double, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 21:long, DoubleColUnaryMinus(col 5) -> 22:double, DoubleColMultiplyDoubleColumn(col 5, col 23)(children: DoubleColUnaryMinus(col 5) -> 23:double) -> 24:double
Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double)
sort order: +++++++++++++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col0 (type: timestamp)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: timestamp), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: float), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey14 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [15, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 8, 14]
Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 45
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 45 Data size: 8880 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 45 Data size: 8880 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1696,7 +2036,8 @@ POSTHOOK: Input: default@alltypesorc
NULL 4hA4KQj2vD3fI6gX82220d 12329.0 NULL -1887561756 12329 -528876.9279910339 -12586 NULL -12329.0 125447.57500000001 NULL NULL -3104 -12329.0 -1.52004241E8
NULL 4hA4KQj2vD3fI6gX82220d 477.0 NULL -1887561756 477 -528876.9279910339 -734 NULL -477.0 4853.475 NULL NULL -326 -477.0 -227529.0
NULL xH7445Rals48VOulSyR5F 10221.0 NULL -1645852809 10221 -461152.37013168953 -10478 NULL -10221.0 103998.675 NULL NULL 5022 -10221.0 -1.04468841E8
-PREHOOK: query: EXPLAIN SELECT csmallint,
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT csmallint,
(csmallint % -75) as c1,
STDDEV_SAMP(csmallint) as c2,
(-1.389 / csmallint) as c3,
@@ -1716,7 +2057,8 @@ GROUP BY csmallint
ORDER BY csmallint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10
LIMIT 20
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT csmallint,
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT csmallint,
(csmallint % -75) as c1,
STDDEV_SAMP(csmallint) as c2,
(-1.389 / csmallint) as c3,
@@ -1736,6 +2078,10 @@ GROUP BY csmallint
ORDER BY csmallint, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10
LIMIT 20
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1754,15 +2100,34 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 256884 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 1, val -257)(children: col 1) -> boolean, FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val -6432, col 1)(children: col 1) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterLongColLessEqualLongColumn(col 0, col 2)(children: col 0) -> boolean) -> boolean) -> boolean) -> boolean
predicate: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean)
Statistics: Num rows: 2503 Data size: 52344 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint)
outputColumnNames: csmallint, cbigint, ctinyint
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [1, 3, 0]
Statistics: Num rows: 2503 Data size: 52344 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count()
+ Group By Vectorization:
+ aggregators: VectorUDAFStdSampLong(col 1) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarPopLong(col 0) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFCountStar(*) -> bigint
+ className: VectorGroupByOperator
+ vectorOutput: false
+ keyExpressions: col 1
+ native: false
+ projectedOutputColumns: [0, 1, 2, 3]
+ vectorOutputConditionsNotMet: Vector output of VectorUDAFStdSampLong(col 1) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 0) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false
keys: csmallint (type: smallint)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -1775,8 +2140,21 @@ STAGE PLANS:
value expressions: _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: bigint), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,variance:double> of Column[VALUE._col0] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3)
@@ -1795,16 +2173,33 @@ STAGE PLANS:
TopN Hash Memory Usage: 0.1
Reducer 3
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: decimal(10,9)), KEY.reducesinkkey4 (type: bigint), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: int), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: int), KEY.reducesinkkey9 (type: bigint), KEY.reducesinkkey10 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Statistics: Num rows: 1128 Data size: 197388 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 20 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 20 Data size: 3504 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1882,7 +2277,8 @@ POSTHOOK: Input: default@alltypesorc
-89 -14 0.0 0.015606742 NULL NULL 14 0.0 -14 1 89011
-95 -20 0.0 0.014621053 NULL NULL 20 0.0 -20 1 89011
WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: EXPLAIN SELECT cdouble,
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT cdouble,
VAR_SAMP(cdouble),
(2563.58 * VAR_SAMP(cdouble)),
(-(VAR_SAMP(cdouble))),
@@ -1909,7 +2305,8 @@ WHERE (((cdouble > 2563.58))
GROUP BY cdouble
ORDER BY cdouble
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdouble,
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT cdouble,
VAR_SAMP(cdouble),
(2563.58 * VAR_SAMP(cdouble)),
(-(VAR_SAMP(cdouble))),
@@ -1936,6 +2333,10 @@ WHERE (((cdouble > 2563.58))
GROUP BY cdouble
ORDER BY cdouble
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -1954,15 +2355,34 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 293580 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 2563.58) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3, col 2)(children: col 2) -> boolean, FilterLongColLessLongColumn(col 1, col 2)(children: col 1) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -5638.14990234375) -> boolean) -> boolean, FilterDecimalScalarEqualDecimalColumn(val 2563.58, col 12)(children: CastLongToDecimal(col 0) -> 12:decimal(6,2)) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val -5638.15, col 14)(children: CastLongToDecimal(col 3) -> 14:decimal(21,2)) -> boolean) -> boolean) -> boolean) -> boolean
predicate: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (2563.58 = CAST( ctinyint AS decimal(6,2))) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > CAST( cbigint AS decimal(21,2)))))) (type: boolean)
Statistics: Num rows: 2503 Data size: 59820 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cdouble (type: double), cfloat (type: float)
outputColumnNames: cdouble, cfloat
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [5, 4]
Statistics: Num rows: 2503 Data size: 59820 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble)
+ Group By Vectorization:
+ aggregators: VectorUDAFVarSampDouble(col 5) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFCount(col 4) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFVarPopDouble(col 5) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFStdPopDouble(col 5) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFSumDouble(col 5) -> double
+ className: VectorGroupByOperator
+ vectorOutput: false
+ keyExpressions: col 5
+ native: false
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+ vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampDouble(col 5) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 5) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDouble(col 5) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false
keys: cdouble (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -1975,8 +2395,21 @@ STAGE PLANS:
value expressions: _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,variance:double> of Column[VALUE._col0] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5)
@@ -1995,13 +2428,27 @@ STAGE PLANS:
value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double)
Reducer 3
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col12 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13]
Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2073,7 +2520,8 @@ ORDER BY cdouble
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-PREHOOK: query: EXPLAIN SELECT ctimestamp1,
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+SELECT ctimestamp1,
cstring1,
STDDEV_POP(cint) as c1,
(STDDEV_POP(cint) * 10.175) as c2,
@@ -2128,7 +2576,8 @@ GROUP BY ctimestamp1, cstring1
ORDER BY ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, c27, c28, c29, c30, c31, c32, c33, c34, c35, c36, c37
LIMIT 50
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT ctimestamp1,
+POSTHOOK: query: EXPLAIN
<TRUNCATED>