You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/10/13 10:50:32 UTC
[13/51] [partial] hive git commit: HIVE-11394: Enhance EXPLAIN
display for vectorization (Matt McCline, reviewed by Gopal Vijayaraghavan)
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
index 8cf503f..f0d2a50 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out
@@ -1,6 +1,6 @@
PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT cboolean1,
ctinyint,
ctimestamp1,
@@ -35,7 +35,7 @@ LIMIT 40
PREHOOK: type: QUERY
POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT cboolean1,
ctinyint,
ctimestamp1,
@@ -68,6 +68,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16
LIMIT 40
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -86,15 +90,34 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 2028982 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 11.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 12.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean
predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean)
Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [10, 0, 8, 4, 6]
Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
+ Group By Vectorization:
+ aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFStdPopLong(col 0) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint
+ className: VectorGroupByOperator
+ vectorOutput: false
+ keyExpressions: col 10, col 0, col 8, col 4, col 6
+ native: false
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+ vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false
keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -107,8 +130,21 @@ STAGE PLANS:
value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: struct<count:bigint,sum:double,variance:double>), _col9 (type: float), _col10 (type: tinyint)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,variance:double> of Column[VALUE._col2] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
@@ -127,16 +163,33 @@ STAGE PLANS:
TopN Hash Memory Usage: 0.1
Reducer 3
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20]
Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 40
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -258,7 +311,7 @@ NULL -64 1969-12-31 16:00:11.912 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0
NULL -64 1969-12-31 16:00:12.339 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64
NULL -64 1969-12-31 16:00:13.274 -64.0 NULL 64 -64 0 -64.0 -0.0 64.0 -5091.392 0.0 64.0 0.0 -10.175 -64.0 0.410625 -64.0 0.0 -64
PREHOOK: query: -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT cboolean1,
ctinyint,
ctimestamp1,
@@ -292,7 +345,7 @@ ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5,
LIMIT 40
PREHOOK: type: QUERY
POSTHOOK: query: -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT cboolean1,
ctinyint,
ctimestamp1,
@@ -325,6 +378,10 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16
LIMIT 40
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -343,15 +400,34 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 2028982 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -1.388)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val -1.3359999999999999)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean
predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean)
Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
outputColumnNames: cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [10, 0, 8, 4, 6]
Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
+ Group By Vectorization:
+ aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFStdPopLong(col 0) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint
+ className: VectorGroupByOperator
+ vectorOutput: false
+ keyExpressions: col 10, col 0, col 8, col 4, col 6
+ native: false
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+ vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct<count:bigint,sum:double,variance:double> output type STRUCT requires PRIMITIVE IS false
keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -364,8 +440,21 @@ STAGE PLANS:
value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: struct<count:bigint,sum:double,variance:double>), _col9 (type: float), _col10 (type: tinyint)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,variance:double> of Column[VALUE._col2] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
@@ -384,16 +473,33 @@ STAGE PLANS:
TopN Hash Memory Usage: 0.1
Reducer 3
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20]
Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 40
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorization_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_14.q.out b/ql/src/test/results/clientpositive/llap/vectorization_14.q.out
index c227e44..e6fdca9 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_14.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_14.q.out
@@ -1,6 +1,6 @@
PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
SELECT ctimestamp1,
cfloat,
cstring1,
@@ -35,7 +35,7 @@ ORDER BY cstring1, cfloat, cdouble, ctimestamp1
PREHOOK: type: QUERY
POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
SELECT ctimestamp1,
cfloat,
cstring1,
@@ -68,6 +68,10 @@ WHERE (((ctinyint <= cbigint)
GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble
ORDER BY cstring1, cfloat, cdouble, ctimestamp1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -107,8 +111,21 @@ STAGE PLANS:
value expressions: _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: float), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: bigint), _col9 (type: struct<count:bigint,sum:double,variance:double>), _col10 (type: struct<count:bigint,sum:double,variance:double>)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,variance:double> of Column[VALUE._col0] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5)
@@ -127,6 +144,13 @@ STAGE PLANS:
value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double)
Reducer 3
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double)
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorization_15.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_15.q.out b/ql/src/test/results/clientpositive/llap/vectorization_15.q.out
index 768aed4..a9908a4 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_15.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_15.q.out
@@ -1,6 +1,6 @@
PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
SELECT cfloat,
cboolean1,
cdouble,
@@ -33,7 +33,7 @@ ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
PREHOOK: type: QUERY
POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
SELECT cfloat,
cboolean1,
cdouble,
@@ -64,6 +64,10 @@ WHERE (((cstring2 LIKE '%ss%')
GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -103,8 +107,21 @@ STAGE PLANS:
value expressions: _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: double), _col9 (type: struct<count:bigint,sum:double,variance:double>), _col10 (type: struct<count:bigint,sum:double,variance:double>), _col11 (type: struct<count:bigint,sum:double,variance:double>), _col12 (type: struct<count:bigint,sum:double,variance:double>)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,variance:double> of Column[VALUE._col0] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5)
@@ -123,6 +140,13 @@ STAGE PLANS:
value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double)
Reducer 3
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double)
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_16.q.out b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out
index a1eb629..22041cc 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_16.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out
@@ -1,6 +1,6 @@
PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
SELECT cstring1,
cdouble,
ctimestamp1,
@@ -22,7 +22,7 @@ GROUP BY cstring1, cdouble, ctimestamp1
PREHOOK: type: QUERY
POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
SELECT cstring1,
cdouble,
ctimestamp1,
@@ -42,6 +42,10 @@ WHERE ((cstring2 LIKE '%b%')
OR (cstring1 < 'a')))
GROUP BY cstring1, cdouble, ctimestamp1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -80,8 +84,21 @@ STAGE PLANS:
value expressions: _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,variance:double> of Column[VALUE._col1] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorization_17.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_17.q.out b/ql/src/test/results/clientpositive/llap/vectorization_17.q.out
index 3a77cc9..007ce8f 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_17.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_17.q.out
@@ -1,6 +1,6 @@
PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
SELECT cfloat,
cstring1,
cint,
@@ -26,7 +26,7 @@ ORDER BY cbigint, cfloat
PREHOOK: type: QUERY
POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
SELECT cfloat,
cstring1,
cint,
@@ -50,6 +50,10 @@ WHERE (((cbigint > -23)
OR (cfloat = cdouble))))
ORDER BY cbigint, cfloat
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -81,8 +85,23 @@ STAGE PLANS:
value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double)
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorization_7.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_7.q.out b/ql/src/test/results/clientpositive/llap/vectorization_7.q.out
index 54cc498..aeec133 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_7.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_7.q.out
@@ -1,6 +1,6 @@
PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT cboolean1,
cbigint,
csmallint,
@@ -29,7 +29,7 @@ LIMIT 25
PREHOOK: type: QUERY
POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT cboolean1,
cbigint,
csmallint,
@@ -56,6 +56,10 @@ WHERE ((ctinyint != 0)
ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9
LIMIT 25
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -73,32 +77,74 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -15.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean
predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > -15.0) and (3569.0 >= cdouble)))) (type: boolean)
Statistics: Num rows: 7281 Data size: 1789382 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22]
+ selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long
Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
sort order: +++++++++++++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14]
Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 25
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -193,7 +239,7 @@ NULL NULL -7196 1 1969-12-31 15:59:48.361 NULL NULL 0 7196 -1 16 NULL NULL -1 0
NULL NULL -7196 14 1969-12-31 15:59:50.291 NULL NULL 0 7196 -14 3 NULL NULL -14 0
NULL NULL -7196 22 1969-12-31 15:59:52.699 NULL NULL 0 7196 -22 -5 NULL NULL -22 0
PREHOOK: query: -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT cboolean1,
cbigint,
csmallint,
@@ -221,7 +267,7 @@ ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2,
LIMIT 25
PREHOOK: type: QUERY
POSTHOOK: query: -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT cboolean1,
cbigint,
csmallint,
@@ -248,6 +294,10 @@ WHERE ((ctinyint != 0)
ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9
LIMIT 25
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -265,32 +315,74 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 0, col 2)(children: col 0) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarLessDoubleColumn(val 988888.0, col 5) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 7.6850000000000005)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 3569.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean
predicate: ((ctinyint <> 0) and ((UDFToDouble(ctimestamp1) <= 0.0) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and ((988888.0 < cdouble) or ((UDFToDouble(ctimestamp2) > 7.6850000000000005) and (3569.0 >= cdouble)))) (type: boolean)
Statistics: Num rows: 7281 Data size: 1789382 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22]
+ selectExpressions: LongColAddLongColumn(col 3, col 3) -> 13:long, LongColModuloLongScalar(col 1, val -257)(children: col 1) -> 14:long, LongColUnaryMinus(col 1) -> 15:long, LongColUnaryMinus(col 0) -> 16:long, LongColAddLongScalar(col 17, val 17)(children: col 17) -> 18:long, LongColMultiplyLongColumn(col 3, col 17)(children: col 17) -> 19:long, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 17:long, LongColUnaryMinus(col 0) -> 20:long, LongColModuloLongColumn(col 21, col 0)(children: LongColUnaryMinus(col 0) -> 21:long) -> 22:long
Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
sort order: +++++++++++++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14]
Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 25
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorization_8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_8.q.out b/ql/src/test/results/clientpositive/llap/vectorization_8.q.out
index 70ef835..d0410bd 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_8.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_8.q.out
@@ -1,6 +1,6 @@
PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT ctimestamp1,
cdouble,
cboolean1,
@@ -27,7 +27,7 @@ LIMIT 20
PREHOOK: type: QUERY
POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT ctimestamp1,
cdouble,
cboolean1,
@@ -52,6 +52,10 @@ WHERE (((cstring2 IS NOT NULL)
ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9
LIMIT 20
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -69,32 +73,74 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 2983078 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 10.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 16.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean
predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 10.0) and (UDFToDouble(ctimestamp2) <> 16.0)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean)
Statistics: Num rows: 3060 Data size: 743036 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21]
+ selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double
Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double)
sort order: ++++++++++++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13]
Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -180,7 +226,7 @@ POSTHOOK: Input: default@alltypesorc
1969-12-31 15:59:43.807 -7196.0 NULL NULL 42.0 7196.0 1557.8500000000004 1849372.0 NULL -5.98226333E8 7196.0 -43.389 -42.0 NULL
1969-12-31 15:59:43.82 -7196.0 NULL NULL -30.0 7196.0 1557.8500000000004 1849372.0 NULL 1.329550715E9 7196.0 28.611 30.0 NULL
PREHOOK: query: -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT ctimestamp1,
cdouble,
cboolean1,
@@ -206,7 +252,7 @@ ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5,
LIMIT 20
PREHOOK: type: QUERY
POSTHOOK: query: -- double compare timestamp
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT ctimestamp1,
cdouble,
cboolean1,
@@ -231,6 +277,10 @@ WHERE (((cstring2 IS NOT NULL)
ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9
LIMIT 20
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -248,32 +298,74 @@ STAGE PLANS:
TableScan
alias: alltypesorc
Statistics: Num rows: 12288 Data size: 2983078 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 7) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 12, val 12.503)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 11.998)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -6432.0) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, FilterDoubleColEqualDoubleScalar(col 5, val 988888.0) -> boolean) -> boolean) -> boolean
predicate: ((cstring2 is not null and (UDFToDouble(ctimestamp1) <= 12.503) and (UDFToDouble(ctimestamp2) <> 11.998)) or (cfloat < -6432) or (cboolean1 is not null and (cdouble = 988888.0))) (type: boolean)
Statistics: Num rows: 3060 Data size: 743036 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15 - cdouble) (type: double), (cdouble * -257.0) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15 - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21]
+ selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 13:double, DoubleColMultiplyDoubleScalar(col 5, val -257.0) -> 14:double, DoubleColAddDoubleColumn(col 15, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 15:double) -> 16:double, DoubleColAddDoubleColumn(col 15, col 17)(children: DoubleColUnaryMinus(col 5) -> 15:double, CastLongToDouble(col 3) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 5) -> 15:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4) -> 17:double, DoubleColUnaryMinus(col 4) -> 19:double, DoubleColAddDoubleColumn(col 20, col 22)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5) -> 20:double, col 22) -> 21:double
Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double)
sort order: ++++++++++++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, Not ACID UPDATE or DELETE IS true, No buckets IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: No TopN IS false, Uniform Hash IS false
Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13]
Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_9.q.out b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out
index a1eb629..22041cc 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out
@@ -1,6 +1,6 @@
PREHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
SELECT cstring1,
cdouble,
ctimestamp1,
@@ -22,7 +22,7 @@ GROUP BY cstring1, cdouble, ctimestamp1
PREHOOK: type: QUERY
POSTHOOK: query: -- SORT_QUERY_RESULTS
-EXPLAIN
+EXPLAIN VECTORIZATION
SELECT cstring1,
cdouble,
ctimestamp1,
@@ -42,6 +42,10 @@ WHERE ((cstring2 LIKE '%b%')
OR (cstring1 < 'a')))
GROUP BY cstring1, cdouble, ctimestamp1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -80,8 +84,21 @@ STAGE PLANS:
value expressions: _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: double)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,variance:double> of Column[VALUE._col1] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out b/ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out
index 9a6cb52..74455f5 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_decimal_date.q.out
@@ -12,28 +12,78 @@ POSTHOOK: Lineage: date_decimal_test.cdate EXPRESSION [(alltypesorc)alltypesorc.
POSTHOOK: Lineage: date_decimal_test.cdecimal EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
POSTHOOK: Lineage: date_decimal_test.cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
POSTHOOK: Lineage: date_decimal_test.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
-PREHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
- Stage-0 is a root stage
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: date_decimal_test
+ Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0) -> boolean, SelectColumnIsNotNull(col 1) -> boolean) -> boolean
+ predicate: (cint is not null and cdouble is not null) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cdate (type: date), cdecimal (type: decimal(20,10))
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [2, 3]
+ Statistics: Num rows: 12288 Data size: 1651260 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
+ Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 10 Data size: 1340 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+
Stage: Stage-0
Fetch Operator
limit: 10
Processor Tree:
- TableScan
- alias: date_decimal_test
- Filter Operator
- predicate: (cint is not null and cdouble is not null) (type: boolean)
- Select Operator
- expressions: cdate (type: date), cdecimal (type: decimal(20,10))
- outputColumnNames: _col0, _col1
- Limit
- Number of rows: 10
- ListSink
+ ListSink
PREHOOK: query: SELECT cdate, cdecimal from date_decimal_test where cint IS NOT NULL AND cdouble IS NOT NULL LIMIT 10
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out b/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out
index 2078e81..872e7f3 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out
@@ -46,10 +46,14 @@ POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring2 SIMPLE [(alltype
POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ]
POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
-PREHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10
+PREHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10
PREHOOK: type: QUERY
-POSTHOOK: query: explain select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10
+POSTHOOK: query: explain vectorization select (cdouble+2) c1 from alltypesorc_part order by c1 limit 10
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -78,8 +82,23 @@ STAGE PLANS:
TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: double)
http://git-wip-us.apache.org/repos/asf/hive/blob/f923db0b/ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out b/ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out
index 6bd4bd6..6324e01 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out
@@ -1,8 +1,12 @@
WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble
+PREHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble
PREHOOK: type: QUERY
-POSTHOOK: query: explain SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble
+POSTHOOK: query: explain vectorization SELECT AVG(cbigint) FROM alltypesorc WHERE cbigint < cdouble
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -39,8 +43,21 @@ STAGE PLANS:
value expressions: _col0 (type: struct<count:bigint,sum:double,input:bigint>)
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: false
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Reducer 2
Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct<count:bigint,sum:double,input:bigint> of Column[VALUE._col0] not supported
+ vectorized: false
Reduce Operator Tree:
Group By Operator
aggregations: avg(VALUE._col0)