You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/08/25 05:30:13 UTC
hive git commit: HIVE-20339: Vectorization: Lift unneeded restriction
causing some PTF with RANK not to be vectorized (Matt McCline,
reviewed by Teddy Choi)
Repository: hive
Updated Branches:
refs/heads/master b5578eb08 -> e2142b206
HIVE-20339: Vectorization: Lift unneeded restriction causing some PTF with RANK not to be vectorized (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e2142b20
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e2142b20
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e2142b20
Branch: refs/heads/master
Commit: e2142b20660c3582bc09c87f67c1d32c201952c3
Parents: b5578eb
Author: Matt McCline <mm...@hortonworks.com>
Authored: Fri Aug 24 22:30:07 2018 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Fri Aug 24 22:30:07 2018 -0700
----------------------------------------------------------------------
.../exec/vector/ptf/VectorPTFEvaluatorBase.java | 7 +
.../vector/ptf/VectorPTFEvaluatorDenseRank.java | 8 +-
.../exec/vector/ptf/VectorPTFEvaluatorRank.java | 8 +-
.../hive/ql/optimizer/physical/Vectorizer.java | 73 ++++-----
.../hadoop/hive/ql/plan/VectorPTFDesc.java | 6 +-
.../test/results/clientpositive/llap/ptf.q.out | 8 +-
.../llap/vector_ptf_part_simple.q.out | 74 ++++++++-
.../clientpositive/llap/vector_windowing.q.out | 38 ++++-
.../llap/vector_windowing_rank.q.out | 41 ++++-
.../clientpositive/llap/vectorized_ptf.q.out | 151 +++++++++++++++++--
.../clientpositive/perf/spark/query47.q.out | 3 +
.../clientpositive/perf/spark/query57.q.out | 3 +
.../clientpositive/perf/tez/query47.q.out | 56 +++----
.../clientpositive/perf/tez/query57.q.out | 56 +++----
.../test/results/clientpositive/spark/ptf.q.out | 4 +
.../clientpositive/spark/vectorized_ptf.q.out | 147 ++++++++++++++++--
16 files changed, 541 insertions(+), 142 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java
index 437c319..daefdc4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java
@@ -65,6 +65,13 @@ public abstract class VectorPTFEvaluatorBase {
this.outputColumnNum = outputColumnNum;
}
+ public VectorPTFEvaluatorBase(WindowFrameDef windowFrameDef, int outputColumnNum) {
+ this.windowFrameDef = windowFrameDef;
+ inputVecExpr = null;
+ inputColumnNum = -1;
+ this.outputColumnNum = outputColumnNum;
+ }
+
// Evaluate the aggregation input argument expression.
public void evaluateInputExpr(VectorizedRowBatch batch) throws HiveException {
if (inputVecExpr != null) {
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java
index cb6b586..c80b077 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.hive.ql.exec.vector.ptf;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
@@ -35,9 +34,8 @@ public class VectorPTFEvaluatorDenseRank extends VectorPTFEvaluatorBase {
private int denseRank;
- public VectorPTFEvaluatorDenseRank(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr,
- int outputColumnNum) {
- super(windowFrameDef, inputVecExpr, outputColumnNum);
+ public VectorPTFEvaluatorDenseRank(WindowFrameDef windowFrameDef, int outputColumnNum) {
+ super(windowFrameDef, outputColumnNum);
resetEvaluator();
}
@@ -45,7 +43,7 @@ public class VectorPTFEvaluatorDenseRank extends VectorPTFEvaluatorBase {
public void evaluateGroupBatch(VectorizedRowBatch batch)
throws HiveException {
- evaluateInputExpr(batch);
+ // We don't evaluate input columns...
LongColumnVector longColVector = (LongColumnVector) batch.cols[outputColumnNum];
longColVector.isRepeating = true;
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java
index d20c60c..5fd2506 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorRank.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.hive.ql.exec.vector.ptf;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
@@ -36,9 +35,8 @@ public class VectorPTFEvaluatorRank extends VectorPTFEvaluatorBase {
private int rank;
private int groupCount;
- public VectorPTFEvaluatorRank(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr,
- int outputColumnNum) {
- super(windowFrameDef, inputVecExpr, outputColumnNum);
+ public VectorPTFEvaluatorRank(WindowFrameDef windowFrameDef, int outputColumnNum) {
+ super(windowFrameDef, outputColumnNum);
resetEvaluator();
}
@@ -46,7 +44,7 @@ public class VectorPTFEvaluatorRank extends VectorPTFEvaluatorBase {
public void evaluateGroupBatch(VectorizedRowBatch batch)
throws HiveException {
- evaluateInputExpr(batch);
+ // We don't evaluate input columns...
/*
* Do careful maintenance of the outputColVector.noNulls flag.
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 1956125..48974f8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -2863,45 +2863,50 @@ public class Vectorizer implements PhysicalPlanResolver {
default:
throw new RuntimeException("Unexpected window type " + windowFrameDef.getWindowType());
}
- if (exprNodeDescList != null && exprNodeDescList.size() > 1) {
- setOperatorIssue("More than 1 argument expression of aggregation function " + functionName);
- return false;
- }
- if (exprNodeDescList != null) {
- ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
- if (containsLeadLag(exprNodeDesc)) {
- setOperatorIssue("lead and lag function not supported in argument expression of aggregation function " + functionName);
- return false;
- }
+ // RANK/DENSE_RANK don't care about columns.
+ if (supportedFunctionType != SupportedFunctionType.RANK &&
+ supportedFunctionType != SupportedFunctionType.DENSE_RANK) {
- if (supportedFunctionType != SupportedFunctionType.COUNT &&
- supportedFunctionType != SupportedFunctionType.DENSE_RANK &&
- supportedFunctionType != SupportedFunctionType.RANK) {
+ if (exprNodeDescList != null) {
+ if (exprNodeDescList.size() > 1) {
+ setOperatorIssue("More than 1 argument expression of aggregation function " + functionName);
+ return false;
+ }
- // COUNT, DENSE_RANK, and RANK do not care about column types. The rest do.
- TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
- Category category = typeInfo.getCategory();
- boolean isSupportedType;
- if (category != Category.PRIMITIVE) {
- isSupportedType = false;
- } else {
- ColumnVector.Type colVecType =
- VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
- switch (colVecType) {
- case LONG:
- case DOUBLE:
- case DECIMAL:
- isSupportedType = true;
- break;
- default:
+ ExprNodeDesc exprNodeDesc = exprNodeDescList.get(0);
+
+ if (containsLeadLag(exprNodeDesc)) {
+ setOperatorIssue("lead and lag function not supported in argument expression of aggregation function " + functionName);
+ return false;
+ }
+
+ if (supportedFunctionType != SupportedFunctionType.COUNT) {
+
+ // COUNT does not care about column types. The rest do.
+ TypeInfo typeInfo = exprNodeDesc.getTypeInfo();
+ Category category = typeInfo.getCategory();
+ boolean isSupportedType;
+ if (category != Category.PRIMITIVE) {
isSupportedType = false;
- break;
+ } else {
+ ColumnVector.Type colVecType =
+ VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
+ switch (colVecType) {
+ case LONG:
+ case DOUBLE:
+ case DECIMAL:
+ isSupportedType = true;
+ break;
+ default:
+ isSupportedType = false;
+ break;
+ }
+ }
+ if (!isSupportedType) {
+ setOperatorIssue(typeInfo.getTypeName() + " data type not supported in argument expression of aggregation function " + functionName);
+ return false;
}
- }
- if (!isSupportedType) {
- setOperatorIssue(typeInfo.getTypeName() + " data type not supported in argument expression of aggregation function " + functionName);
- return false;
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java
index 53886fe..54efca8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPTFDesc.java
@@ -162,12 +162,10 @@ public class VectorPTFDesc extends AbstractVectorDesc {
new VectorPTFEvaluatorRowNumber(windowFrameDef, inputVectorExpression, outputColumnNum);
break;
case RANK:
- evaluator =
- new VectorPTFEvaluatorRank(windowFrameDef, inputVectorExpression, outputColumnNum);
+ evaluator = new VectorPTFEvaluatorRank(windowFrameDef, outputColumnNum);
break;
case DENSE_RANK:
- evaluator =
- new VectorPTFEvaluatorDenseRank(windowFrameDef, inputVectorExpression, outputColumnNum);
+ evaluator = new VectorPTFEvaluatorDenseRank(windowFrameDef, outputColumnNum);
break;
case MIN:
switch (columnVectorType) {
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/test/results/clientpositive/llap/ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/ptf.q.out b/ql/src/test/results/clientpositive/llap/ptf.q.out
index 3fa2655..7a067f7 100644
--- a/ql/src/test/results/clientpositive/llap/ptf.q.out
+++ b/ql/src/test/results/clientpositive/llap/ptf.q.out
@@ -1440,7 +1440,7 @@ STAGE PLANS:
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int)
@@ -3541,7 +3541,7 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -4342,7 +4342,7 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 5
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -4607,7 +4607,7 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out b/ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out
index 9f49f2e..44bfe20 100644
--- a/ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out
@@ -5765,16 +5765,28 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, timestamp, timestamp]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
- vectorized: false
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:timestamp, VALUE._col0:string, VALUE._col1:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, timestamp, timestamp]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 2, 3]
Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
@@ -5796,13 +5808,32 @@ STAGE PLANS:
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank]
+ functionInputExpressions: [col 0:string]
+ functionNames: [rank]
+ keyInputColumns: [0]
+ native: true
+ nonKeyInputColumns: [2, 3]
+ orderExpressions: [col 0:string, IfExprColumnNull(col 5:boolean, col 6:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 5:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00) -> 6:timestamp) -> 7:timestamp]
+ outputColumns: [4, 0, 2, 3]
+ outputTypes: [int, string, string, double]
+ streamingColumns: [4]
Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 2, 3, 4]
Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -6868,16 +6899,28 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: [bigint, timestamp, timestamp]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
- vectorized: false
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:timestamp, VALUE._col0:string, VALUE._col1:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, timestamp, timestamp]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: double)
outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 2, 3]
Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
@@ -6899,13 +6942,32 @@ STAGE PLANS:
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank]
+ functionInputExpressions: [col 0:string]
+ functionNames: [rank]
+ keyInputColumns: [0]
+ native: true
+ nonKeyInputColumns: [2, 3]
+ orderExpressions: [col 0:string, IfExprColumnNull(col 5:boolean, col 6:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 5:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00) -> 6:timestamp) -> 7:timestamp]
+ outputColumns: [4, 0, 2, 3]
+ outputTypes: [int, string, string, double]
+ streamingColumns: [4]
Statistics: Num rows: 40 Data size: 19816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 2, 3, 4]
Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 40 Data size: 9224 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
index cf6af00..53327bd 100644
--- a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
@@ -1660,16 +1660,28 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
- vectorized: false
+ reduceColumnNullOrder: aaz
+ reduceColumnSortOrder: ++-
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, KEY.reducesinkkey2:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int)
outputColumnNames: _col1, _col2, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
@@ -1691,13 +1703,33 @@ STAGE PLANS:
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank]
+ functionInputExpressions: [col 1:string]
+ functionNames: [rank]
+ keyInputColumns: [1, 0, 2]
+ native: true
+ nonKeyInputColumns: []
+ orderExpressions: [col 1:string, col 2:int]
+ outputColumns: [3, 1, 0, 2]
+ outputTypes: [int, string, string, int]
+ partitionExpressions: [col 0:string]
+ streamingColumns: [3]
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3]
Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out
index 0be304c..d2670af 100644
--- a/ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out
@@ -364,16 +364,28 @@ STAGE PLANS:
partitionColumnCount: 0
scratchColumnTypeNames: []
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function dense_rank
- vectorized: false
+ reduceColumnNullOrder: aaz
+ reduceColumnSortOrder: ++-
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:int, KEY.reducesinkkey2:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey0 (type: timestamp)
outputColumnNames: _col2, _col7, _col8
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 2, 0]
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
@@ -395,16 +407,39 @@ STAGE PLANS:
window function: GenericUDAFDenseRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorDenseRank]
+ functionInputExpressions: [col 1:int]
+ functionNames: [dense_rank]
+ keyInputColumns: [1, 2, 0]
+ native: true
+ nonKeyInputColumns: []
+ orderExpressions: [col 1:int, col 2:string]
+ outputColumns: [3, 1, 2, 0]
+ outputTypes: [int, int, string, timestamp]
+ partitionExpressions: [col 0:timestamp]
+ streamingColumns: [3]
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col7 (type: string), dense_rank_window_0 (type: int)
outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [2, 3]
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
index b6b6cc2..748dea1 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
@@ -2011,16 +2011,28 @@ STAGE PLANS:
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
- vectorized: false
+ reduceColumnNullOrder: aaz
+ reduceColumnSortOrder: ++-
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, KEY.reducesinkkey2:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int)
outputColumnNames: _col1, _col2, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
@@ -2042,13 +2054,33 @@ STAGE PLANS:
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank]
+ functionInputExpressions: [col 1:string]
+ functionNames: [rank]
+ keyInputColumns: [1, 0, 2]
+ native: true
+ nonKeyInputColumns: []
+ orderExpressions: [col 1:string, col 2:int]
+ outputColumns: [3, 1, 0, 2]
+ outputTypes: [int, string, string, int]
+ partitionExpressions: [col 0:string]
+ streamingColumns: [3]
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3]
Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -4187,7 +4219,7 @@ STAGE PLANS:
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
+ notVectorizedReason: PTF operator: cume_dist not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
vectorized: false
Reduce Operator Tree:
Select Operator
@@ -4630,16 +4662,28 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
- vectorized: false
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
@@ -4674,13 +4718,32 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumLong
window frame: ROWS PRECEDING(MAX)~CURRENT
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingLongSum]
+ functionInputExpressions: [col 0:string, col 0:string, col 2:int]
+ functionNames: [rank, dense_rank, sum]
+ keyInputColumns: [1, 0]
+ native: true
+ nonKeyInputColumns: [2]
+ orderExpressions: [col 0:string, col 1:string]
+ outputColumns: [3, 4, 5, 1, 0, 2]
+ outputTypes: [int, int, bigint, string, string, int]
+ streamingColumns: [3, 4, 5]
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 3, 4, 2, 5]
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -5634,16 +5697,28 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 5
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
- vectorized: false
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
@@ -5678,13 +5753,32 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumLong
window frame: ROWS PRECEDING(MAX)~CURRENT
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingLongSum]
+ functionInputExpressions: [col 0:string, col 0:string, col 2:int]
+ functionNames: [rank, dense_rank, sum]
+ keyInputColumns: [1, 0]
+ native: true
+ nonKeyInputColumns: [2]
+ orderExpressions: [col 0:string, col 1:string]
+ outputColumns: [3, 4, 5, 1, 0, 2]
+ outputTypes: [int, int, bigint, string, string, int]
+ streamingColumns: [3, 4, 5]
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 3, 4, 2, 5]
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -5942,16 +6036,28 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col5 (type: int)
Reducer 4
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
- vectorized: false
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
PTF Operator
Function definitions:
@@ -5986,13 +6092,32 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumLong
window frame: ROWS PRECEDING(MAX)~CURRENT
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingLongSum]
+ functionInputExpressions: [col 0:string, col 0:string, col 2:int]
+ functionNames: [rank, dense_rank, sum]
+ keyInputColumns: [1, 0]
+ native: true
+ nonKeyInputColumns: [2]
+ orderExpressions: [col 0:string, col 1:string]
+ outputColumns: [3, 4, 5, 1, 0, 2]
+ outputTypes: [int, int, bigint, string, string, int]
+ streamingColumns: [3, 4, 5]
Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 3, 4, 2, 5, 5]
Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/test/results/clientpositive/perf/spark/query47.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query47.q.out b/ql/src/test/results/clientpositive/perf/spark/query47.q.out
index 690b105..a9b5092 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query47.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query47.q.out
@@ -484,6 +484,7 @@ STAGE PLANS:
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
value expressions: avg_window_0 (type: decimal(21,6)), _col6 (type: decimal(17,2))
Reducer 16
+ Execution mode: vectorized
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col1 (type: decimal(17,2))
@@ -612,6 +613,7 @@ STAGE PLANS:
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
value expressions: _col6 (type: decimal(17,2))
Reducer 24
+ Execution mode: vectorized
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2))
@@ -701,6 +703,7 @@ STAGE PLANS:
Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE
value expressions: _col6 (type: decimal(17,2))
Reducer 5
+ Execution mode: vectorized
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey4 (type: int), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col0 (type: decimal(17,2))
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/test/results/clientpositive/perf/spark/query57.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/spark/query57.q.out b/ql/src/test/results/clientpositive/perf/spark/query57.q.out
index 51e644a..6785ee9 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query57.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query57.q.out
@@ -478,6 +478,7 @@ STAGE PLANS:
Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
value expressions: avg_window_0 (type: decimal(21,6)), _col5 (type: decimal(17,2))
Reducer 16
+ Execution mode: vectorized
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: decimal(21,6)), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: decimal(17,2))
@@ -618,6 +619,7 @@ STAGE PLANS:
Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: decimal(17,2))
Reducer 24
+ Execution mode: vectorized
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2))
@@ -695,6 +697,7 @@ STAGE PLANS:
Statistics: Num rows: 191657247 Data size: 25954241376 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: decimal(17,2))
Reducer 5
+ Execution mode: vectorized
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: decimal(17,2))
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/test/results/clientpositive/perf/tez/query47.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query47.q.out b/ql/src/test/results/clientpositive/perf/tez/query47.q.out
index d034ea9..bd17808 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query47.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query47.q.out
@@ -121,40 +121,40 @@ Stage-0
limit:-1
Stage-1
Reducer 8 vectorized
- File Output Operator [FS_320]
- Limit [LIM_319] (rows=100 width=88)
+ File Output Operator [FS_334]
+ Limit [LIM_333] (rows=100 width=88)
Number of rows:100
- Select Operator [SEL_318] (rows=843315280 width=88)
+ Select Operator [SEL_332] (rows=843315280 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
<-Reducer 7 [SIMPLE_EDGE]
SHUFFLE [RS_108]
Select Operator [SEL_107] (rows=843315280 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
Merge Join Operator [MERGEJOIN_279] (rows=843315280 width=88)
- Conds:RS_103._col0, _col1, _col2, _col3, (_col5 + 1)=RS_104._col0, _col1, _col2, _col3, _col8(Inner),RS_104._col0, _col1, _col2, _col3, _col8=RS_105._col0, _col1, _col2, _col3, (_col5 - 1)(Inner),Output:["_col4","_col6","_col10","_col11","_col12","_col13","_col19"]
- <-Reducer 11 [SIMPLE_EDGE]
- SHUFFLE [RS_104]
+ Conds:RS_320._col0, _col1, _col2, _col3, (_col5 + 1)=RS_331._col0, _col1, _col2, _col3, _col8(Inner),RS_331._col0, _col1, _col2, _col3, _col8=RS_315._col0, _col1, _col2, _col3, (_col5 - 1)(Inner),Output:["_col4","_col6","_col10","_col11","_col12","_col13","_col19"]
+ <-Reducer 11 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_331]
PartitionCols:_col0, _col1, _col2, _col3, _col8
- Select Operator [SEL_67] (rows=31943759 width=88)
+ Select Operator [SEL_330] (rows=31943759 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
- Filter Operator [FIL_169] (rows=31943759 width=88)
+ Filter Operator [FIL_329] (rows=31943759 width=88)
predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col7 - _col0)) / _col0) > 0.1)) ELSE (null) END
- Select Operator [SEL_66] (rows=63887519 width=88)
+ Select Operator [SEL_328] (rows=63887519 width=88)
Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
- Filter Operator [FIL_170] (rows=63887519 width=88)
+ Filter Operator [FIL_327] (rows=63887519 width=88)
predicate:((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null)
- PTF Operator [PTF_65] (rows=383325119 width=88)
+ PTF Operator [PTF_326] (rows=383325119 width=88)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col4, _col3, _col5, _col6"}]
- Select Operator [SEL_64] (rows=383325119 width=88)
+ Select Operator [SEL_325] (rows=383325119 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
<-Reducer 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_316]
+ SHUFFLE [RS_324]
PartitionCols:_col3, _col2, _col4, _col5
- Select Operator [SEL_315] (rows=383325119 width=88)
+ Select Operator [SEL_323] (rows=383325119 width=88)
Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
- PTF Operator [PTF_314] (rows=383325119 width=88)
+ PTF Operator [PTF_322] (rows=383325119 width=88)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST, _col0 ASC NULLS FIRST","partition by:":"_col3, _col2, _col4, _col5, _col0"}]
- Select Operator [SEL_313] (rows=383325119 width=88)
+ Select Operator [SEL_321] (rows=383325119 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_310]
@@ -247,31 +247,31 @@ Stage-0
Select Operator [SEL_299] (rows=1704 width=1910)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_297]
- <-Reducer 6 [SIMPLE_EDGE]
- SHUFFLE [RS_105]
+ <-Reducer 6 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_315]
PartitionCols:_col0, _col1, _col2, _col3, (_col5 - 1)
- Select Operator [SEL_99] (rows=383325119 width=88)
+ Select Operator [SEL_314] (rows=383325119 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Filter Operator [FIL_175] (rows=383325119 width=88)
+ Filter Operator [FIL_313] (rows=383325119 width=88)
predicate:rank_window_0 is not null
- PTF Operator [PTF_98] (rows=383325119 width=88)
+ PTF Operator [PTF_312] (rows=383325119 width=88)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST","partition by:":"_col3, _col2, _col4, _col5"}]
- Select Operator [SEL_97] (rows=383325119 width=88)
+ Select Operator [SEL_311] (rows=383325119 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_308]
PartitionCols:_col3, _col2, _col4, _col5
Please refer to the previous Group By Operator [GBY_307]
- <-Reducer 9 [SIMPLE_EDGE]
- SHUFFLE [RS_103]
+ <-Reducer 9 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_320]
PartitionCols:_col0, _col1, _col2, _col3, (_col5 + 1)
- Select Operator [SEL_29] (rows=383325119 width=88)
+ Select Operator [SEL_319] (rows=383325119 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Filter Operator [FIL_164] (rows=383325119 width=88)
+ Filter Operator [FIL_318] (rows=383325119 width=88)
predicate:rank_window_0 is not null
- PTF Operator [PTF_28] (rows=383325119 width=88)
+ PTF Operator [PTF_317] (rows=383325119 width=88)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST","partition by:":"_col3, _col2, _col4, _col5"}]
- Select Operator [SEL_27] (rows=383325119 width=88)
+ Select Operator [SEL_316] (rows=383325119 width=88)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_309]
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/test/results/clientpositive/perf/tez/query57.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query57.q.out b/ql/src/test/results/clientpositive/perf/tez/query57.q.out
index 42cbbdc..1d3c17d 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query57.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query57.q.out
@@ -115,40 +115,40 @@ Stage-0
limit:-1
Stage-1
Reducer 8 vectorized
- File Output Operator [FS_320]
- Limit [LIM_319] (rows=100 width=135)
+ File Output Operator [FS_334]
+ Limit [LIM_333] (rows=100 width=135)
Number of rows:100
- Select Operator [SEL_318] (rows=421645952 width=135)
+ Select Operator [SEL_332] (rows=421645952 width=135)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
<-Reducer 7 [SIMPLE_EDGE]
SHUFFLE [RS_108]
Select Operator [SEL_107] (rows=421645952 width=135)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
Merge Join Operator [MERGEJOIN_279] (rows=421645952 width=135)
- Conds:RS_103._col0, _col1, _col2, (_col4 + 1)=RS_104._col0, _col1, _col2, _col7(Inner),RS_104._col0, _col1, _col2, _col7=RS_105._col0, _col1, _col2, (_col4 - 1)(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col16"]
- <-Reducer 11 [SIMPLE_EDGE]
- SHUFFLE [RS_104]
+ Conds:RS_320._col0, _col1, _col2, (_col4 + 1)=RS_331._col0, _col1, _col2, _col7(Inner),RS_331._col0, _col1, _col2, _col7=RS_315._col0, _col1, _col2, (_col4 - 1)(Inner),Output:["_col3","_col5","_col6","_col8","_col9","_col10","_col11","_col16"]
+ <-Reducer 11 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_331]
PartitionCols:_col0, _col1, _col2, _col7
- Select Operator [SEL_67] (rows=15971437 width=135)
+ Select Operator [SEL_330] (rows=15971437 width=135)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
- Filter Operator [FIL_169] (rows=15971437 width=135)
+ Filter Operator [FIL_329] (rows=15971437 width=135)
predicate:CASE WHEN ((_col0 > 0)) THEN (((abs((_col6 - _col0)) / _col0) > 0.1)) ELSE (null) END
- Select Operator [SEL_66] (rows=31942874 width=135)
+ Select Operator [SEL_328] (rows=31942874 width=135)
Output:["rank_window_1","_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
- Filter Operator [FIL_170] (rows=31942874 width=135)
+ Filter Operator [FIL_327] (rows=31942874 width=135)
predicate:((_col0 > 0) and (_col1 = 2000) and rank_window_1 is not null)
- PTF Operator [PTF_65] (rows=191657247 width=135)
+ PTF Operator [PTF_326] (rows=191657247 width=135)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col5, _col4, _col3"}]
- Select Operator [SEL_64] (rows=191657247 width=135)
+ Select Operator [SEL_325] (rows=191657247 width=135)
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
<-Reducer 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_316]
+ SHUFFLE [RS_324]
PartitionCols:_col4, _col3, _col2
- Select Operator [SEL_315] (rows=191657247 width=135)
+ Select Operator [SEL_323] (rows=191657247 width=135)
Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5"]
- PTF Operator [PTF_314] (rows=191657247 width=135)
+ PTF Operator [PTF_322] (rows=191657247 width=135)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col4 ASC NULLS FIRST, _col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST","partition by:":"_col4, _col3, _col2, _col0"}]
- Select Operator [SEL_313] (rows=191657247 width=135)
+ Select Operator [SEL_321] (rows=191657247 width=135)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_310]
@@ -241,31 +241,31 @@ Stage-0
Select Operator [SEL_299] (rows=462000 width=1436)
Output:["_col0"]
Please refer to the previous Select Operator [SEL_297]
- <-Reducer 6 [SIMPLE_EDGE]
- SHUFFLE [RS_105]
+ <-Reducer 6 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_315]
PartitionCols:_col0, _col1, _col2, (_col4 - 1)
- Select Operator [SEL_99] (rows=191657247 width=135)
+ Select Operator [SEL_314] (rows=191657247 width=135)
Output:["_col0","_col1","_col2","_col3","_col4"]
- Filter Operator [FIL_175] (rows=191657247 width=135)
+ Filter Operator [FIL_313] (rows=191657247 width=135)
predicate:rank_window_0 is not null
- PTF Operator [PTF_98] (rows=191657247 width=135)
+ PTF Operator [PTF_312] (rows=191657247 width=135)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST","partition by:":"_col4, _col3, _col2"}]
- Select Operator [SEL_97] (rows=191657247 width=135)
+ Select Operator [SEL_311] (rows=191657247 width=135)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_308]
PartitionCols:_col4, _col3, _col2
Please refer to the previous Group By Operator [GBY_307]
- <-Reducer 9 [SIMPLE_EDGE]
- SHUFFLE [RS_103]
+ <-Reducer 9 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_320]
PartitionCols:_col0, _col1, _col2, (_col4 + 1)
- Select Operator [SEL_29] (rows=191657247 width=135)
+ Select Operator [SEL_319] (rows=191657247 width=135)
Output:["_col0","_col1","_col2","_col3","_col4"]
- Filter Operator [FIL_164] (rows=191657247 width=135)
+ Filter Operator [FIL_318] (rows=191657247 width=135)
predicate:rank_window_0 is not null
- PTF Operator [PTF_28] (rows=191657247 width=135)
+ PTF Operator [PTF_317] (rows=191657247 width=135)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col0 ASC NULLS FIRST, _col1 ASC NULLS FIRST","partition by:":"_col4, _col3, _col2"}]
- Select Operator [SEL_27] (rows=191657247 width=135)
+ Select Operator [SEL_316] (rows=191657247 width=135)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
<-Reducer 5 [SIMPLE_EDGE] vectorized
SHUFFLE [RS_309]
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/test/results/clientpositive/spark/ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/ptf.q.out b/ql/src/test/results/clientpositive/spark/ptf.q.out
index 62d0942..40ac6a8 100644
--- a/ql/src/test/results/clientpositive/spark/ptf.q.out
+++ b/ql/src/test/results/clientpositive/spark/ptf.q.out
@@ -1403,6 +1403,7 @@ STAGE PLANS:
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
Reducer 3
+ Execution mode: vectorized
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int)
@@ -3413,6 +3414,7 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: int)
Reducer 4
+ Execution mode: vectorized
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -4199,6 +4201,7 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: int)
Reducer 5
+ Execution mode: vectorized
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
@@ -4459,6 +4462,7 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: int)
Reducer 4
+ Execution mode: vectorized
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
index a133aad..0c48310 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
@@ -1989,15 +1989,28 @@ STAGE PLANS:
Map-reduce partition columns: _col2 (type: string)
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Reducer 3
+ Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
- vectorized: false
+ reduceColumnNullOrder: aaz
+ reduceColumnSortOrder: ++-
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, KEY.reducesinkkey2:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int)
outputColumnNames: _col1, _col2, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
@@ -2019,13 +2032,33 @@ STAGE PLANS:
window function: GenericUDAFRankEvaluator
window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
isPivotResult: true
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank]
+ functionInputExpressions: [col 1:string]
+ functionNames: [rank]
+ keyInputColumns: [1, 0, 2]
+ native: true
+ nonKeyInputColumns: []
+ orderExpressions: [col 1:string, col 2:int]
+ outputColumns: [3, 1, 0, 2]
+ outputTypes: [int, string, string, int]
+ partitionExpressions: [col 0:string]
+ streamingColumns: [3]
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3]
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -4097,7 +4130,7 @@ STAGE PLANS:
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
+ notVectorizedReason: PTF operator: cume_dist not in supported functions [avg, count, dense_rank, first_value, last_value, max, min, rank, row_number, sum]
vectorized: false
Reduce Operator Tree:
Select Operator
@@ -4550,15 +4583,28 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: int)
Reducer 4
+ Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
- vectorized: false
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
@@ -4593,13 +4639,32 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumLong
window frame: ROWS PRECEDING(MAX)~CURRENT
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingLongSum]
+ functionInputExpressions: [col 0:string, col 0:string, col 2:int]
+ functionNames: [rank, dense_rank, sum]
+ keyInputColumns: [1, 0]
+ native: true
+ nonKeyInputColumns: [2]
+ orderExpressions: [col 0:string, col 1:string]
+ outputColumns: [3, 4, 5, 1, 0, 2]
+ outputTypes: [int, int, bigint, string, string, int]
+ streamingColumns: [3, 4, 5]
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 3, 4, 2, 5]
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -5539,15 +5604,28 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: int)
Reducer 5
+ Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
- vectorized: false
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
@@ -5582,13 +5660,32 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumLong
window frame: ROWS PRECEDING(MAX)~CURRENT
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingLongSum]
+ functionInputExpressions: [col 0:string, col 0:string, col 2:int]
+ functionNames: [rank, dense_rank, sum]
+ keyInputColumns: [1, 0]
+ native: true
+ nonKeyInputColumns: [2]
+ orderExpressions: [col 0:string, col 1:string]
+ outputColumns: [3, 4, 5, 1, 0, 2]
+ outputTypes: [int, int, bigint, string, string, int]
+ streamingColumns: [3, 4, 5]
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 3, 4, 2, 5]
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -5842,15 +5939,28 @@ STAGE PLANS:
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: int)
Reducer 4
+ Execution mode: vectorized
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: PTF operator: More than 1 argument expression of aggregation function rank
- vectorized: false
+ reduceColumnNullOrder: aa
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string, VALUE._col3:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
outputColumnNames: _col1, _col2, _col5
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [1, 0, 2]
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
PTF Operator
Function definitions:
@@ -5885,13 +5995,32 @@ STAGE PLANS:
name: sum
window function: GenericUDAFSumLong
window frame: ROWS PRECEDING(MAX)~CURRENT
+ PTF Vectorization:
+ className: VectorPTFOperator
+ evaluatorClasses: [VectorPTFEvaluatorRank, VectorPTFEvaluatorDenseRank, VectorPTFEvaluatorStreamingLongSum]
+ functionInputExpressions: [col 0:string, col 0:string, col 2:int]
+ functionNames: [rank, dense_rank, sum]
+ keyInputColumns: [1, 0]
+ native: true
+ nonKeyInputColumns: [2]
+ orderExpressions: [col 0:string, col 1:string]
+ outputColumns: [3, 4, 5, 1, 0, 2]
+ outputTypes: [int, int, bigint, string, string, int]
+ streamingColumns: [3, 4, 5]
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 3, 4, 2, 5, 5]
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat