You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/01/28 15:58:57 UTC
[2/2] hive git commit: HIVE-18524: Vectorization: Execution failure
related to non-standard embedding of IfExprConditionalFilter inside
VectorUDFAdaptor (Revert HIVE-17139) (Matt McCline)
HIVE-18524: Vectorization: Execution failure related to non-standard embedding of IfExprConditionalFilter inside VectorUDFAdaptor (Revert HIVE-17139) (Matt McCline)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1dd863ab
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1dd863ab
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1dd863ab
Branch: refs/heads/master
Commit: 1dd863ab0bc47115d3c89ed8058967c1496819c6
Parents: 68e7a34
Author: Matt McCline <mm...@hortonworks.com>
Authored: Sun Jan 28 09:54:48 2018 -0600
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Sun Jan 28 09:54:48 2018 -0600
----------------------------------------------------------------------
data/files/student_2_lines | 2 +
.../test/resources/testconfiguration.properties | 1 +
.../vector/expressions/IfExprColumnNull.java | 22 ++-
.../expressions/IfExprConditionalFilter.java | 193 -------------------
.../IfExprDoubleColumnDoubleColumn.java | 21 +-
.../IfExprIntervalDayTimeColumnColumn.java | 21 +-
.../expressions/IfExprLongColumnLongColumn.java | 21 +-
.../vector/expressions/IfExprNullColumn.java | 21 +-
...fExprStringGroupColumnStringGroupColumn.java | 21 +-
.../IfExprTimestampColumnColumnBase.java | 22 ++-
.../ql/exec/vector/udf/VectorUDFAdaptor.java | 16 +-
.../clientpositive/vector_udf_adaptor_1.q | 27 +++
.../queries/clientpositive/vectorized_case.q | 12 +-
.../llap/vector_udf_adaptor_1.q.out | 157 +++++++++++++++
.../clientpositive/llap/vectorized_case.q.out | 162 ++++++++++++++--
.../clientpositive/spark/vectorized_case.q.out | 162 ++++++++++++++--
.../clientpositive/vector_udf_adaptor_1.q.out | 192 ++++++++++++++++++
.../clientpositive/vectorized_case.q.out | 162 ++++++++++++++--
18 files changed, 957 insertions(+), 278 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/data/files/student_2_lines
----------------------------------------------------------------------
diff --git a/data/files/student_2_lines b/data/files/student_2_lines
new file mode 100644
index 0000000..9e86836
--- /dev/null
+++ b/data/files/student_2_lines
@@ -0,0 +1,2 @@
+tom thompson420.53
+luke king280.47
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 80e6aee..1ce3ba6 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -731,6 +731,7 @@ minillaplocal.query.files=\
vector_number_compare_projection.q,\
vector_partitioned_date_time.q,\
vector_ptf_part_simple.q,\
+  vector_udf_adaptor_1.q,\
vector_udf1.q,\
vector_windowing.q,\
vector_windowing_expressions.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
index 56312d9..f9b3f76 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprColumnNull.java
@@ -22,24 +22,34 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-public class IfExprColumnNull extends IfExprConditionalFilter {
+public class IfExprColumnNull extends VectorExpression {
private static final long serialVersionUID = 1L;
- public IfExprColumnNull(int arg1Column, int arg2Column, int outputColumn) {
- super(arg1Column, arg2Column, -1, outputColumn);
+ private final int arg1Column;
+ private final int arg2Column;
+
+ public IfExprColumnNull(int arg1Column, int arg2Column, int outputColumnNum) {
+ super(outputColumnNum);
+ this.arg1Column = arg1Column;
+ this.arg2Column = arg2Column;
}
public IfExprColumnNull() {
super();
+
+ // Dummy final assignments.
+ arg1Column = -1;
+ arg2Column = -1;
}
@Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
- super.evaluateIfConditionalExpr(batch, childExpressions);
+ super.evaluateChildren(batch);
}
+
final LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
final ColumnVector arg2ColVector = batch.cols[arg2Column];
final ColumnVector outputColVector = batch.cols[outputColumnNum];
@@ -94,4 +104,8 @@ public class IfExprColumnNull extends IfExprConditionalFilter {
return getColumnParamString(0, arg1Column) + ", " + getColumnParamString(1, arg2Column) + ", null";
}
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ throw new UnsupportedOperationException("Undefined descriptor");
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprConditionalFilter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprConditionalFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprConditionalFilter.java
deleted file mode 100644
index c17407e..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprConditionalFilter.java
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.exec.vector.expressions;
-
-
-import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-
-/**
- * For conditional expressions, the{@code IfExprConditionalFilter} class updated
- * the selected array of batch parameter after the conditional expression is executed.
- * Then the remaining expression will only do the selected rows instead of all.
- */
-public class IfExprConditionalFilter extends VectorExpression {
- protected int arg1Column = -1;
- protected int arg2Column = -1;
- protected int arg3Column = -1;
- protected int arg2ColumnTmp = -1;
-
- public IfExprConditionalFilter(int arg1Column, int arg2Column, int arg3Column,
- int outputColumnNum) {
- super(outputColumnNum);
- this.arg1Column = arg1Column;
- if(arg2Column == -1){
- this.arg2Column = arg3Column;
- this.arg2ColumnTmp = -1;
- } else{
- this.arg2Column = arg2Column;
- this.arg3Column = arg3Column;
- this.arg2ColumnTmp = arg2Column;
- }
- }
-
- public IfExprConditionalFilter() {
- super();
- }
-
- /**
- * For If(expr1,expr2,expr3) expression,
- * Firstly, save the previous selected vector, size and selectedInUse value of batch.
- * Secondly evaluate the conditional expression and update the selected array of batch based
- * on the result of conditional expression(1 denote done, 0 denote not done)
- * Then evaluate the expr2 based on the updated selected.
- * After the expr2 is executed, remove the indexes which have done in expr2.
- * Last, evaluate the expr3 based on the updated selected.
- *
- * @param batch
- * @param childExpressions the childExpressions need to be evaluated.
- */
- public void evaluateIfConditionalExpr(VectorizedRowBatch batch, VectorExpression[] childExpressions) {
- if (childExpressions != null) {
- // Save the previous selected vector, size and selectedInUse value of batch.
- int[] prevSelected = new int[batch.selected.length];
- int[] prevSelectedFalse = new int[batch.selected.length];
- int prevSize = batch.size;
- boolean prevSelectInUse = batch.selectedInUse;
- if (!batch.selectedInUse) {
- for (int i = 0; i < batch.size; i++) {
- prevSelected[i] = i;
- }
- System.arraycopy(batch.selected, 0, prevSelectedFalse, 0, batch.selected.length);
- System.arraycopy(prevSelected, 0, batch.selected, 0, batch.size);
- } else {
- System.arraycopy(batch.selected, 0, prevSelected, 0, batch.selected.length);
- }
-
- // Evaluate the conditional expression.
- evaluateConditionalExpression(batch, childExpressions[0],
- prevSize, prevSelectInUse);
- if (childExpressions != null && childExpressions.length == 2) {
- // If the length is 2, it has two situations:If(expr1,expr2,null) or
- // If(expr1,null,expr3) distinguished by the indexes.
- if (childExpressions[1].getOutputColumnNum() == arg2ColumnTmp) {
- // Evaluate the expr2 expression.
- childExpressions[1].evaluate(batch);
- } else {
- // Update the selected array of batch to remove the index of being done.
- evaluateSelectedArray(batch, arg1Column, prevSelected, prevSize);
- // If(expr1,null,expr3), if the expr1 is false, expr3 will be evaluated.
- childExpressions[1].evaluate(batch);
- }
- } else if (childExpressions != null && childExpressions.length == 3) {
- // IF(expr1,expr2,expr3). expr1,expr2,expr3 are all the expression.
- // Evaluate the expr2 expression.
- childExpressions[1].evaluate(batch);
- // Update the selected array of batch to remove the index of being done.
- evaluateSelectedArray(batch, arg1Column, prevSelected, prevSize);
- // Evaluate the expr3 expression.
- childExpressions[2].evaluate(batch);
- }
- // When evaluate all the expressions, restore the previous selected
- // vector,size and selectedInUse value of batch.
- batch.size = prevSize;
- batch.selectedInUse = prevSelectInUse;
- if(!prevSelectInUse){
- batch.selected = prevSelectedFalse;
- } else{
- batch.selected = prevSelected;
- }
- }
- }
-
-
- /**
- * Update the selected array of batch based on the conditional expression
- * result, remove the index of being done.
- *
- * @param batch
- * @param num the column num of conditional expression in batch cols
- * @param prevSelected the previous selected array
- */
- private static void evaluateSelectedArray(VectorizedRowBatch batch, int num,
- int[] prevSelected, int prevSize) {
- // Get the result of conditional expression.
- LongColumnVector outputColVector = (LongColumnVector) batch.cols[num];
- long[] flag = outputColVector.vector;
- int newSize = 0;
- // Update the selected array of batch
- for (int j = 0; j < prevSize; j++) {
- if (flag[prevSelected[j]] == 0) {
- batch.selected[newSize++] = prevSelected[j];
- }
- }
- batch.size = newSize;
- batch.selectedInUse = true;
- }
-
- /**
- * Evaluate the conditional expression and update the selected array of batch
- * based on the result of conditional expression.
- *
- * @param batch
- * @param ve the conditional expression need to evaluate
- * @param prevSize the previous batch size
- * @param prevSelectInUse the previous selectInUse
- */
- private static void evaluateConditionalExpression(VectorizedRowBatch batch,
- VectorExpression ve, int prevSize,
- boolean prevSelectInUse) {
- batch.size = prevSize;
- batch.selectedInUse = prevSelectInUse;
- int colNum = ve.getOutputColumnNum();
- // Evaluate the conditional expression.
- ve.evaluate(batch);
- LongColumnVector outputColVector = (LongColumnVector) batch.cols[colNum];
- long[] flag = outputColVector.vector;
- int[] sel = batch.selected;
- int newSize = 0;
- // Update the selected array of the batch based on the conditional expression.
- for (int j = 0; j < batch.size; j++) {
- int k = sel[j];
- if (flag[k] == 1) {
- sel[newSize++] = k;
- }
- }
- if(newSize < batch.size ) {
- batch.size = newSize;
- batch.selectedInUse = true;
- }
- }
-
- @Override
- public void evaluate(VectorizedRowBatch batch) {
-
- }
-
- @Override
- public VectorExpressionDescriptor.Descriptor getDescriptor() {
- throw new UnsupportedOperationException("Undefined descriptor");
- }
-
- @Override
- public String vectorExpressionParameters() {
- return null;
- }
-}
-
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
index d0a9785..e7d4e4d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprDoubleColumnDoubleColumn.java
@@ -27,23 +27,36 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
* The first is always a boolean (LongColumnVector).
* The second and third are long columns or long expression results.
*/
-public class IfExprDoubleColumnDoubleColumn extends IfExprConditionalFilter {
+public class IfExprDoubleColumnDoubleColumn extends VectorExpression {
private static final long serialVersionUID = 1L;
- public IfExprDoubleColumnDoubleColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) {
- super(arg1Column, arg2Column, arg3Column, outputColumn);
+ private final int arg1Column;
+ private final int arg2Column;
+ private final int arg3Column;
+
+ public IfExprDoubleColumnDoubleColumn(int arg1Column, int arg2Column, int arg3Column,
+ int outputColumnNum) {
+ super(outputColumnNum);
+ this.arg1Column = arg1Column;
+ this.arg2Column = arg2Column;
+ this.arg3Column = arg3Column;
}
public IfExprDoubleColumnDoubleColumn() {
super();
+
+ // Dummy final assignments.
+ arg1Column = -1;
+ arg2Column = -1;
+ arg3Column = -1;
}
@Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
- super.evaluateIfConditionalExpr(batch, childExpressions);
+ super.evaluateChildren(batch);
}
LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
index 22a00f6..fa7b2da 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprIntervalDayTimeColumnColumn.java
@@ -27,23 +27,36 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
* The first is always a boolean (LongColumnVector).
* The second and third are long columns or long expression results.
*/
-public class IfExprIntervalDayTimeColumnColumn extends IfExprConditionalFilter {
+public class IfExprIntervalDayTimeColumnColumn extends VectorExpression {
private static final long serialVersionUID = 1L;
- public IfExprIntervalDayTimeColumnColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) {
- super(arg1Column, arg2Column, arg3Column, outputColumn);
+ private final int arg1Column;
+ private final int arg2Column;
+ private final int arg3Column;
+
+ public IfExprIntervalDayTimeColumnColumn(int arg1Column, int arg2Column, int arg3Column,
+ int outputColumnNum) {
+ super(outputColumnNum);
+ this.arg1Column = arg1Column;
+ this.arg2Column = arg2Column;
+ this.arg3Column = arg3Column;
}
public IfExprIntervalDayTimeColumnColumn() {
super();
+
+ // Dummy final assignments.
+ arg1Column = -1;
+ arg2Column = -1;
+ arg3Column = -1;
}
@Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
- super.evaluateIfConditionalExpr(batch, childExpressions);
+ super.evaluateChildren(batch);
}
LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
index 71346f0..0c8a2f6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprLongColumnLongColumn.java
@@ -26,23 +26,36 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
* The first is always a boolean (LongColumnVector).
* The second and third are long columns or long expression results.
*/
-public class IfExprLongColumnLongColumn extends IfExprConditionalFilter {
+public class IfExprLongColumnLongColumn extends VectorExpression {
private static final long serialVersionUID = 1L;
- public IfExprLongColumnLongColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) {
- super(arg1Column, arg2Column, arg3Column, outputColumn);
+ private final int arg1Column;
+ private final int arg2Column;
+ private final int arg3Column;
+
+ public IfExprLongColumnLongColumn(int arg1Column, int arg2Column, int arg3Column,
+ int outputColumnNum) {
+ super(outputColumnNum);
+ this.arg1Column = arg1Column;
+ this.arg2Column = arg2Column;
+ this.arg3Column = arg3Column;
}
public IfExprLongColumnLongColumn() {
super();
+
+ // Dummy final assignments.
+ arg1Column = -1;
+ arg2Column = -1;
+ arg3Column = -1;
}
@Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
- super.evaluateIfConditionalExpr(batch, childExpressions);
+ super.evaluateChildren(batch);
}
LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
index 99185a0..85c37f9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprNullColumn.java
@@ -22,23 +22,32 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-public class IfExprNullColumn extends IfExprConditionalFilter {
+public class IfExprNullColumn extends VectorExpression {
private static final long serialVersionUID = 1L;
- public IfExprNullColumn(int arg1Column, int arg2Column, int outputColumn) {
- super(arg1Column, -1, arg2Column, outputColumn);
+ private final int arg1Column;
+ private final int arg2Column;
+
+ public IfExprNullColumn(int arg1Column, int arg2Column, int outputColumnNum) {
+ super(outputColumnNum);
+ this.arg1Column = arg1Column;
+ this.arg2Column = arg2Column;
}
public IfExprNullColumn() {
super();
+
+ // Dummy final assignments.
+ arg1Column = -1;
+ arg2Column = -1;
}
@Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
- super.evaluateIfConditionalExpr(batch, childExpressions);
+ super.evaluateChildren(batch);
}
final LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
@@ -95,4 +104,8 @@ public class IfExprNullColumn extends IfExprConditionalFilter {
return getColumnParamString(0, arg1Column) + ", null, col "+ arg2Column;
}
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ throw new UnsupportedOperationException("Undefined descriptor");
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
index 91c6c91..09aa9ab 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java
@@ -29,23 +29,36 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
* The first is always a boolean (LongColumnVector).
* The second and third are string columns or string expression results.
*/
-public class IfExprStringGroupColumnStringGroupColumn extends IfExprConditionalFilter {
+public class IfExprStringGroupColumnStringGroupColumn extends VectorExpression {
private static final long serialVersionUID = 1L;
- public IfExprStringGroupColumnStringGroupColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) {
- super(arg1Column, arg2Column, arg3Column, outputColumn);
+ private final int arg1Column;
+ private final int arg2Column;
+ private final int arg3Column;
+
+ public IfExprStringGroupColumnStringGroupColumn(int arg1Column, int arg2Column, int arg3Column,
+ int outputColumnNum) {
+ super(outputColumnNum);
+ this.arg1Column = arg1Column;
+ this.arg2Column = arg2Column;
+ this.arg3Column = arg3Column;
}
public IfExprStringGroupColumnStringGroupColumn() {
super();
+
+ // Dummy final assignments.
+ arg1Column = -1;
+ arg2Column = -1;
+ arg3Column = -1;
}
@Override
public void evaluate(VectorizedRowBatch batch) {
if (childExpressions != null) {
- super.evaluateIfConditionalExpr(batch, childExpressions);
+ super.evaluateChildren(batch);
}
LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
index 690f04c..ee3cd19 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprTimestampColumnColumnBase.java
@@ -26,22 +26,36 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
* The first is always a boolean (LongColumnVector).
* The second and third are long columns or long expression results.
*/
-public abstract class IfExprTimestampColumnColumnBase extends IfExprConditionalFilter {
+public abstract class IfExprTimestampColumnColumnBase extends VectorExpression {
private static final long serialVersionUID = 1L;
- public IfExprTimestampColumnColumnBase(int arg1Column, int arg2Column, int arg3Column, int outputColumn) {
- super(arg1Column, arg2Column, arg3Column, outputColumn);
+ private final int arg1Column;
+ private final int arg2Column;
+ private final int arg3Column;
+
+ public IfExprTimestampColumnColumnBase(int arg1Column, int arg2Column, int arg3Column,
+ int outputColumnNum) {
+ super(outputColumnNum);
+ this.arg1Column = arg1Column;
+ this.arg2Column = arg2Column;
+ this.arg3Column = arg3Column;
}
public IfExprTimestampColumnColumnBase() {
super();
+
+ // Dummy final assignments.
+ arg1Column = -1;
+ arg2Column = -1;
+ arg3Column = -1;
}
@Override
public void evaluate(VectorizedRowBatch batch) {
+
if (childExpressions != null) {
- super.evaluateIfConditionalExpr(batch, childExpressions);
+ super.evaluateChildren(batch);
}
LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column];
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
index 7326842..a1a1282 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
@@ -31,11 +31,9 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprConditionalFilter;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIf;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
@@ -62,7 +60,6 @@ public class VectorUDFAdaptor extends VectorExpression {
private String resultType;
private VectorUDFArgDesc[] argDescs;
private ExprNodeGenericFuncDesc expr;
- private IfExprConditionalFilter cf;
private transient GenericUDF genericUDF;
private transient GenericUDF.DeferredObject[] deferredChildren;
@@ -105,13 +102,6 @@ public class VectorUDFAdaptor extends VectorExpression {
outputVectorAssignRow.init(outputTypeInfo, outputColumnNum);
genericUDF.initialize(childrenOIs);
- if((GenericUDFIf.class.getName()).equals(genericUDF.getUdfName())){
-
- // UNDONE: This kind of work should be done in VectorizationContext.
- cf = new IfExprConditionalFilter
- (argDescs[0].getColumnNum(), argDescs[1].getColumnNum(),
- argDescs[2].getColumnNum(), outputColumnNum);
- }
// Initialize constant arguments
for (int i = 0; i < argDescs.length; i++) {
@@ -133,11 +123,7 @@ public class VectorUDFAdaptor extends VectorExpression {
}
if (childExpressions != null) {
- if ((GenericUDFIf.class.getName()).equals(genericUDF.getUdfName()) && cf != null) {
- cf.evaluateIfConditionalExpr(batch, childExpressions);
- } else {
- super.evaluateChildren(batch);
- }
+ super.evaluateChildren(batch);
}
int[] sel = batch.selected;
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q b/ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q
new file mode 100644
index 0000000..2eb0a0a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_udf_adaptor_1.q
@@ -0,0 +1,27 @@
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+set hive.stats.column.autogather=false;
+
+create table student_2_lines(
+name string,
+age int,
+gpa double)
+row format delimited
+fields terminated by '\001'
+stored as textfile;
+LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines;
+analyze table student_2_lines compute statistics;
+
+create table insert_10_1 (a float, b int, c timestamp, d binary);
+
+explain vectorization detail
+insert overwrite table insert_10_1
+ select cast(gpa as float),
+ age,
+ IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
+ IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines;
+insert overwrite table insert_10_1
+ select cast(gpa as float),
+ age,
+ IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
+ IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/test/queries/clientpositive/vectorized_case.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vectorized_case.q b/ql/src/test/queries/clientpositive/vectorized_case.q
index 3c48607..99d7cfc 100644
--- a/ql/src/test/queries/clientpositive/vectorized_case.q
+++ b/ql/src/test/queries/clientpositive/vectorized_case.q
@@ -79,19 +79,19 @@ CREATE TABLE test_1 (member DECIMAL , attr DECIMAL) STORED AS ORC;
INSERT INTO test_1 VALUES (3.0,1.0),(2.0,2.0),(1.0,3.0);
--for length=3
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1;
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1;
--for length=2 and the expr2 is null
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1;
SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1;
--for length=2 and the expr3 is null
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1;
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1;
@@ -102,19 +102,19 @@ CREATE TABLE test_2 (member BIGINT, attr BIGINT) STORED AS ORC;
INSERT INTO test_2 VALUES (3,1),(2,2),(1,3);
--for length=3
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2;
SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2;
--for length=2 and the expression2 is null
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2;
SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2;
--for length=2 and the expression3 is null
-EXPLAIN
+EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2;
SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out b/ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out
new file mode 100644
index 0000000..a752dfa
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_udf_adaptor_1.q.out
@@ -0,0 +1,157 @@
+PREHOOK: query: create table student_2_lines(
+name string,
+age int,
+gpa double)
+row format delimited
+fields terminated by '\001'
+stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@student_2_lines
+POSTHOOK: query: create table student_2_lines(
+name string,
+age int,
+gpa double)
+row format delimited
+fields terminated by '\001'
+stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@student_2_lines
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@student_2_lines
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@student_2_lines
+PREHOOK: query: analyze table student_2_lines compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_2_lines
+PREHOOK: Output: default@student_2_lines
+POSTHOOK: query: analyze table student_2_lines compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_2_lines
+POSTHOOK: Output: default@student_2_lines
+PREHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@insert_10_1
+POSTHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@insert_10_1
+PREHOOK: query: explain vectorization detail
+insert overwrite table insert_10_1
+ select cast(gpa as float),
+ age,
+ IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
+ IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+insert overwrite table insert_10_1
+ select cast(gpa as float),
+ age,
+ IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
+ IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: student_2_lines
+ Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: UDFToFloat(gpa) (type: float), age (type: int), if((age > 40), 2011-01-01 01:01:01.0, null) (type: timestamp), if((length(name) > 10), CAST( name AS BINARY), null) (type: binary)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [2, 1, 5, 8]
+ selectExpressions: VectorUDFAdaptor(if((age > 40), 2011-01-01 01:01:01.0, null))(children: LongColGreaterLongScalar(col 1:int, val 40) -> 4:boolean) -> 5:timestamp, VectorUDFAdaptor(if((length(name) > 10), CAST( name AS BINARY), null))(children: LongColGreaterLongScalar(col 4:int, val 10)(children: StringLength(col 0:string) -> 4:int) -> 6:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 7:binary) -> 8:binary
+ Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 2 Data size: 392 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_10_1
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: name:string, age:int, gpa:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, timestamp, bigint, string, string]
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_10_1
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+
+PREHOOK: query: insert overwrite table insert_10_1
+ select cast(gpa as float),
+ age,
+ IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
+ IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_2_lines
+PREHOOK: Output: default@insert_10_1
+POSTHOOK: query: insert overwrite table insert_10_1
+ select cast(gpa as float),
+ age,
+ IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
+ IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_2_lines
+POSTHOOK: Output: default@insert_10_1
+POSTHOOK: Lineage: insert_10_1.a EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_10_1.b SIMPLE [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_10_1.c EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_10_1.d EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:name, type:string, comment:null), ]
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
index 8dcff32..f56d9ce 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_case.q.out
@@ -514,12 +514,16 @@ POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@test_1
POSTHOOK: Lineage: test_1.attr SCRIPT []
POSTHOOK: Lineage: test_1.member SCRIPT []
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -534,12 +538,22 @@ STAGE PLANS:
TableScan
alias: test_1
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
Select Operator
expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0))
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [6]
+ selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0)
Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -547,6 +561,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
Stage: Stage-0
Fetch Operator
@@ -565,12 +588,16 @@ POSTHOOK: Input: default@test_1
3
4
4
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -585,12 +612,22 @@ STAGE PLANS:
TableScan
alias: test_1
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
Select Operator
expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0))
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [5]
+ selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0)
Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -598,6 +635,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
Stage: Stage-0
Fetch Operator
@@ -616,12 +662,16 @@ POSTHOOK: Input: default@test_1
3
4
1
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -636,12 +686,22 @@ STAGE PLANS:
TableScan
alias: test_1
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
Select Operator
expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0))
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [5]
+ selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0)
Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -649,6 +709,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
Stage: Stage-0
Fetch Operator
@@ -685,12 +754,16 @@ POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@test_2
POSTHOOK: Lineage: test_2.attr SCRIPT []
POSTHOOK: Lineage: test_2.member SCRIPT []
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -705,12 +778,22 @@ STAGE PLANS:
TableScan
alias: test_2
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
Select Operator
expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [6]
+ selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -718,6 +801,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Stage: Stage-0
Fetch Operator
@@ -736,12 +828,16 @@ POSTHOOK: Input: default@test_2
3
4
4
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -756,12 +852,22 @@ STAGE PLANS:
TableScan
alias: test_2
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
Select Operator
expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [5]
+ selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -769,6 +875,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Stage: Stage-0
Fetch Operator
@@ -787,12 +902,16 @@ POSTHOOK: Input: default@test_2
3
4
NULL
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -807,12 +926,22 @@ STAGE PLANS:
TableScan
alias: test_2
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
Select Operator
expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [5]
+ selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -820,6 +949,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized, llap
LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
index 583e902..c1dd74c 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
@@ -508,12 +508,16 @@ POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@test_1
POSTHOOK: Lineage: test_1.attr SCRIPT []
POSTHOOK: Lineage: test_1.member SCRIPT []
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -528,18 +532,37 @@ STAGE PLANS:
TableScan
alias: test_1
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Select Operator
expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0))
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [6]
+ selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0), DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 5:decimal(11,0)) -> 6:decimal(11,0)
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
Stage: Stage-0
Fetch Operator
@@ -558,12 +581,16 @@ POSTHOOK: Input: default@test_1
3
4
4
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -578,18 +605,37 @@ STAGE PLANS:
TableScan
alias: test_1
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Select Operator
expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0))
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [5]
+ selectExpressions: VectorUDFAdaptor(if((member = 1), 1, (attr + 2)))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 2) -> 4:decimal(11,0)) -> 5:decimal(11,0)
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
Stage: Stage-0
Fetch Operator
@@ -608,12 +654,16 @@ POSTHOOK: Input: default@test_1
3
4
1
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -628,18 +678,37 @@ STAGE PLANS:
TableScan
alias: test_1
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Select Operator
expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0))
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [5]
+ selectExpressions: VectorUDFAdaptor(if((member = 1), (attr + 1), 2))(children: VectorUDFAdaptor((member = 1)) -> 3:boolean, DecimalColAddDecimalScalar(col 1:decimal(10,0), val 1) -> 4:decimal(11,0)) -> 5:decimal(11,0)
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
Stage: Stage-0
Fetch Operator
@@ -676,12 +745,16 @@ POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@test_2
POSTHOOK: Lineage: test_2.attr SCRIPT []
POSTHOOK: Lineage: test_2.member SCRIPT []
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -696,18 +769,37 @@ STAGE PLANS:
TableScan
alias: test_2
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Select Operator
expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [6]
+ selectExpressions: IfExprLongColumnLongColumn(col 3:boolean, col 4:bigint, col 5:bigint)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint, LongColAddLongScalar(col 1:bigint, val 2) -> 5:bigint) -> 6:bigint
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Stage: Stage-0
Fetch Operator
@@ -726,12 +818,16 @@ POSTHOOK: Input: default@test_2
3
4
4
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -746,18 +842,37 @@ STAGE PLANS:
TableScan
alias: test_2
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Select Operator
expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [5]
+ selectExpressions: IfExprNullColumn(col 3:boolean, null, col 4)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 2) -> 4:bigint) -> 5:bigint
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Stage: Stage-0
Fetch Operator
@@ -776,12 +891,16 @@ POSTHOOK: Input: default@test_2
3
4
NULL
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2
POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -796,18 +915,37 @@ STAGE PLANS:
TableScan
alias: test_2
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
Select Operator
expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint)
outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [5]
+ selectExpressions: IfExprColumnNull(col 3:boolean, col 4:bigint, null)(children: LongColEqualLongScalar(col 0:bigint, val 1) -> 3:boolean, LongColAddLongScalar(col 1:bigint, val 1) -> 4:bigint) -> 5:bigint
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
Stage: Stage-0
Fetch Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/1dd863ab/ql/src/test/results/clientpositive/vector_udf_adaptor_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_udf_adaptor_1.q.out b/ql/src/test/results/clientpositive/vector_udf_adaptor_1.q.out
new file mode 100644
index 0000000..6efcd8c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_udf_adaptor_1.q.out
@@ -0,0 +1,192 @@
+PREHOOK: query: create table student_2_lines(
+name string,
+age int,
+gpa double)
+row format delimited
+fields terminated by '\001'
+stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@student_2_lines
+POSTHOOK: query: create table student_2_lines(
+name string,
+age int,
+gpa double)
+row format delimited
+fields terminated by '\001'
+stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@student_2_lines
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@student_2_lines
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/student_2_lines' OVERWRITE INTO TABLE student_2_lines
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@student_2_lines
+PREHOOK: query: analyze table student_2_lines compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_2_lines
+PREHOOK: Output: default@student_2_lines
+POSTHOOK: query: analyze table student_2_lines compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_2_lines
+POSTHOOK: Output: default@student_2_lines
+PREHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@insert_10_1
+POSTHOOK: query: create table insert_10_1 (a float, b int, c timestamp, d binary)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@insert_10_1
+PREHOOK: query: explain vectorization detail
+insert overwrite table insert_10_1
+ select cast(gpa as float),
+ age,
+ IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
+ IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+insert overwrite table insert_10_1
+ select cast(gpa as float),
+ age,
+ IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
+ IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: student_2_lines
+ Statistics: Num rows: 2 Data size: 37 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double, 3:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: UDFToFloat(gpa) (type: float), age (type: int), if((age > 40), 2011-01-01 01:01:01.0, null) (type: timestamp), if((length(name) > 10), CAST( name AS BINARY), null) (type: binary)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [2, 1, 5, 8]
+ selectExpressions: VectorUDFAdaptor(if((age > 40), 2011-01-01 01:01:01.0, null))(children: LongColGreaterLongScalar(col 1:int, val 40) -> 4:boolean) -> 5:timestamp, VectorUDFAdaptor(if((length(name) > 10), CAST( name AS BINARY), null))(children: LongColGreaterLongScalar(col 4:int, val 10)(children: StringLength(col 0:string) -> 4:int) -> 6:boolean, VectorUDFAdaptor(CAST( name AS BINARY)) -> 7:binary) -> 8:binary
+ Statistics: Num rows: 2 Data size: 37 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 2 Data size: 37 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_10_1
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: name:string, age:int, gpa:double
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, timestamp, bigint, string, string]
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_10_1
+
+ Stage: Stage-2
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_10_1
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.insert_10_1
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: insert overwrite table insert_10_1
+ select cast(gpa as float),
+ age,
+ IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
+ IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
+PREHOOK: type: QUERY
+PREHOOK: Input: default@student_2_lines
+PREHOOK: Output: default@insert_10_1
+POSTHOOK: query: insert overwrite table insert_10_1
+ select cast(gpa as float),
+ age,
+ IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
+ IF(LENGTH(name)>10,cast(name as binary),NULL) from student_2_lines
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@student_2_lines
+POSTHOOK: Output: default@insert_10_1
+POSTHOOK: Lineage: insert_10_1.a EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:gpa, type:double, comment:null), ]
+POSTHOOK: Lineage: insert_10_1.b SIMPLE [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_10_1.c EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:age, type:int, comment:null), ]
+POSTHOOK: Lineage: insert_10_1.d EXPRESSION [(student_2_lines)student_2_lines.FieldSchema(name:name, type:string, comment:null), ]