You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2015/08/08 01:54:41 UTC
hive git commit: HIVE-11448: Support vectorization of Multi-OR and
Multi-AND (Matt McCline, reviewed by Gopal V)
Repository: hive
Updated Branches:
refs/heads/master b6f1dfacc -> dc4777ce5
HIVE-11448: Support vectorization of Multi-OR and Multi-AND (Matt McCline, reviewed by Gopal V)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dc4777ce
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dc4777ce
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dc4777ce
Branch: refs/heads/master
Commit: dc4777ce56f99a7afd96b4b0df9085266604254e
Parents: b6f1dfa
Author: Gunther Hagleitner <gu...@apache.org>
Authored: Fri Aug 7 16:50:02 2015 -0700
Committer: Gunther Hagleitner <gu...@apache.org>
Committed: Fri Aug 7 16:50:02 2015 -0700
----------------------------------------------------------------------
.../ql/exec/vector/VectorizationContext.java | 51 +++-
.../vector/expressions/FilterExprAndExpr.java | 8 +-
.../vector/expressions/FilterExprOrExpr.java | 140 ++++++++-
.../exec/vector/TestVectorizationContext.java | 78 +++++
.../TestVectorLogicalExpressions.java | 282 +++++++++++++++++++
5 files changed, 539 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/dc4777ce/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 8fbf064..5b702bb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Mode;
import org.apache.hadoop.hive.ql.exec.vector.expressions.*;
@@ -114,6 +115,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.hive.common.util.DateUtils;
@@ -956,9 +958,43 @@ public class VectorizationContext {
return expr;
}
- private VectorExpression getVectorExpressionForUdf(Class<?> udf, List<ExprNodeDesc> childExpr, Mode mode,
+ private VectorExpression getVectorExpressionForUdf(GenericUDF genericeUdf,
+ Class<?> udfClass, List<ExprNodeDesc> childExpr, Mode mode,
TypeInfo returnType) throws HiveException {
+
int numChildren = (childExpr == null) ? 0 : childExpr.size();
+
+ if (numChildren > 2 && genericeUdf != null && mode == Mode.FILTER &&
+ ((genericeUdf instanceof GenericUDFOPOr) || (genericeUdf instanceof GenericUDFOPAnd))) {
+
+ // Special case handling for Multi-OR and Multi-AND.
+
+ for (int i = 0; i < numChildren; i++) {
+ ExprNodeDesc child = childExpr.get(i);
+ String childTypeString = child.getTypeString();
+ if (childTypeString == null) {
+ throw new HiveException("Null child type name string");
+ }
+ TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(childTypeString);
+ Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo);
+ if (columnVectorType != ColumnVector.Type.LONG){
+ return null;
+ }
+ if (!(child instanceof ExprNodeGenericFuncDesc) && !(child instanceof ExprNodeColumnDesc)) {
+ return null;
+ }
+ }
+ Class<?> vclass;
+ if (genericeUdf instanceof GenericUDFOPOr) {
+ vclass = FilterExprOrExpr.class;
+ } else if (genericeUdf instanceof GenericUDFOPAnd) {
+ vclass = FilterExprAndExpr.class;
+ } else {
+ throw new RuntimeException("Unexpected multi-child UDF");
+ }
+ Mode childrenMode = getChildrenMode(mode, udfClass);
+ return createVectorExpression(vclass, childExpr, childrenMode, returnType);
+ }
if (numChildren > VectorExpressionDescriptor.MAX_NUM_ARGUMENTS) {
return null;
}
@@ -985,14 +1021,14 @@ public class VectorizationContext {
}
}
VectorExpressionDescriptor.Descriptor descriptor = builder.build();
- Class<?> vclass = this.vMap.getVectorExpressionClass(udf, descriptor);
+ Class<?> vclass = this.vMap.getVectorExpressionClass(udfClass, descriptor);
if (vclass == null) {
if (LOG.isDebugEnabled()) {
- LOG.debug("No vector udf found for "+udf.getSimpleName() + ", descriptor: "+descriptor);
+ LOG.debug("No vector udf found for "+udfClass.getSimpleName() + ", descriptor: "+descriptor);
}
return null;
}
- Mode childrenMode = getChildrenMode(mode, udf);
+ Mode childrenMode = getChildrenMode(mode, udfClass);
return createVectorExpression(vclass, childExpr, childrenMode, returnType);
}
@@ -1157,11 +1193,14 @@ public class VectorizationContext {
}
// Now do a general lookup
Class<?> udfClass = udf.getClass();
+ boolean isSubstituted = false;
if (udf instanceof GenericUDFBridge) {
udfClass = ((GenericUDFBridge) udf).getUdfClass();
+ isSubstituted = true;
}
- VectorExpression ve = getVectorExpressionForUdf(udfClass, castedChildren, mode, returnType);
+ VectorExpression ve = getVectorExpressionForUdf((!isSubstituted ? udf : null),
+ udfClass, castedChildren, mode, returnType);
if (ve == null) {
throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
@@ -1172,7 +1211,7 @@ public class VectorizationContext {
private VectorExpression getCastToTimestamp(GenericUDFTimestamp udf,
List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType) throws HiveException {
- VectorExpression ve = getVectorExpressionForUdf(udf.getClass(), childExpr, mode, returnType);
+ VectorExpression ve = getVectorExpressionForUdf(udf, udf.getClass(), childExpr, mode, returnType);
// Replace with the milliseconds conversion
if (!udf.isIntToTimestampInSeconds() && ve instanceof CastLongToTimestampViaLongToLong) {
http://git-wip-us.apache.org/repos/asf/hive/blob/dc4777ce/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
index 1f14e42..41e3b0f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
@@ -35,7 +35,9 @@ public class FilterExprAndExpr extends VectorExpression {
@Override
public void evaluate(VectorizedRowBatch batch) {
childExpressions[0].evaluate(batch);
- childExpressions[1].evaluate(batch);
+ for (int childIndex = 1; childIndex < childExpressions.length; childIndex++) {
+ childExpressions[childIndex].evaluate(batch);
+ }
}
@Override
@@ -50,6 +52,10 @@ public class FilterExprAndExpr extends VectorExpression {
@Override
public VectorExpressionDescriptor.Descriptor getDescriptor() {
+
+ // IMPORTANT NOTE: For Multi-AND, the VectorizationContext class will catch cases with 3 or
+ // more parameters...
+
return (new VectorExpressionDescriptor.Builder())
.setMode(
VectorExpressionDescriptor.Mode.FILTER)
http://git-wip-us.apache.org/repos/asf/hive/blob/dc4777ce/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
index f14307e..dc5139d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import com.google.common.base.Preconditions;
+
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -28,12 +30,88 @@ public class FilterExprOrExpr extends VectorExpression {
private static final long serialVersionUID = 1L;
private transient final int[] initialSelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
private transient int[] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+ private transient int[] unselectedCopy = new int[VectorizedRowBatch.DEFAULT_SIZE];
+ private transient int[] difference = new int[VectorizedRowBatch.DEFAULT_SIZE];
private transient final int[] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE];
public FilterExprOrExpr() {
super();
}
+ /**
+ * Subtracts the batch indices in remove from those in all, writing the
+ * surviving indices (in their original order) into a difference array.
+ *
+ * @param all
+ * Strictly increasing batch indices forming the full set.
+ * @param allSize
+ * The number of valid entries in all.
+ * @param remove
+ * Strictly increasing indices to remove. Every one must also appear in all.
+ * @param removeSize
+ * The number of valid entries in remove.
+ * @param difference
+ * Receives the entries of all that are not in remove. Must be a
+ * different array object from both all and remove.
+ * @return
+ * The number of entries written to the difference array.
+ */
+ private int subtract(int[] all, int allSize,
+ int[] remove, int removeSize, int[] difference) {
+
+ // TODO: Copied from VectorMapJoinOuterGenerateResultOperator.
+ // The three arrays must be distinct objects: difference is written while all and remove are read.
+ Preconditions.checkState((all != remove) && (remove != difference) && (difference != all));
+
+ // Sanity checks on the inputs; meant to be commented out once the code is trusted.
+ if (!verifyMonotonicallyIncreasing(all, allSize)) {
+ throw new RuntimeException("all is not in sort order and unique");
+ }
+ if (!verifyMonotonicallyIncreasing(remove, removeSize)) {
+ throw new RuntimeException("remove is not in sort order and unique");
+ }
+
+ int differenceCount = 0;
+
+ // Single pass over all: a match with the current remove entry advances removeIndex, anything else is kept.
+ int removeIndex = 0;
+ for (int i = 0; i < allSize; i++) {
+ int candidateIndex = all[i];
+ if (removeIndex < removeSize && candidateIndex == remove[removeIndex]) {
+ removeIndex++;
+ } else {
+ difference[differenceCount++] = candidateIndex;
+ }
+ }
+ // Any remove entries left over were never matched against an entry of all.
+ if (removeIndex != removeSize) {
+ throw new RuntimeException("Not all batch indices removed");
+ }
+
+ if (!verifyMonotonicallyIncreasing(difference, differenceCount)) {
+ throw new RuntimeException("difference is not in sort order and unique");
+ }
+
+ return differenceCount;
+ }
+
+ public boolean verifyMonotonicallyIncreasing(int[] selected, int size) {
+ // Returns true when the first size entries of selected are strictly increasing (sorted, no duplicates).
+ if (size == 0) {
+ return true;
+ }
+ int prevBatchIndex = selected[0];
+ // Compare each entry with its predecessor; an equal or smaller value fails the check.
+ for (int i = 1; i < size; i++) {
+ int batchIndex = selected[i];
+ if (batchIndex <= prevBatchIndex) {
+ return false;
+ }
+ prevBatchIndex = batchIndex;
+ }
+ return true;
+ }
+
@Override
public void evaluate(VectorizedRowBatch batch) {
int n = batch.size;
@@ -42,7 +120,6 @@ public class FilterExprOrExpr extends VectorExpression {
}
VectorExpression childExpr1 = this.childExpressions[0];
- VectorExpression childExpr2 = this.childExpressions[1];
boolean prevSelectInUse = batch.selectedInUse;
@@ -80,17 +157,55 @@ public class FilterExprOrExpr extends VectorExpression {
}
}
- // Evaluate second child expression over unselected ones only.
+ int newSize = sizeAfterFirstChild;
+
batch.selected = unselected;
batch.size = unselectedSize;
- childExpr2.evaluate(batch);
-
- // Merge the result of last evaluate to previous evaluate.
- int newSize = batch.size + sizeAfterFirstChild;
- for (int i = 0; i < batch.size; i++) {
- tmp[batch.selected[i]] = 1;
+ if (unselectedSize > 0) {
+
+ // Evaluate subsequent child expression over unselected ones only.
+
+ final int childrenCount = this.childExpressions.length;
+ int childIndex = 1;
+ while (true) {
+
+ boolean isLastChild = (childIndex + 1 >= childrenCount);
+
+ // When we have yet another child beyond the current one... save unselected.
+ if (!isLastChild) {
+ System.arraycopy(batch.selected, 0, unselectedCopy, 0, unselectedSize);
+ }
+
+ VectorExpression childExpr = this.childExpressions[childIndex];
+
+ childExpr.evaluate(batch);
+
+ // Merge the result of last evaluate to previous evaluate.
+ newSize += batch.size;
+ for (int i = 0; i < batch.size; i++) {
+ tmp[batch.selected[i]] = 1;
+ }
+
+ if (isLastChild) {
+ break;
+ }
+
+ unselectedSize = subtract(unselectedCopy, unselectedSize, batch.selected, batch.size,
+ difference);
+ if (unselectedSize == 0) {
+ break;
+ }
+ System.arraycopy(difference, 0, batch.selected, 0, unselectedSize);
+ batch.size = unselectedSize;
+
+ childIndex++;
+ }
}
+
+ // Important: Restore the batch's selected array.
+ batch.selected = selectedAfterFirstChild;
+
int k = 0;
for (int j = 0; j < n; j++) {
int i = initialSelected[j];
@@ -99,16 +214,11 @@ public class FilterExprOrExpr extends VectorExpression {
}
}
-
batch.size = newSize;
if (newSize == n) {
// Filter didn't do anything
batch.selectedInUse = prevSelectInUse;
}
-
- // unselected array is taken away by the row batch
- // so take the row batch's original one.
- unselected = selectedAfterFirstChild;
}
@Override
@@ -123,6 +233,10 @@ public class FilterExprOrExpr extends VectorExpression {
@Override
public VectorExpressionDescriptor.Descriptor getDescriptor() {
+
+ // IMPORTANT NOTE: For Multi-OR, the VectorizationContext class will catch cases with 3 or
+ // more parameters...
+
return (new VectorExpressionDescriptor.Builder())
.setMode(
VectorExpressionDescriptor.Mode.FILTER)
http://git-wip-us.apache.org/repos/asf/hive/blob/dc4777ce/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
index 1a77033..98a8c3e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
@@ -503,6 +503,84 @@ public class TestVectorizationContext {
}
@Test
+ public void testVectorizeFilterMultiAndOrExpression() throws HiveException {
+ ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
+ ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10));
+ // First predicate: col1 > 10.
+ GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
+ ExprNodeGenericFuncDesc greaterExprDesc = new ExprNodeGenericFuncDesc();
+ greaterExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+ greaterExprDesc.setGenericUDF(udf);
+ List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
+ children1.add(col1Expr);
+ children1.add(constDesc);
+ greaterExprDesc.setChildren(children1);
+ // Second predicate: col2 < 1.0 (float column).
+ ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Float.class, "col2", "table", false);
+ ExprNodeConstantDesc const2Desc = new ExprNodeConstantDesc(new Float(1.0));
+
+ GenericUDFOPLessThan udf2 = new GenericUDFOPLessThan();
+ ExprNodeGenericFuncDesc lessExprDesc = new ExprNodeGenericFuncDesc();
+ lessExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+ lessExprDesc.setGenericUDF(udf2);
+ List<ExprNodeDesc> children2 = new ArrayList<ExprNodeDesc>(2);
+ children2.add(col2Expr);
+ children2.add(const2Desc);
+ lessExprDesc.setChildren(children2);
+ // Third predicate: col3 > 10.
+ ExprNodeColumnDesc col3Expr = new ExprNodeColumnDesc(Integer.class, "col3", "table", false);
+ ExprNodeConstantDesc const3Desc = new ExprNodeConstantDesc(new Integer(10));
+
+ GenericUDFOPGreaterThan udf3 = new GenericUDFOPGreaterThan();
+ ExprNodeGenericFuncDesc greaterExprDesc3 = new ExprNodeGenericFuncDesc();
+ greaterExprDesc3.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+ greaterExprDesc3.setGenericUDF(udf3);
+ List<ExprNodeDesc> children3 = new ArrayList<ExprNodeDesc>(2);
+ children3.add(col3Expr);
+ children3.add(const3Desc);
+ greaterExprDesc3.setChildren(children3);
+ // Three-child AND over the predicates above; it must vectorize to FilterExprAndExpr.
+ GenericUDFOPAnd andUdf = new GenericUDFOPAnd();
+ ExprNodeGenericFuncDesc andExprDesc = new ExprNodeGenericFuncDesc();
+ andExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+ andExprDesc.setGenericUDF(andUdf);
+ List<ExprNodeDesc> children4 = new ArrayList<ExprNodeDesc>(3);
+ children4.add(greaterExprDesc);
+ children4.add(lessExprDesc);
+ children4.add(greaterExprDesc3);
+ andExprDesc.setChildren(children4);
+
+ List<String> columns = new ArrayList<String>();
+ columns.add("col0");
+ columns.add("col1");
+ columns.add("col2");
+ columns.add("col3");
+ VectorizationContext vc = new VectorizationContext("name", columns);
+
+ VectorExpression ve = vc.getVectorExpression(andExprDesc, VectorExpressionDescriptor.Mode.FILTER);
+
+ assertEquals(ve.getClass(), FilterExprAndExpr.class);
+ assertEquals(ve.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class);
+ assertEquals(ve.getChildExpressions()[1].getClass(), FilterDoubleColLessDoubleScalar.class);
+ assertEquals(ve.getChildExpressions()[2].getClass(), FilterLongColGreaterLongScalar.class);
+ // The same three predicates under a three-child OR; it must vectorize to FilterExprOrExpr.
+ GenericUDFOPOr orUdf = new GenericUDFOPOr();
+ ExprNodeGenericFuncDesc orExprDesc = new ExprNodeGenericFuncDesc();
+ orExprDesc.setTypeInfo(TypeInfoFactory.booleanTypeInfo);
+ orExprDesc.setGenericUDF(orUdf);
+ List<ExprNodeDesc> children5 = new ArrayList<ExprNodeDesc>(3);
+ children5.add(greaterExprDesc);
+ children5.add(lessExprDesc);
+ children5.add(greaterExprDesc3);
+ orExprDesc.setChildren(children5);
+ VectorExpression veOr = vc.getVectorExpression(orExprDesc, VectorExpressionDescriptor.Mode.FILTER);
+ assertEquals(veOr.getClass(), FilterExprOrExpr.class);
+ assertEquals(veOr.getChildExpressions()[0].getClass(), FilterLongColGreaterLongScalar.class);
+ assertEquals(veOr.getChildExpressions()[1].getClass(), FilterDoubleColLessDoubleScalar.class);
+ assertEquals(veOr.getChildExpressions()[2].getClass(), FilterLongColGreaterLongScalar.class);
+ }
+
+ @Test
public void testVectorizeAndOrProjectionExpression() throws HiveException {
ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Integer.class, "col1", "table", false);
ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc(new Integer(10));
http://git-wip-us.apache.org/repos/asf/hive/blob/dc4777ce/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
index 60e20a3..7d54a9c 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
@@ -18,11 +18,14 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertEquals;
+
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Descriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.junit.Assert;
import org.junit.Test;
@@ -399,6 +402,157 @@ public class TestVectorLogicalExpressions {
Assert.assertEquals(initialSize, batch.size);
}
+ // A do-nothing filter stub: it leaves batch.selected and batch.size untouched, so all rows pass.
+ public class SelectColumnAll extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+ private int colNum1; // stored by the constructor; not read anywhere in this class
+
+ public SelectColumnAll(int colNum1) {
+ this();
+ this.colNum1 = colNum1;
+ }
+
+ public SelectColumnAll() {
+ super();
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ // Intentionally leave the batch unchanged.
+ }
+
+ @Override
+ public int getOutputColumn() {
+ // Constant placeholder; evaluate() above writes no output column.
+ return 0;
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+ // This test-only stub supplies no descriptor.
+ return null;
+ }
+ }
+
+ // A vectorized expression that must never be evaluated (short-circuit check); evaluating it fails the test.
+ public class SelectColumnNotExpected extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+ private int colNum1; // stored by the constructor; not read anywhere in this class
+
+ public SelectColumnNotExpected(int colNum1) {
+ this();
+ this.colNum1 = colNum1;
+ }
+
+ public SelectColumnNotExpected() {
+ super();
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+ // Reaching this point means the expression was evaluated after all: fail unconditionally.
+ assertFalse(true);
+ }
+
+ @Override
+ public int getOutputColumn() {
+ // Constant placeholder; evaluate() above writes no output column.
+ return 0;
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+ // This test-only stub supplies no descriptor.
+ return null;
+ }
+ }
+
+ // A vectorized expression that selects no rows: it sets the batch size to zero.
+ public class SelectColumnNothing extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+ private int colNum1; // stored by the constructor; not read anywhere in this class
+
+ public SelectColumnNothing(int colNum1) {
+ this();
+ this.colNum1 = colNum1;
+ }
+
+ public SelectColumnNothing() {
+ super();
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+ // A size of zero leaves no rows in the batch.
+ batch.size = 0;
+ }
+
+ @Override
+ public int getOutputColumn() {
+ // Constant placeholder; evaluate() above writes no output column.
+ return 0;
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+ // This test-only stub supplies no descriptor.
+ return null;
+ }
+ }
+
+ // A vectorized expression that selects exactly one row: the batch index given at construction.
+ public class SelectColumnOne extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+ private int colNum1; // stored by the constructor; not read anywhere in this class
+ private int batchIndex;
+
+ public SelectColumnOne(int colNum1, int batchIndex) {
+ this();
+ this.colNum1 = colNum1;
+ this.batchIndex = batchIndex;
+ }
+
+ public SelectColumnOne() {
+ super();
+ }
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+ // Overwrite the selection so that it holds only the configured batch index.
+ batch.selected[0] = batchIndex;
+ batch.size = 1;
+ }
+
+ @Override
+ public int getOutputColumn() {
+ // Constant placeholder; evaluate() above writes no output column.
+ return 0;
+ }
+
+ @Override
+ public Descriptor getDescriptor() {
+ // This test-only stub supplies no descriptor.
+ return null;
+ }
+ }
+
@Test
public void testFilterExprOrExpr() {
VectorizedRowBatch batch1 = getBatchThreeBooleanCols();
@@ -441,6 +595,134 @@ public class TestVectorLogicalExpressions {
}
@Test
+ public void testFilterExprMultiOrExpr() {
+
+ // Select all with the first expression and expect the other 2 children to not be invoked.
+
+ VectorizedRowBatch batch1a = getBatchThreeBooleanCols();
+
+ SelectColumnAll expr1a = new SelectColumnAll(0);
+ SelectColumnNotExpected expr2a = new SelectColumnNotExpected(1);
+ SelectColumnNotExpected expr3a = new SelectColumnNotExpected(1);
+
+ FilterExprOrExpr orExpr = new FilterExprOrExpr();
+ orExpr.setChildExpressions(new VectorExpression[] {expr1a, expr2a, expr3a});
+
+ orExpr.evaluate(batch1a);
+
+ assertEquals(BOOLEAN_COLUMN_TEST_SIZE, batch1a.size);
+ for (int i = 0; i < BOOLEAN_COLUMN_TEST_SIZE; i++) {
+ assertEquals(i, batch1a.selected[i]);
+ }
+
+ // Select all rows using "is not null" and "is null" as the first 2 child expressions, and
+ // then expect the 3rd child to not be invoked.
+
+ VectorizedRowBatch batch1b = getBatchThreeBooleanCols();
+
+ SelectColumnIsNotNull expr1b = new SelectColumnIsNotNull(0);
+ SelectColumnIsNull expr2b = new SelectColumnIsNull(0);
+ SelectColumnNotExpected expr3b = new SelectColumnNotExpected(0);
+
+ FilterExprOrExpr orExpr2 = new FilterExprOrExpr();
+ orExpr2.setChildExpressions(new VectorExpression[] {expr1b, expr2b, expr3b});
+
+ orExpr2.evaluate(batch1b);
+
+ assertEquals(BOOLEAN_COLUMN_TEST_SIZE, batch1b.size);
+ for (int i = 0; i < BOOLEAN_COLUMN_TEST_SIZE; i++) {
+ assertEquals(i, batch1b.selected[i]);
+ }
+
+ // Use "is not null" as 1st child, select-nothing as 2nd, and "is null" as 3rd, and then
+ // expect the 4th child to not be invoked.
+
+ VectorizedRowBatch batch1c = getBatchThreeBooleanCols();
+
+ SelectColumnIsNotNull expr1c = new SelectColumnIsNotNull(0);
+ SelectColumnNothing expr2c = new SelectColumnNothing(0);
+ SelectColumnIsNull expr3c = new SelectColumnIsNull(0);
+ SelectColumnNotExpected expr4c = new SelectColumnNotExpected(0);
+
+ FilterExprOrExpr orExpr3 = new FilterExprOrExpr();
+ orExpr3.setChildExpressions(new VectorExpression[] {expr1c, expr2c, expr3c, expr4c});
+
+ orExpr3.evaluate(batch1c);
+
+ assertEquals(BOOLEAN_COLUMN_TEST_SIZE, batch1c.size);
+ for (int i = 0; i < BOOLEAN_COLUMN_TEST_SIZE; i++) {
+ assertEquals(i, batch1c.selected[i]);
+ }
+
+ // Select true fields child, none as 2nd child, and none as 3rd.
+
+ VectorizedRowBatch batch1d = getBatchThreeBooleanCols();
+
+ SelectColumnIsTrue expr1d = new SelectColumnIsTrue(0);
+ SelectColumnNothing expr2d = new SelectColumnNothing(0);
+ SelectColumnNothing expr3d = new SelectColumnNothing(0);
+
+ FilterExprOrExpr orExpr4 = new FilterExprOrExpr();
+ orExpr4.setChildExpressions(new VectorExpression[] {expr1d, expr2d, expr3d});
+
+ orExpr4.evaluate(batch1d);
+
+ int[] expected4 = {2,3,7};
+ assertEquals(expected4.length, batch1d.size);
+ for (int i = 0; i < expected4.length; i++) {
+ assertEquals(expected4[i], batch1d.selected[i]);
+ }
+
+
+ // Select none in 1st child, none as 2nd child, and none as 3rd.
+
+ VectorizedRowBatch batch1e = getBatchThreeBooleanCols();
+
+ SelectColumnNothing expr1e = new SelectColumnNothing(0);
+ SelectColumnNothing expr2e = new SelectColumnNothing(0);
+ SelectColumnNothing expr3e = new SelectColumnNothing(0);
+
+ FilterExprOrExpr orExpr5 = new FilterExprOrExpr();
+ orExpr5.setChildExpressions(new VectorExpression[] {expr1e, expr2e, expr3e});
+
+ orExpr5.evaluate(batch1e);
+
+ assertEquals(0, batch1e.size);
+
+ // Select one in 1st child, none as 2nd child, and none as 3rd.
+
+ VectorizedRowBatch batch1f = getBatchThreeBooleanCols();
+
+ SelectColumnOne expr1f = new SelectColumnOne(0, 4);
+ SelectColumnNothing expr2f = new SelectColumnNothing(0);
+ SelectColumnNothing expr3f = new SelectColumnNothing(0);
+
+ FilterExprOrExpr orExpr6 = new FilterExprOrExpr();
+ orExpr6.setChildExpressions(new VectorExpression[] {expr1f, expr2f, expr3f});
+
+ orExpr6.evaluate(batch1f);
+
+ assertEquals(1, batch1f.size);
+ assertEquals(4, batch1f.selected[0]);
+
+ // Select none in 1st child, one as 2nd child, and none as 3rd.
+
+ VectorizedRowBatch batch1g = getBatchThreeBooleanCols();
+
+ SelectColumnNothing expr1g = new SelectColumnNothing(0);
+ SelectColumnOne expr2g = new SelectColumnOne(0, 2);
+ SelectColumnNothing expr3g = new SelectColumnNothing(0);
+
+ FilterExprOrExpr orExpr7 = new FilterExprOrExpr();
+ orExpr7.setChildExpressions(new VectorExpression[] {expr1g, expr2g, expr3g});
+
+ orExpr7.evaluate(batch1g);
+
+ assertEquals(1, batch1g.size);
+ assertEquals(2, batch1g.selected[0]);
+ }
+
+ @Test
public void testFilterExprOrExprWithBatchReuse() {
VectorizedRowBatch batch1 = getBatchThreeBooleanCols();