You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by om...@apache.org on 2013/11/06 19:45:29 UTC
svn commit: r1539428 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/
java/org/apache/hadoop/hive/ql/optimizer/physical/
test/org/apache/hadoop/hive/ql/exec/vector/ test/org/apache/hadoop/hive/ql/exec/vector/expressions/
Author: omalley
Date: Wed Nov 6 18:45:28 2013
New Revision: 1539428
URL: http://svn.apache.org/r1539428
Log:
Revert HIVE-5583 since it broke the build.
Removed:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetBytes.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetDouble.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CuckooSetLong.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterDoubleColumnInList.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterLongColumnInList.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterStringColumnInList.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestCuckooSet.java
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1539428&r1=1539427&r2=1539428&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Wed Nov 6 18:45:28 2013
@@ -76,7 +76,6 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
import org.apache.hadoop.hive.ql.udf.UDFConv;
import org.apache.hadoop.hive.ql.udf.UDFHex;
import org.apache.hadoop.hive.ql.udf.UDFOPNegative;
@@ -558,8 +557,6 @@ public class VectorizationContext {
//First handle special cases
if (udf instanceof GenericUDFBetween) {
return getBetweenFilterExpression(childExpr);
- } else if (udf instanceof GenericUDFIn) {
- return getInFilterExpression(childExpr);
} else if (udf instanceof GenericUDFBridge) {
VectorExpression v = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode);
if (v != null) {
@@ -582,104 +579,6 @@ public class VectorizationContext {
}
/**
- * Create a filter expression for column IN ( <list-of-constants> )
- * @param childExpr
- * @return
- */
- private VectorExpression getInFilterExpression(List<ExprNodeDesc> childExpr)
- throws HiveException {
- ExprNodeDesc colExpr = childExpr.get(0);
- String colType = colExpr.getTypeString();
-
- // prepare arguments for createVectorExpression
- List<ExprNodeDesc> childrenForInList =
- foldConstantsForUnaryExprs(childExpr.subList(1, childExpr.size()));
-
- // Remove nulls. This is safe because "value IN ( <list> )" is never true for a NULL member
- // of <list>, under SQL semantics, because value = NULL is always false.
- childrenForInList = removeNullListEntries(childrenForInList);
- VectorExpression expr = null;
-
- // determine class
- Class<?> cl = null;
- if (isIntFamily(colType)) {
- cl = FilterLongColumnInList.class;
- long[] inVals = new long[childrenForInList.size()];
- for (int i = 0; i != inVals.length; i++) {
- inVals[i] = getIntFamilyScalarAsLong((ExprNodeConstantDesc) childrenForInList.get(i));
- }
- FilterLongColumnInList f = (FilterLongColumnInList)
- createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
- f.setInListValues(inVals);
- expr = f;
- } else if (colType.equals("timestamp")) {
- cl = FilterLongColumnInList.class;
- long[] inVals = new long[childrenForInList.size()];
- for (int i = 0; i != inVals.length; i++) {
- inVals[i] = getTimestampScalar(childrenForInList.get(i));
- }
- FilterLongColumnInList f = (FilterLongColumnInList)
- createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
- f.setInListValues(inVals);
- expr = f;
- } else if (colType.equals("string")) {
- cl = FilterStringColumnInList.class;
- byte[][] inVals = new byte[childrenForInList.size()][];
- for (int i = 0; i != inVals.length; i++) {
- inVals[i] = getStringScalarAsByteArray((ExprNodeConstantDesc) childrenForInList.get(i));
- }
- FilterStringColumnInList f =(FilterStringColumnInList)
- createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
- f.setInListValues(inVals);
- expr = f;
- } else if (isFloatFamily(colType)) {
- cl = FilterDoubleColumnInList.class;
- double[] inValsD = new double[childrenForInList.size()];
- for (int i = 0; i != inValsD.length; i++) {
- inValsD[i] = getNumericScalarAsDouble(childrenForInList.get(i));
- }
- FilterDoubleColumnInList f = (FilterDoubleColumnInList)
- createVectorExpression(cl, childExpr.subList(0, 1), Mode.PROJECTION);
- f.setInListValues(inValsD);
- expr = f;
- } else {
- throw new HiveException("Type " + colType + " not supported for IN in vectorized mode");
- }
- return expr;
- }
-
- // Return a version of the input IN list with the NULL entries removed.
- private List<ExprNodeDesc> removeNullListEntries(List<ExprNodeDesc> childrenForInList) {
- boolean hasNulls = false;
- for (ExprNodeDesc e : childrenForInList) {
- if (e instanceof ExprNodeNullDesc) {
- hasNulls = true;
- break;
- }
- }
- if (!hasNulls) {
- return childrenForInList;
- } else {
- List<ExprNodeDesc> nullFreeList = new ArrayList<ExprNodeDesc>();
- for (ExprNodeDesc e : childrenForInList) {
- if (!(e instanceof ExprNodeNullDesc)) {
- nullFreeList.add(e);
- }
- }
- return nullFreeList;
- }
- }
-
- private byte[] getStringScalarAsByteArray(ExprNodeConstantDesc exprNodeConstantDesc)
- throws HiveException {
- Object o = getScalarValue(exprNodeConstantDesc);
- if (!(o instanceof byte[])) {
- throw new HiveException("Expected constant argument of type string");
- }
- return (byte[]) o;
- }
-
- /**
* Invoke special handling for expressions that can't be vectorized by regular
* descriptor based lookup.
*/
@@ -951,38 +850,8 @@ public class VectorizationContext {
}
}
- private long getIntFamilyScalarAsLong(ExprNodeConstantDesc constDesc)
- throws HiveException {
- Object o = getScalarValue(constDesc);
- if (o instanceof Integer) {
- return (Integer) o;
- } else if (o instanceof Long) {
- return (Long) o;
- }
- throw new HiveException("Unexpected type when converting to long");
- }
-
- private double getNumericScalarAsDouble(ExprNodeDesc constDesc)
- throws HiveException {
- Object o = getScalarValue((ExprNodeConstantDesc) constDesc);
- if (o instanceof Double) {
- return (Double) o;
- } else if (o instanceof Float) {
- return (Float) o;
- } else if (o instanceof Integer) {
- return (Integer) o;
- } else if (o instanceof Long) {
- return (Long) o;
- }
- throw new HiveException("Unexpected type when converting to double");
- }
-
- // Get a timestamp as a long in number of nanos, from a string constant or cast
+ // Get a timestamp as a long in number of nanos, from a string constant.
private long getTimestampScalar(ExprNodeDesc expr) throws HiveException {
- if (expr instanceof ExprNodeGenericFuncDesc &&
- ((ExprNodeGenericFuncDesc) expr).getGenericUDF() instanceof GenericUDFTimestamp) {
- return evaluateCastToTimestamp(expr);
- }
if (!(expr instanceof ExprNodeConstantDesc)) {
throw new HiveException("Constant timestamp value expected for expression argument. " +
"Non-constant argument not supported for vectorization.");
@@ -999,29 +868,25 @@ public class VectorizationContext {
expr2.setChildren(children);
// initialize and evaluate
- return evaluateCastToTimestamp(expr2);
+ ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(expr2);
+ ObjectInspector output = evaluator.initialize(null);
+ Object constant = evaluator.evaluate(null);
+ Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output);
+
+ if (!(java instanceof Timestamp)) {
+ throw new HiveException("Udf: failed to convert from string to timestamp");
+ }
+ Timestamp ts = (Timestamp) java;
+ long result = ts.getTime();
+ result *= 1000000; // shift left 6 digits to make room for nanos below ms precision
+ result += ts.getNanos() % 1000000; // add in nanos, after removing the ms portion
+ return result;
}
throw new HiveException("Udf: unhandled constant type for scalar argument. "
+ "Expecting string.");
}
- private long evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveException {
- ExprNodeGenericFuncDesc expr2 = (ExprNodeGenericFuncDesc) expr;
- ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(expr2);
- ObjectInspector output = evaluator.initialize(null);
- Object constant = evaluator.evaluate(null);
- Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output);
-
- if (!(java instanceof Timestamp)) {
- throw new HiveException("Udf: failed to convert to timestamp");
- }
- Timestamp ts = (Timestamp) java;
- long result = ts.getTime();
- result *= 1000000; // shift left 6 digits to make room for nanos below ms precision
- result += ts.getNanos() % 1000000; // add in nanos, after removing the ms portion
- return result;
- }
private Constructor<?> getConstructor(Class<?> cl) throws HiveException {
try {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1539428&r1=1539427&r2=1539428&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Wed Nov 6 18:45:28 2013
@@ -61,7 +61,6 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
@@ -173,7 +172,6 @@ public class Vectorizer implements Physi
supportedGenericUDFs.add(GenericUDFConcat.class);
supportedGenericUDFs.add(GenericUDFAbs.class);
supportedGenericUDFs.add(GenericUDFBetween.class);
- supportedGenericUDFs.add(GenericUDFIn.class);
// For type casts
supportedGenericUDFs.add(UDFToLong.class);
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java?rev=1539428&r1=1539427&r2=1539428&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java Wed Nov 6 18:45:28 2013
@@ -92,7 +92,6 @@ import org.apache.hadoop.hive.ql.udf.UDF
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
@@ -940,42 +939,4 @@ public class TestVectorizationContext {
ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
assertTrue(ve instanceof FilterDoubleColumnNotBetween);
}
-
- @Test
- public void testInFilters() throws HiveException {
- ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(String.class, "col1", "table", false);
- ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc("Alpha");
- ExprNodeConstantDesc constDesc2 = new ExprNodeConstantDesc("Bravo");
-
- // string IN
- GenericUDFIn udf = new GenericUDFIn();
- ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc();
- exprDesc.setGenericUDF(udf);
- List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>();
- children1.add(col1Expr);
- children1.add(constDesc);
- children1.add(constDesc2);
- exprDesc.setChildren(children1);
-
- Map<String, Integer> columnMap = new HashMap<String, Integer>();
- columnMap.put("col1", 1);
- columnMap.put("col2", 2);
- VectorizationContext vc = new VectorizationContext(columnMap, 2);
- VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
- assertTrue(ve instanceof FilterStringColumnInList);
-
- // long IN
- children1.set(0, new ExprNodeColumnDesc(Long.class, "col1", "table", false));
- children1.set(1, new ExprNodeConstantDesc(10));
- children1.set(2, new ExprNodeConstantDesc(20));
- ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
- assertTrue(ve instanceof FilterLongColumnInList);
-
- // double IN
- children1.set(0, new ExprNodeColumnDesc(Double.class, "col1", "table", false));
- children1.set(1, new ExprNodeConstantDesc(10d));
- children1.set(2, new ExprNodeConstantDesc(20d));
- ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER);
- assertTrue(ve instanceof FilterDoubleColumnInList);
- }
}
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java?rev=1539428&r1=1539427&r2=1539428&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java Wed Nov 6 18:45:28 2013
@@ -543,189 +543,4 @@ public class TestVectorFilterExpressions
assertTrue(vrb.selectedInUse);
assertEquals(0, vrb.selected[0]);
}
-
- /**
- * Test the IN filter VectorExpression classes.
- */
-
- @Test
- public void testFilterLongIn() {
- int seed = 17;
- VectorizedRowBatch vrb = VectorizedRowGroupGenUtil.getVectorizedRowBatch(
- 5, 2, seed);
- LongColumnVector lcv0 = (LongColumnVector) vrb.cols[0];
- long[] inList = {5, 20};
- FilterLongColumnInList f = new FilterLongColumnInList(0);
- f.setInListValues(inList);
- VectorExpression expr1 = f;
-
- // Basic case
- lcv0.vector[0] = 5;
- lcv0.vector[1] = 20;
- lcv0.vector[2] = 17;
- lcv0.vector[3] = 15;
- lcv0.vector[4] = 10;
-
- expr1.evaluate(vrb);
-
- assertEquals(2, vrb.size);
- assertTrue(vrb.selectedInUse);
- assertEquals(0, vrb.selected[0]);
- assertEquals(1, vrb.selected[1]);
-
- // With nulls
- VectorizedRowBatch vrb1 = VectorizedRowGroupGenUtil.getVectorizedRowBatch(
- 5, 2, seed);
-
- lcv0 = (LongColumnVector) vrb1.cols[0];
-
- lcv0.vector[0] = 5;
- lcv0.vector[1] = 20;
- lcv0.vector[2] = 17;
- lcv0.vector[3] = 15;
- lcv0.vector[4] = 10;
-
- lcv0.noNulls = false;
- lcv0.isNull[0] = true;
- lcv0.isNull[2] = true;
-
- expr1.evaluate(vrb1);
- assertEquals(1, vrb1.size);
- assertTrue(vrb1.selectedInUse);
- assertEquals(1, vrb1.selected[0]);
-
- // With nulls and selected
- VectorizedRowBatch vrb2 = VectorizedRowGroupGenUtil.getVectorizedRowBatch(
- 7, 2, seed);
- vrb2.selectedInUse = true;
- vrb2.selected[0] = 1;
- vrb2.selected[1] = 2;
- vrb2.selected[2] = 4;
- vrb2.size = 3;
-
- lcv0 = (LongColumnVector) vrb2.cols[0];
-
- lcv0.vector[0] = 5;
- lcv0.vector[1] = 20;
- lcv0.vector[2] = 17;
- lcv0.vector[3] = 15;
- lcv0.vector[4] = 10;
- lcv0.vector[5] = 19;
- lcv0.vector[6] = 21;
-
- lcv0.noNulls = false;
- lcv0.isNull[0] = true;
- lcv0.isNull[2] = true;
- lcv0.isNull[5] = true;
-
- expr1.evaluate(vrb2);
- assertEquals(1, vrb2.size);
- assertEquals(1, vrb2.selected[0]);
-
- // Repeating non null
- VectorizedRowBatch vrb3 = VectorizedRowGroupGenUtil.getVectorizedRowBatch(
- 7, 2, seed);
- lcv0 = (LongColumnVector) vrb3.cols[0];
-
- lcv0.isRepeating = true;
- lcv0.vector[0] = 5;
- lcv0.vector[1] = 20;
- lcv0.vector[2] = 17;
- lcv0.vector[3] = 15;
- lcv0.vector[4] = 10;
-
- expr1.evaluate(vrb3);
- assertEquals(7, vrb3.size);
- assertFalse(vrb3.selectedInUse);
- assertTrue(lcv0.isRepeating);
-
- // Repeating null
- lcv0.noNulls = false;
- lcv0.vector[0] = 5;
- lcv0.isNull[0] = true;
-
- expr1.evaluate(vrb3);
- assertEquals(0, vrb3.size);
- }
-
- @Test
- public void testFilterDoubleIn() {
- int seed = 17;
- VectorizedRowBatch vrb = VectorizedRowGroupGenUtil.getVectorizedRowBatch(
- 5, 2, seed);
- DoubleColumnVector dcv0 = new DoubleColumnVector();
- vrb.cols[0] = dcv0;
- double[] inList = {5.0, 20.2};
- FilterDoubleColumnInList f = new FilterDoubleColumnInList(0);
- f.setInListValues(inList);
- VectorExpression expr1 = f;
-
- // Basic sanity check. Other cases are not skipped because it is similar to the case for Long.
- dcv0.vector[0] = 5.0;
- dcv0.vector[1] = 20.2;
- dcv0.vector[2] = 17.0;
- dcv0.vector[3] = 15.0;
- dcv0.vector[4] = 10.0;
-
- expr1.evaluate(vrb);
-
- assertEquals(2, vrb.size);
- assertTrue(vrb.selectedInUse);
- assertEquals(0, vrb.selected[0]);
- assertEquals(1, vrb.selected[1]);
- }
-
- @Test
- public void testFilterStringIn() {
- int seed = 17;
- VectorizedRowBatch vrb = VectorizedRowGroupGenUtil.getVectorizedRowBatch(
- 3, 2, seed);
- vrb.cols[0] = new BytesColumnVector();
- BytesColumnVector bcv = (BytesColumnVector) vrb.cols[0];
-
- bcv.initBuffer();
- bcv.setVal(0, a, 0, 1);
- bcv.setVal(1, b, 0, 1);
- bcv.setVal(2, c, 0, 1);
-
- VectorExpression expr = new FilterStringColumnInList(0);
- byte[][] inList = {b, c};
- ((FilterStringColumnInList) expr).setInListValues(inList);
-
- // basic test
- expr.evaluate(vrb);
-
- assertEquals(2, vrb.size);
- assertTrue(vrb.selectedInUse);
- assertEquals(1, vrb.selected[0]);
- assertEquals(2, vrb.selected[1]);
-
- // nulls
- vrb.selectedInUse = false;
- vrb.size = 3;
- bcv.noNulls = false;
- bcv.isNull[2] = true;
- expr.evaluate(vrb);
- assertEquals(1, vrb.size);
- assertEquals(1, vrb.selected[0]);
- assertTrue(vrb.selectedInUse);
-
- // repeating
- vrb.selectedInUse = false;
- vrb.size = 3;
- bcv.noNulls = true;
- bcv.isRepeating = true;
- expr.evaluate(vrb);
- assertEquals(0, vrb.size);
-
- // nulls and repeating
- vrb.selectedInUse = false;
- vrb.size = 3;
- bcv.noNulls = false;
- bcv.isRepeating = true;
- bcv.isNull[0] = true;
- bcv.setVal(0, b, 0, 1);
- expr.evaluate(vrb);
- assertEquals(0, vrb.size);
- }
}