Posted to commits@hive.apache.org by br...@apache.org on 2013/10/22 19:59:08 UTC
svn commit: r1534711 [5/15] - in /hive/branches/maven: ./
ant/src/org/apache/hadoop/hive/ant/
beeline/src/java/org/apache/hive/beeline/ bin/ bin/ext/
cli/src/java/org/apache/hadoop/hive/cli/ common/
common/src/java/org/apache/hadoop/hive/common/type/ c...
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Tue Oct 22 17:58:59 2013
@@ -39,6 +39,10 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncRand;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.ISetDoubleArg;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.ISetLongArg;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColRegExpStringScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull;
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull;
@@ -71,6 +75,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -81,11 +86,26 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
+import org.apache.hadoop.hive.ql.udf.UDFAcos;
+import org.apache.hadoop.hive.ql.udf.UDFAsin;
+import org.apache.hadoop.hive.ql.udf.UDFAtan;
+import org.apache.hadoop.hive.ql.udf.UDFBin;
+import org.apache.hadoop.hive.ql.udf.UDFCeil;
+import org.apache.hadoop.hive.ql.udf.UDFConv;
+import org.apache.hadoop.hive.ql.udf.UDFCos;
import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
+import org.apache.hadoop.hive.ql.udf.UDFDegrees;
+import org.apache.hadoop.hive.ql.udf.UDFExp;
+import org.apache.hadoop.hive.ql.udf.UDFFloor;
+import org.apache.hadoop.hive.ql.udf.UDFHex;
import org.apache.hadoop.hive.ql.udf.UDFHour;
import org.apache.hadoop.hive.ql.udf.UDFLTrim;
import org.apache.hadoop.hive.ql.udf.UDFLength;
import org.apache.hadoop.hive.ql.udf.UDFLike;
+import org.apache.hadoop.hive.ql.udf.UDFLn;
+import org.apache.hadoop.hive.ql.udf.UDFLog;
+import org.apache.hadoop.hive.ql.udf.UDFLog10;
+import org.apache.hadoop.hive.ql.udf.UDFLog2;
import org.apache.hadoop.hive.ql.udf.UDFMinute;
import org.apache.hadoop.hive.ql.udf.UDFMonth;
import org.apache.hadoop.hive.ql.udf.UDFOPDivide;
@@ -95,13 +115,32 @@ import org.apache.hadoop.hive.ql.udf.UDF
import org.apache.hadoop.hive.ql.udf.UDFOPNegative;
import org.apache.hadoop.hive.ql.udf.UDFOPPlus;
import org.apache.hadoop.hive.ql.udf.UDFOPPositive;
+import org.apache.hadoop.hive.ql.udf.UDFPosMod;
+import org.apache.hadoop.hive.ql.udf.UDFPower;
+import org.apache.hadoop.hive.ql.udf.UDFRegExp;
import org.apache.hadoop.hive.ql.udf.UDFRTrim;
+import org.apache.hadoop.hive.ql.udf.UDFRadians;
+import org.apache.hadoop.hive.ql.udf.UDFRand;
+import org.apache.hadoop.hive.ql.udf.UDFRound;
import org.apache.hadoop.hive.ql.udf.UDFSecond;
+import org.apache.hadoop.hive.ql.udf.UDFSign;
+import org.apache.hadoop.hive.ql.udf.UDFSin;
+import org.apache.hadoop.hive.ql.udf.UDFSqrt;
import org.apache.hadoop.hive.ql.udf.UDFSubstr;
+import org.apache.hadoop.hive.ql.udf.UDFTan;
+import org.apache.hadoop.hive.ql.udf.UDFToBoolean;
+import org.apache.hadoop.hive.ql.udf.UDFToByte;
+import org.apache.hadoop.hive.ql.udf.UDFToInteger;
+import org.apache.hadoop.hive.ql.udf.UDFToLong;
+import org.apache.hadoop.hive.ql.udf.UDFToShort;
+import org.apache.hadoop.hive.ql.udf.UDFToFloat;
+import org.apache.hadoop.hive.ql.udf.UDFToDouble;
+import org.apache.hadoop.hive.ql.udf.UDFToString;
import org.apache.hadoop.hive.ql.udf.UDFTrim;
import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
import org.apache.hadoop.hive.ql.udf.UDFYear;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
@@ -115,10 +154,14 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
/**
@@ -139,6 +182,16 @@ public class VectorizationContext {
//Map column number to type
private final OutputColumnManager ocm;
+ // Package where custom (hand-built) vector expression classes are located.
+ private static final String CUSTOM_EXPR_PACKAGE =
+ "org.apache.hadoop.hive.ql.exec.vector.expressions";
+
+ // Package where vector expression packages generated from templates are located.
+ private static final String GENERATED_EXPR_PACKAGE =
+ "org.apache.hadoop.hive.ql.exec.vector.expressions.gen";
+
+ private String fileKey = null;
+
public VectorizationContext(Map<String, Integer> columnMap,
int initialOutputCol) {
this.columnMap = columnMap;
@@ -146,25 +199,20 @@ public class VectorizationContext {
this.firstOutputColumnIndex = initialOutputCol;
}
- private int getInputColumnIndex(String name) {
- if (columnMap == null) {
- //Null is treated as test call, is used for validation test.
- return 0;
- } else {
- return columnMap.get(name);
- }
+ public String getFileKey() {
+ return fileKey;
}
- /* Return true if we are running in the planner, and false if we
- * are running in a task.
- */
- /*
- private boolean isPlanner() {
+ public void setFileKey(String fileKey) {
+ this.fileKey = fileKey;
+ }
- // This relies on the behavior that columnMap is null in the planner.
- return columnMap == null;
+ private int getInputColumnIndex(String name) {
+ if (!columnMap.containsKey(name)) {
+ LOG.error(String.format("The column %s is not in the vectorization context column map.", name));
+ }
+ return columnMap.get(name);
}
- */
private class OutputColumnManager {
private final int initialOutputCol;
@@ -234,6 +282,7 @@ public class VectorizationContext {
//Important: It will come here only if the column is being used as a boolean
expr = new SelectColumnIsTrue(columnNum);
break;
+ case MAPJOIN:
case SELECT:
case GROUPBY:
case REDUCESINK:
@@ -268,11 +317,11 @@ public class VectorizationContext {
ve = getVectorExpression((ExprNodeColumnDesc) exprDesc);
} else if (exprDesc instanceof ExprNodeGenericFuncDesc) {
ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc;
- if (isCustomUDF(expr)) {
+ if (isCustomUDF(expr) || isLegacyPathUDF(expr)) {
ve = getCustomUDFExpression(expr);
} else {
ve = getVectorExpression(expr.getGenericUDF(),
- expr.getChildExprs());
+ expr.getChildren());
}
} else if (exprDesc instanceof ExprNodeConstantDesc) {
ve = getConstantVectorExpression((ExprNodeConstantDesc) exprDesc);
@@ -283,6 +332,54 @@ public class VectorizationContext {
return ve;
}
+ /* Return true if this is one of a small set of functions for which
+ * it is significantly easier to use the old code path in vectorized
+ * mode instead of implementing a new, optimized VectorExpression.
+ *
+ * Depending on performance requirements and frequency of use, these
+ * may be implemented in the future with an optimized VectorExpression.
+ */
+ public static boolean isLegacyPathUDF(ExprNodeGenericFuncDesc expr) {
+ GenericUDF gudf = expr.getGenericUDF();
+ if (gudf instanceof GenericUDFBridge) {
+ GenericUDFBridge bridge = (GenericUDFBridge) gudf;
+ Class<? extends UDF> udfClass = bridge.getUdfClass();
+ if (udfClass.equals(UDFHex.class)
+ || udfClass.equals(UDFConv.class)
+ || isCastToIntFamily(udfClass) && arg0Type(expr).equals("string")
+ || isCastToFloatFamily(udfClass) && arg0Type(expr).equals("string")
+ || udfClass.equals(UDFToString.class) &&
+ (arg0Type(expr).equals("timestamp")
+ || arg0Type(expr).equals("double")
+ || arg0Type(expr).equals("float"))) {
+ return true;
+ }
+ } else if (gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string")) {
+ return true;
+ }
+ return false;
+ }
+
+ public static boolean isCastToIntFamily(Class<? extends UDF> udfClass) {
+ return udfClass.equals(UDFToByte.class)
+ || udfClass.equals(UDFToShort.class)
+ || udfClass.equals(UDFToInteger.class)
+ || udfClass.equals(UDFToLong.class);
+
+ // Boolean is purposely excluded.
+ }
+
+ public static boolean isCastToFloatFamily(Class<? extends UDF> udfClass) {
+ return udfClass.equals(UDFToDouble.class)
+ || udfClass.equals(UDFToFloat.class);
+ }
+
+ // Return the type string of the first argument (argument 0).
+ public static String arg0Type(ExprNodeGenericFuncDesc expr) {
+ String type = expr.getChildren().get(0).getTypeString();
+ return type;
+ }
+
// Return true if this is a custom UDF or custom GenericUDF.
// This is for use only in the planner. It will fail in a task.
public static boolean isCustomUDF(ExprNodeGenericFuncDesc expr) {
@@ -402,7 +499,52 @@ public class VectorizationContext {
return expr;
}
- private VectorExpression getUnaryPlusExpression(List<ExprNodeDesc> childExprList)
+ /* Used for functions that take one argument and can be translated using a vector
+ * expression class of the form
+ * <packagePrefix>.<classPrefix><argumentType>To<resultType>
+ * The argumentType is inferred from the input expression.
+ */
+ private VectorExpression getUnaryFunctionExpression(
+ String classPrefix,
+ String resultType,
+ List<ExprNodeDesc> childExprList,
+ String packagePrefix)
+ throws HiveException {
+ ExprNodeDesc childExpr = childExprList.get(0);
+ int inputCol;
+ String colType;
+ VectorExpression v1 = null;
+ if (childExpr instanceof ExprNodeGenericFuncDesc) {
+ v1 = getVectorExpression(childExpr);
+ inputCol = v1.getOutputColumn();
+ colType = v1.getOutputType();
+ } else if (childExpr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
+ inputCol = getInputColumnIndex(colDesc.getColumn());
+ colType = colDesc.getTypeString();
+ } else {
+ throw new HiveException("Expression not supported: "+childExpr);
+ }
+ String funcInputColType = getNormalizedTypeName(colType);
+ int outputCol = ocm.allocateOutputColumn(resultType);
+ String className = packagePrefix + "."
+ + classPrefix + funcInputColType + "To" + resultType;
+ VectorExpression expr;
+ try {
+ expr = (VectorExpression) getConstructor(className).newInstance(inputCol, outputCol);
+ } catch (Exception ex) {
+ throw new HiveException(ex);
+ }
+ if (v1 != null) {
+ expr.setChildExpressions(new VectorExpression [] {v1});
+ ocm.freeOutputColumn(v1.getOutputColumn());
+ }
+ return expr;
+ }
+
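
For orientation, the class-name convention above is the whole dispatch mechanism: the context concatenates package, prefix, input type, and result type, then instantiates the class reflectively via getConstructor(className).newInstance(inputCol, outputCol). A minimal standalone sketch of the same convention (the NamingSketch class is illustrative, not part of the commit):

    public class NamingSketch {
      // Mirrors the convention used above:
      // <packagePrefix>.<classPrefix><argumentType>To<resultType>
      static String vectorClassName(String pkg, String prefix,
          String inputType, String resultType) {
        return pkg + "." + prefix + inputType + "To" + resultType;
      }

      public static void main(String[] args) {
        // For sin() over a double column producing a double:
        System.out.println(vectorClassName(
            "org.apache.hadoop.hive.ql.exec.vector.expressions.gen",
            "FuncSin", "Double", "Double"));
        // Prints ...expressions.gen.FuncSinDoubleToDouble, which the context
        // then loads with Class.forName and a two-int constructor.
      }
    }
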
+ // Used as a fast path for operations that don't modify their input, like unary +
+ // and casting boolean to long.
+ private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList)
throws HiveException {
ExprNodeDesc childExpr = childExprList.get(0);
int inputCol;
@@ -460,11 +602,29 @@ public class VectorizationContext {
return getUnaryStringExpression("StringUpper", "String", childExpr);
} else if (udf instanceof GenericUDFConcat) {
return getConcatExpression(childExpr);
+ } else if (udf instanceof GenericUDFAbs) {
+ return getUnaryAbsExpression(childExpr);
+ } else if (udf instanceof GenericUDFTimestamp) {
+ return getCastToTimestamp(childExpr);
}
throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
}
+ private VectorExpression getUnaryAbsExpression(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ String argType = childExpr.get(0).getTypeString();
+ if (isIntFamily(argType)) {
+ return getUnaryFunctionExpression("FuncAbs", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (isFloatFamily(argType)) {
+ return getUnaryFunctionExpression("FuncAbs", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ }
+
+ throw new HiveException("Udf: Abs() not supported for argument type " + argType);
+ }
+
private VectorExpression getVectorExpression(GenericUDFToUnixTimeStamp udf,
List<ExprNodeDesc> childExpr) throws HiveException {
ExprNodeDesc leftExpr = childExpr.get(0);
@@ -503,7 +663,7 @@ public class VectorizationContext {
} else if (cl.equals(UDFOPNegative.class)) {
return getUnaryMinusExpression(childExpr);
} else if (cl.equals(UDFOPPositive.class)) {
- return getUnaryPlusExpression(childExpr);
+ return getIdentityExpression(childExpr);
} else if (cl.equals(UDFYear.class) ||
cl.equals(UDFMonth.class) ||
cl.equals(UDFWeekOfYear.class) ||
@@ -513,7 +673,9 @@ public class VectorizationContext {
cl.equals(UDFSecond.class)) {
return getTimestampFieldExpression(cl.getSimpleName(), childExpr);
} else if (cl.equals(UDFLike.class)) {
- return getLikeExpression(childExpr);
+ return getLikeExpression(childExpr, true);
+ } else if (cl.equals(UDFRegExp.class)) {
+ return getLikeExpression(childExpr, false);
} else if (cl.equals(UDFLength.class)) {
return getUnaryStringExpression("StringLength", "Long", childExpr);
} else if (cl.equals(UDFSubstr.class)) {
@@ -524,11 +686,336 @@ public class VectorizationContext {
return getUnaryStringExpression("StringRTrim", "String", childExpr);
} else if (cl.equals(UDFTrim.class)) {
return getUnaryStringExpression("StringTrim", "String", childExpr);
+ } else if (cl.equals(UDFSin.class)) {
+ return getUnaryFunctionExpression("FuncSin", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFCos.class)) {
+ return getUnaryFunctionExpression("FuncCos", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFTan.class)) {
+ return getUnaryFunctionExpression("FuncTan", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFAsin.class)) {
+ return getUnaryFunctionExpression("FuncASin", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFAcos.class)) {
+ return getUnaryFunctionExpression("FuncACos", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFAtan.class)) {
+ return getUnaryFunctionExpression("FuncATan", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFFloor.class)) {
+ return getUnaryFunctionExpression("FuncFloor", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFCeil.class)) {
+ return getUnaryFunctionExpression("FuncCeil", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFDegrees.class)) {
+ return getUnaryFunctionExpression("FuncDegrees", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFRadians.class)) {
+ return getUnaryFunctionExpression("FuncRadians", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFLn.class)) {
+ return getUnaryFunctionExpression("FuncLn", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFLog2.class)) {
+ return getUnaryFunctionExpression("FuncLog2", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFLog10.class)) {
+ return getUnaryFunctionExpression("FuncLog10", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFSign.class)) {
+ return getUnaryFunctionExpression("FuncSign", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFSqrt.class)) {
+ return getUnaryFunctionExpression("FuncSqrt", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFExp.class)) {
+ return getUnaryFunctionExpression("FuncExp", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (cl.equals(UDFLog.class)) {
+ return getLogWithBaseExpression(childExpr);
+ } else if (cl.equals(UDFPower.class)) {
+ return getPowerExpression(childExpr);
+ } else if (cl.equals(UDFRound.class)) {
+ return getRoundExpression(childExpr);
+ } else if (cl.equals(UDFRand.class)) {
+ return getRandExpression(childExpr);
+ } else if (cl.equals(UDFBin.class)) {
+ return getUnaryStringExpression("FuncBin", "String", childExpr);
+ } else if (isCastToIntFamily(cl)) {
+ return getCastToLongExpression(childExpr);
+ } else if (cl.equals(UDFToBoolean.class)) {
+ return getCastToBoolean(childExpr);
+ } else if (isCastToFloatFamily(cl)) {
+ return getCastToDoubleExpression(childExpr);
+ } else if (cl.equals(UDFToString.class)) {
+ return getCastToString(childExpr);
+ } else if (cl.equals(UDFPosMod.class)) {
+ return getPosModExpression(childExpr);
}
throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
}
+ private VectorExpression getPosModExpression(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ String inputType = childExpr.get(0).getTypeString();
+
+ if (isIntFamily(inputType)) {
+ // Try to get the second argument (the modulo divisor)
+ long divisor = getLongScalar(childExpr.get(1));
+
+ // Use the standard logic for a unary function to handle the first argument.
+ VectorExpression e = getUnaryFunctionExpression("PosMod", "Long", childExpr,
+ CUSTOM_EXPR_PACKAGE);
+
+ // Set second argument for this special case
+ ((ISetLongArg) e).setArg(divisor);
+ return e;
+ } else if (isFloatFamily(inputType)) {
+
+ // Try to get the second argument (the modulo divisor)
+ double divisor = getDoubleScalar(childExpr.get(1));
+
+ // Use the standard logic for a unary function to handle the first argument.
+ VectorExpression e = getUnaryFunctionExpression("PosMod", "Double", childExpr,
+ CUSTOM_EXPR_PACKAGE);
+
+ // Set second argument for this special case
+ ((ISetDoubleArg) e).setArg(divisor);
+ return e;
+ }
+
+ throw new HiveException("Unhandled input type for PMOD(): " + inputType);
+ }
+
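
The PMOD semantics being wired up here are independent of the vector classes: a positive modulo never returns a negative result for a positive divisor, unlike Java's % operator. A small standalone sketch (the pmod helper is illustrative, not the committed PosMod implementation):

    public class PosModSketch {
      // pmod(a, b) = ((a % b) + b) % b
      static long pmod(long a, long b) {
        return ((a % b) + b) % b;
      }

      public static void main(String[] args) {
        System.out.println(-4 % 3);      // -1 (Java remainder)
        System.out.println(pmod(-4, 3)); //  2 (SQL-style PMOD)
      }
    }
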
+ private VectorExpression getCastToTimestamp(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ String inputType = childExpr.get(0).getTypeString();
+ if (isIntFamily(inputType)) {
+ return getUnaryFunctionExpression("CastLongToTimestampVia", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (isFloatFamily(inputType)) {
+ return getUnaryFunctionExpression("CastDoubleToTimestampVia", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ }
+ // The string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF.
+
+ throw new HiveException("Unhandled cast input type: " + inputType);
+ }
+
+ private VectorExpression getCastToString(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ String inputType = childExpr.get(0).getTypeString();
+ if (inputType.equals("boolean")) {
+ // Boolean must come before the integer family. It's a special case.
+ return getUnaryFunctionExpression("CastBooleanToStringVia", "String", childExpr,
+ CUSTOM_EXPR_PACKAGE);
+ } else if (isIntFamily(inputType)) {
+ return getUnaryFunctionExpression("Cast", "String", childExpr,
+ CUSTOM_EXPR_PACKAGE);
+ }
+ /* The string type is deliberately omitted -- the planner removes string to string casts.
+ * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF.
+ */
+
+ throw new HiveException("Unhandled cast input type: " + inputType);
+ }
+
+ private VectorExpression getCastToDoubleExpression(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ String inputType = childExpr.get(0).getTypeString();
+ if (isIntFamily(inputType)) {
+ return getUnaryFunctionExpression("Cast", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (inputType.equals("timestamp")) {
+ return getUnaryFunctionExpression("CastTimestampToDoubleVia", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (isFloatFamily(inputType)) {
+
+ // float types require no conversion, so use a no-op
+ return getIdentityExpression(childExpr);
+ }
+ // The string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF.
+
+ throw new HiveException("Unhandled cast input type: " + inputType);
+ }
+
+ private VectorExpression getCastToBoolean(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ String inputType = childExpr.get(0).getTypeString();
+ if (isFloatFamily(inputType)) {
+ return getUnaryFunctionExpression("CastDoubleToBooleanVia", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (isIntFamily(inputType) || inputType.equals("timestamp")) {
+ return getUnaryFunctionExpression("CastLongToBooleanVia", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (inputType.equals("string")) {
+
+ // string casts to false if it is 0 characters long, otherwise true
+ VectorExpression lenExpr = getUnaryStringExpression("StringLength", "Long", childExpr);
+
+ int outputCol = ocm.allocateOutputColumn("integer");
+ VectorExpression lenToBoolExpr =
+ new CastLongToBooleanViaLongToLong(lenExpr.getOutputColumn(), outputCol);
+ lenToBoolExpr.setChildExpressions(new VectorExpression[] {lenExpr});
+ ocm.freeOutputColumn(lenExpr.getOutputColumn());
+ return lenToBoolExpr;
+ }
+ // cast(booleanExpr as boolean) case is omitted because planner removes it as a no-op
+
+ throw new HiveException("Unhandled cast input type: " + inputType);
+ }
+
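
The string case above is a good example of expression chaining: StringLength feeds its output column into CastLongToBooleanViaLongToLong, and the intermediate column is freed once wired up. A scalar sketch of the composed semantics (simplified: String.length() here counts UTF-16 units, while the vectorized StringLength works over the UTF-8 bytes):

    public class StringToBooleanSketch {
      // Composition of the two steps above: length, then (len == 0 ? 0 : 1).
      static long castStringToBooleanLong(String s) {
        long len = s.length();
        return len == 0 ? 0L : 1L;
      }

      public static void main(String[] args) {
        System.out.println(castStringToBooleanLong(""));  // 0 (false)
        System.out.println(castStringToBooleanLong("0")); // 1 (true; content is ignored)
      }
    }
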
+ private VectorExpression getCastToLongExpression(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ String inputType = childExpr.get(0).getTypeString();
+ if (isFloatFamily(inputType)) {
+ return getUnaryFunctionExpression("Cast", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (inputType.equals("timestamp")) {
+ return getUnaryFunctionExpression("CastTimestampToLongVia", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (isIntFamily(inputType)) {
+
+ // integer and boolean types require no conversion, so use a no-op
+ return getIdentityExpression(childExpr);
+ }
+ // string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF.
+
+ throw new HiveException("Unhandled cast input type: " + inputType);
+ }
+
+ private VectorExpression getRandExpression(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+
+ // prepare one output column
+ int outputCol = ocm.allocateOutputColumn("Double");
+ if (childExpr == null || childExpr.size() == 0) {
+
+ // make no-argument vectorized Rand expression
+ return new FuncRand(outputCol);
+ } else if (childExpr.size() == 1) {
+
+ // Make vectorized Rand expression with seed
+ long seed = getLongScalar(childExpr.get(0));
+ return new FuncRand(seed, outputCol);
+ }
+
+ throw new HiveException("Vectorization error. Rand has more than 1 argument.");
+ }
+
+ private VectorExpression getRoundExpression(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+
+ // Handle one-argument case
+ if (childExpr.size() == 1) {
+ return getUnaryFunctionExpression("FuncRound", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ }
+
+ // Handle two-argument case
+
+ // Try to get the second argument (the number of digits)
+ long numDigits = getLongScalar(childExpr.get(1));
+
+ // Use the standard logic for a unary function to handle the first argument.
+ VectorExpression e = getUnaryFunctionExpression("RoundWithNumDigits", "Double", childExpr,
+ CUSTOM_EXPR_PACKAGE);
+
+ // Set second argument for this special case
+ ((ISetLongArg) e).setArg(numDigits);
+ return e;
+ }
+
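
For the two-argument case, RoundWithNumDigits receives the digit count through ISetLongArg. A scalar sketch of round-half-up to N digits, following the same rule as MathExpr.round further down (illustrative, not the committed vector code):

    public class RoundSketch {
      static double round(double d, int numDigits) {
        double scale = Math.pow(10, numDigits);
        double s = d * scale;
        long r = s > 0 ? (long) (s + 0.5d) : (long) (s - 0.5d);
        return r / scale;
      }

      public static void main(String[] args) {
        System.out.println(round(3.14159, 2)); // 3.14
        System.out.println(round(2.5, 0));     // 3.0 (half-up)
        System.out.println(round(-2.5, 0));    // -3.0
      }
    }
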
+ private VectorExpression getPowerExpression(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ String argType = childExpr.get(0).getTypeString();
+
+ // Try to get the second argument, typically a constant value (the power).
+ double power = getDoubleScalar(childExpr.get(1));
+
+ // Use the standard logic for a unary function to handle the first argument.
+ VectorExpression e = getUnaryFunctionExpression("FuncPower", "Double", childExpr,
+ CUSTOM_EXPR_PACKAGE);
+
+ // Set the second argument for this special case
+ ((ISetDoubleArg) e).setArg(power);
+ return e;
+ }
+
+ private VectorExpression getLogWithBaseExpression(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ if (childExpr.size() == 1) {
+
+ // No base provided, so this is equivalent to Ln
+ return getUnaryFunctionExpression("FuncLn", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (childExpr.size() == 2) {
+
+ // Get the type of the (normally variable) input expression
+ String argType = childExpr.get(1).getTypeString();
+
+ // Try to get the first argument, typically a constant value (the base)
+ double base = getDoubleScalar(childExpr.get(0));
+
+ // Use the standard logic for a unary function to handle the second argument.
+ VectorExpression e = getUnaryFunctionExpression("FuncLogWithBase", "Double",
+ childExpr.subList(1, 2), // pass the second argument as the first
+ CUSTOM_EXPR_PACKAGE);
+
+ // set the first argument (the base) for this special case
+ ((ISetDoubleArg) e).setArg(base);
+ return e;
+ }
+
+ throw new HiveException("Udf: Log could not be vectorized");
+ }
+
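
The subList(1, 2) trick above passes the variable expression as if it were the only argument, while the constant base is injected later through ISetDoubleArg. The math itself is the change-of-base identity used by FuncLogWithBaseDoubleToDouble below:

    public class LogBaseSketch {
      // log_base(d) = ln(d) / ln(base)
      static double logWithBase(double base, double d) {
        return Math.log(d) / Math.log(base);
      }

      public static void main(String[] args) {
        System.out.println(logWithBase(2.0, 8.0));     // 3.0
        System.out.println(logWithBase(10.0, 1000.0)); // ~3.0
      }
    }
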
+ private double getDoubleScalar(ExprNodeDesc expr) throws HiveException {
+ if (!(expr instanceof ExprNodeConstantDesc)) {
+ throw new HiveException("Constant value expected for UDF argument. " +
+ "Non-constant argument not supported for vectorization.");
+ }
+ ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) expr;
+ Object obj = getScalarValue(constExpr);
+ if (obj instanceof Double) {
+ return ((Double) obj).doubleValue();
+ } else if (obj instanceof DoubleWritable) {
+ return ((DoubleWritable) obj).get();
+ } else if (obj instanceof Integer) {
+ return (double) ((Integer) obj).longValue();
+ } else if (obj instanceof IntWritable) {
+ return (double) ((IntWritable) obj).get();
+ }
+
+ throw new HiveException("Udf: unhandled constant type for scalar argument. "
+ + "Expecting double or integer");
+ }
+
+ private long getLongScalar(ExprNodeDesc expr) throws HiveException {
+ if (!(expr instanceof ExprNodeConstantDesc)) {
+ throw new HiveException("Constant value expected for UDF argument. " +
+ "Non-constant argument not supported for vectorization.");
+ }
+ ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) expr;
+ Object obj = getScalarValue(constExpr);
+ if (obj instanceof Integer) {
+ return (long) ((Integer) obj).longValue();
+ } else if (obj instanceof IntWritable) {
+ return (long) ((IntWritable) obj).get();
+ } else if (obj instanceof Long) {
+ return ((Long) obj).longValue();
+ } else if (obj instanceof LongWritable) {
+ return ((LongWritable) obj).get();
+ }
+
+ throw new HiveException("Udf: unhandled constant type for scalar argument. "
+ + "Expecting integer or bigint");
+ }
+
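
Both scalar extractors normalize two representations of the same value: the boxed Java type and its Hadoop Writable wrapper. A standalone sketch of the same normalization, using only the Writable types imported at the top of the file (the asLong helper is illustrative):

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;

    public class ScalarSketch {
      static long asLong(Object obj) {
        if (obj instanceof Integer)      return ((Integer) obj).longValue();
        if (obj instanceof IntWritable)  return ((IntWritable) obj).get();
        if (obj instanceof Long)         return ((Long) obj).longValue();
        if (obj instanceof LongWritable) return ((LongWritable) obj).get();
        throw new IllegalArgumentException("Expecting integer or bigint: " + obj);
      }

      public static void main(String[] args) {
        System.out.println(asLong(7));                    // 7
        System.out.println(asLong(new IntWritable(7)));   // 7
        System.out.println(asLong(new LongWritable(7L))); // 7
      }
    }
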
/* Return a vector expression for string concatenation, including the column-scalar,
* scalar-column, and column-column cases.
*/
@@ -622,10 +1109,10 @@ public class VectorizationContext {
throws HiveException {
//GenericUDFBridge udfBridge = (GenericUDFBridge) expr.getGenericUDF();
- List<ExprNodeDesc> childExprList = expr.getChildExprs();
+ List<ExprNodeDesc> childExprList = expr.getChildren();
// argument descriptors
- VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[expr.getChildExprs().size()];
+ VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[expr.getChildren().size()];
for (int i = 0; i < argDescs.length; i++) {
argDescs[i] = new VectorUDFArgDesc();
}
@@ -698,6 +1185,14 @@ public class VectorizationContext {
return ve;
}
+ public static boolean isStringFamily(String resultType) {
+ return resultType.equalsIgnoreCase("string");
+ }
+
+ public static boolean isDatetimeFamily(String resultType) {
+ return resultType.equalsIgnoreCase("timestamp");
+ }
+
// return true if this is any kind of float
public static boolean isFloatFamily(String resultType) {
return resultType.equalsIgnoreCase("double")
@@ -710,7 +1205,23 @@ public class VectorizationContext {
|| resultType.equalsIgnoreCase("smallint")
|| resultType.equalsIgnoreCase("int")
|| resultType.equalsIgnoreCase("bigint")
- || resultType.equalsIgnoreCase("boolean");
+ || resultType.equalsIgnoreCase("boolean")
+ || resultType.equalsIgnoreCase("long");
+ }
+
+ public static String mapJavaTypeToVectorType(String javaType)
+ throws HiveException {
+ if (isStringFamily(javaType)) {
+ return "string";
+ }
+ if (isFloatFamily(javaType)) {
+ return "double";
+ }
+ if (isIntFamily(javaType) ||
+ isDatetimeFamily(javaType)) {
+ return "bigint";
+ }
+ throw new HiveException("Unsuported type for vectorization: " + javaType);
}
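
mapJavaTypeToVectorType collapses Hive's primitive types into the three vectorized storage families (BytesColumnVector, DoubleColumnVector, LongColumnVector). A quick check of the mapping, assuming this class is on the classpath at this revision:

    import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;

    public class TypeMapSketch {
      public static void main(String[] args) throws Exception {
        for (String t : new String[] {"string", "double", "float",
            "tinyint", "int", "bigint", "boolean", "timestamp"}) {
          // Expected: string -> string; double/float -> double;
          // integer, boolean, and timestamp types -> bigint.
          System.out.println(t + " -> "
              + VectorizationContext.mapJavaTypeToVectorType(t));
        }
      }
    }
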
/* Return a unary string vector expression. This is used for functions like
@@ -720,6 +1231,16 @@ public class VectorizationContext {
String resultType, // result type name
List<ExprNodeDesc> childExprList) throws HiveException {
+ return getUnaryExpression(vectorExprClassName, resultType, childExprList,
+ CUSTOM_EXPR_PACKAGE);
+ }
+
+ private VectorExpression getUnaryExpression(String vectorExprClassName,
+ String resultType, // result type name
+ List<ExprNodeDesc> childExprList,
+ String packagePathPrefix // prefix of package path name
+ ) throws HiveException {
+
/* Create an instance of the class vectorExprClassName for the input column or expression result
* and return it.
*/
@@ -734,13 +1255,12 @@ public class VectorizationContext {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
inputCol = getInputColumnIndex(colDesc.getColumn());
} else {
- // TODO? add code to handle constant argument case
+ // constant argument case not supported
throw new HiveException("Expression not supported: "+childExpr);
}
String outputColumnType = getNormalizedTypeName(resultType);
int outputCol = ocm.allocateOutputColumn(outputColumnType);
- String className = "org.apache.hadoop.hive.ql.exec.vector.expressions."
- + vectorExprClassName;
+ String className = packagePathPrefix + "." + vectorExprClassName;
VectorExpression expr;
try {
expr = (VectorExpression) getConstructor(className).newInstance(inputCol, outputCol);
@@ -754,6 +1274,7 @@ public class VectorizationContext {
return expr;
}
+
private VectorExpression getSubstrExpression(
List<ExprNodeDesc> childExprList) throws HiveException {
@@ -813,7 +1334,16 @@ public class VectorizationContext {
return expr;
}
- private VectorExpression getLikeExpression(List<ExprNodeDesc> childExpr) throws HiveException {
+ /**
+ * Returns a vector expression for a LIKE or REGEXP expression
+ * @param childExpr A list of child expressions
+ * @param isLike {@code true}: the expression is LIKE.
+ * {@code false}: the expression is REGEXP.
+ * @return A {@link FilterStringColLikeStringScalar} or
+ * a {@link FilterStringColRegExpStringScalar}
+ * @throws HiveException
+ */
+ private VectorExpression getLikeExpression(List<ExprNodeDesc> childExpr, boolean isLike) throws HiveException {
ExprNodeDesc leftExpr = childExpr.get(0);
ExprNodeDesc rightExpr = childExpr.get(1);
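
As the javadoc above notes, the only difference between the two filters is the pattern language: LIKE takes SQL wildcards (% and _), while REGEXP takes Java regular expressions. A pure-Java illustration of the semantic gap (the LIKE-to-regex translation below is simplified and does not reflect how UDFLike or the vectorized filters are implemented):

    import java.util.regex.Pattern;

    public class LikeVsRegExpSketch {
      // Quote everything, then re-enable the two SQL wildcards.
      static boolean like(String s, String pattern) {
        String regex = Pattern.quote(pattern)
            .replace("%", "\\E.*\\Q")
            .replace("_", "\\E.\\Q");
        return Pattern.matches(regex, s);
      }

      public static void main(String[] args) {
        System.out.println(like("hive", "h%"));             // true  (LIKE wildcard)
        System.out.println(Pattern.matches("h.*", "hive")); // true  (REGEXP)
        System.out.println(like("hive", "h.*"));            // false ('.' and '*' are literal in LIKE)
      }
    }
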
@@ -827,15 +1357,25 @@ public class VectorizationContext {
ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
constDesc = (ExprNodeConstantDesc) rightExpr;
inputCol = getInputColumnIndex(leftColDesc.getColumn());
- expr = (VectorExpression) new FilterStringColLikeStringScalar(inputCol,
- new Text((byte[]) getScalarValue(constDesc)));
+ if (isLike) {
+ expr = (VectorExpression) new FilterStringColLikeStringScalar(inputCol,
+ new Text((byte[]) getScalarValue(constDesc)));
+ } else {
+ expr = (VectorExpression) new FilterStringColRegExpStringScalar(inputCol,
+ new Text((byte[]) getScalarValue(constDesc)));
+ }
} else if ((leftExpr instanceof ExprNodeGenericFuncDesc) &&
(rightExpr instanceof ExprNodeConstantDesc)) {
v1 = getVectorExpression(leftExpr);
inputCol = v1.getOutputColumn();
constDesc = (ExprNodeConstantDesc) rightExpr;
- expr = (VectorExpression) new FilterStringColLikeStringScalar(inputCol,
- new Text((byte[]) getScalarValue(constDesc)));
+ if (isLike) {
+ expr = (VectorExpression) new FilterStringColLikeStringScalar(inputCol,
+ new Text((byte[]) getScalarValue(constDesc)));
+ } else {
+ expr = (VectorExpression) new FilterStringColRegExpStringScalar(inputCol,
+ new Text((byte[]) getScalarValue(constDesc)));
+ }
}
// TODO add logic to handle cases where left input is an expression.
if (expr == null) {
@@ -1138,6 +1678,12 @@ public class VectorizationContext {
} catch (Exception ex) {
throw new HiveException(ex);
}
+ } else if (constDesc.getTypeString().equalsIgnoreCase("boolean")) {
+ if (constDesc.getValue().equals(Boolean.valueOf(true))) {
+ return 1;
+ } else {
+ return 0;
+ }
} else {
return constDesc.getValue();
}
@@ -1565,21 +2111,43 @@ public class VectorizationContext {
return map;
}
- public ColumnVector allocateColumnVector(String type, int defaultSize) {
- if (type.equalsIgnoreCase("double")) {
+ public Map<String, Integer> getColumnMap() {
+ return columnMap;
+ }
+
+ public static ColumnVector allocateColumnVector(String type, int defaultSize) {
+ if (isFloatFamily(type)) {
return new DoubleColumnVector(defaultSize);
- } else if (type.equalsIgnoreCase("string")) {
+ } else if (isStringFamily(type)) {
return new BytesColumnVector(defaultSize);
} else {
return new LongColumnVector(defaultSize);
}
}
+ public void addToColumnMap(String columnName, int outputColumn) throws HiveException {
+ if (columnMap.containsKey(columnName) && (columnMap.get(columnName) != outputColumn)) {
+ throw new HiveException(String.format("Column %s is already mapped to %d. Cannot remap to %d.",
+ columnName, columnMap.get(columnName), outputColumn));
+ }
+ columnMap.put(columnName, outputColumn);
+ }
- public void addToColumnMap(String columnName, int outputColumn) {
- if (columnMap != null) {
- columnMap.put(columnName, outputColumn);
+ public <T> Map<T, VectorExpression[]> getMapVectorExpressions(
+ Map<T, List<ExprNodeDesc>> expressions) throws HiveException {
+ Map<T, VectorExpression[]> result = new HashMap<T, VectorExpression[]>();
+ if (null != expressions) {
+ for(T key: expressions.keySet()) {
+ result.put(key, getVectorExpressions(expressions.get(key)));
+ }
}
+ return result;
+ }
+
+ public void addOutputColumn(String columnName, String columnType) throws HiveException {
+ String vectorType = mapJavaTypeToVectorType(columnType);
+ int columnIndex = ocm.allocateOutputColumn(vectorType);
+ this.addToColumnMap(columnName, columnIndex);
}
-}
+ }
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java Tue Oct 22 17:58:59 2013
@@ -20,6 +20,10 @@ package org.apache.hadoop.hive.ql.exec.v
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -172,5 +176,67 @@ public class VectorizedRowBatch implemen
public void setValueWriters(VectorExpressionWriter[] valueWriters) {
this.valueWriters = valueWriters;
}
-}
+ public static VectorizedRowBatch buildBatch(Map<Integer, String> typeMap,
+ Map<String, Integer> columnMap) throws HiveException {
+
+ Map<Integer, ColumnVector> mapVectorColumn = new HashMap<Integer, ColumnVector>(typeMap.size());
+ int maxIndex = 0;
+
+ Iterator<Entry<Integer, String>> typeMapIt = typeMap.entrySet().iterator();
+ while(typeMapIt.hasNext()) {
+ Entry<Integer, String> type = typeMapIt.next();
+ ColumnVector cv = VectorizationContext.allocateColumnVector(type.getValue(),
+ VectorizedRowBatch.DEFAULT_SIZE);
+ mapVectorColumn.put(type.getKey(), cv);
+ if (maxIndex < type.getKey()) {
+ maxIndex = type.getKey();
+ }
+ }
+
+ VectorizedRowBatch batch = new VectorizedRowBatch(maxIndex+1);
+ for(int i=0; i <= maxIndex; ++i) {
+ ColumnVector cv = mapVectorColumn.get(i);
+ if (cv == null) {
+ // allocate a default type for the unused column;
+ // there are APIs that expect every cols[i] to be non-null
+ cv = VectorizationContext.allocateColumnVector("long",
+ VectorizedRowBatch.DEFAULT_SIZE);
+ }
+ batch.cols[i] = cv;
+ }
+
+ // Validate that every column in the column map exists
+ Iterator<Entry<String, Integer>> columnMapIt = columnMap.entrySet().iterator();
+ while(columnMapIt.hasNext()) {
+ Entry<String, Integer> cm = columnMapIt.next();
+ if (batch.cols.length <= cm.getValue() || batch.cols[cm.getValue()] == null) {
+ throw new HiveException(String.format(
+ "Internal error: The type map has no entry for column %d %s",
+ cm.getValue(), cm.getKey()));
+ }
+ }
+
+ batch.reset();
+
+ return batch;
+ }
+
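
buildBatch sizes the batch from the largest projected column index and back-fills any unused slot with a default long vector, since downstream code assumes cols[i] is never null. A pure-Java sketch of just the sizing and gap-filling step (column vectors simplified to type-name strings):

    import java.util.HashMap;
    import java.util.Map;

    public class BuildBatchSketch {
      public static void main(String[] args) {
        // Column index -> type, possibly with gaps (no entry for column 1).
        Map<Integer, String> typeMap = new HashMap<Integer, String>();
        typeMap.put(0, "string");
        typeMap.put(2, "double");

        int maxIndex = 0;
        for (int i : typeMap.keySet()) {
          maxIndex = Math.max(maxIndex, i);
        }

        String[] cols = new String[maxIndex + 1];
        for (int i = 0; i <= maxIndex; i++) {
          // Unused slots get a default "long" vector so cols[i] is never null.
          cols[i] = typeMap.containsKey(i) ? typeMap.get(i) : "long";
        }
        System.out.println(java.util.Arrays.toString(cols)); // [string, long, double]
      }
    }
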
+ /**
+ * Resets the row batch to default state
+ * - sets selectedInUse to false
+ * - sets size to 0
+ * - sets endOfFile to false
+ * - resets each column
+ */
+ public void reset() {
+ selectedInUse = false;
+ size = 0;
+ endOfFile = false;
+ for (ColumnVector vc : cols) {
+ if (vc != null) {
+ vc.reset();
+ }
+ }
+ }
+}
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java Tue Oct 22 17:58:59 2013
@@ -42,8 +42,8 @@ public class ConstantVectorExpression ex
private byte[] bytesValue = null;
private String typeString;
- private transient Type type;
- private transient int bytesValueLength = 0;
+ private Type type;
+ private int bytesValueLength = 0;
public ConstantVectorExpression() {
super();
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncBin.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncBin.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncBin.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncBin.java Tue Oct 22 17:58:59 2013
@@ -26,11 +26,11 @@ import org.apache.hadoop.hive.ql.exec.ve
public class FuncBin extends FuncLongToString {
private static final long serialVersionUID = 1L;
- FuncBin(int inputCol, int outputCol) {
+ public FuncBin(int inputCol, int outputCol) {
super(inputCol, outputCol);
}
- FuncBin() {
+ public FuncBin() {
super();
}
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncHex.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncHex.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncHex.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncHex.java Tue Oct 22 17:58:59 2013
@@ -24,11 +24,11 @@ import org.apache.hadoop.hive.ql.exec.ve
public class FuncHex extends FuncLongToString {
private static final long serialVersionUID = 1L;
- FuncHex(int inputCol, int outputCol) {
+ public FuncHex(int inputCol, int outputCol) {
super(inputCol, outputCol);
}
- FuncHex() {
+ public FuncHex() {
super();
}
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLogWithBaseDoubleToDouble.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLogWithBaseDoubleToDouble.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLogWithBaseDoubleToDouble.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncLogWithBaseDoubleToDouble.java Tue Oct 22 17:58:59 2013
@@ -18,22 +18,23 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-public class FuncLogWithBaseDoubleToDouble extends MathFuncDoubleToDouble {
+
+public class FuncLogWithBaseDoubleToDouble extends MathFuncDoubleToDouble
+ implements ISetDoubleArg {
private static final long serialVersionUID = 1L;
private double base;
- FuncLogWithBaseDoubleToDouble(int colNum, double base, int outputColumn) {
+ public FuncLogWithBaseDoubleToDouble(int colNum, int outputColumn) {
super(colNum, outputColumn);
- this.base = base;
}
- FuncLogWithBaseDoubleToDouble() {
+ public FuncLogWithBaseDoubleToDouble() {
super();
}
@Override
- double func(double d) {
+ protected double func(double d) {
return Math.log(d) / Math.log(base);
}
@@ -44,4 +45,10 @@ public class FuncLogWithBaseDoubleToDoub
public void setBase(double base) {
this.base = base;
}
+
+ // used to set the second argument to the function (a constant base)
+ @Override
+ public void setArg(double d) {
+ this.base = d;
+ }
}
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncPowerDoubleToDouble.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncPowerDoubleToDouble.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncPowerDoubleToDouble.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncPowerDoubleToDouble.java Tue Oct 22 17:58:59 2013
@@ -18,20 +18,22 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+
/**
* Vectorized implementation for Pow(a, power) and Power(a, power)
*/
-public class FuncPowerDoubleToDouble extends MathFuncDoubleToDouble {
+public class FuncPowerDoubleToDouble extends MathFuncDoubleToDouble
+ implements ISetDoubleArg {
private static final long serialVersionUID = 1L;
private double power;
- FuncPowerDoubleToDouble(int colNum, double power, int outputColumn) {
+ public FuncPowerDoubleToDouble(int colNum, int outputColumn) {
super(colNum, outputColumn);
- this.power = power;
}
- FuncPowerDoubleToDouble() {
+ public FuncPowerDoubleToDouble() {
super();
}
@@ -47,4 +49,16 @@ public class FuncPowerDoubleToDouble ext
public void setPower(double power) {
this.power = power;
}
+
+ // set the second argument (the power)
+ @Override
+ public void setArg(double d) {
+ this.power = d;
+ }
+
+ @Override
+ protected void cleanup(DoubleColumnVector outputColVector, int[] sel,
+ boolean selectedInUse, int n) {
+ // do nothing
+ }
}
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncRand.java Tue Oct 22 17:58:59 2013
@@ -31,17 +31,17 @@ public class FuncRand extends VectorExpr
private int outputCol;
private Random random;
- FuncRand(int outputCol) {
+ public FuncRand(int outputCol) {
this.outputCol = outputCol;
random = null;
}
- FuncRand(long seed, int outputCol) {
+ public FuncRand(long seed, int outputCol) {
this.outputCol = outputCol;
- random = new Random(seed);
+ this.random = new Random(seed);
}
- FuncRand() {
+ public FuncRand() {
}
@Override
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java Tue Oct 22 17:58:59 2013
@@ -18,12 +18,16 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-/**
+import java.io.IOException;
+import java.io.OutputStream;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+
+/**
* Math expression evaluation helper functions.
* Some of these are referenced from ColumnUnaryFunc.txt.
*/
public class MathExpr {
-
+
// Round using the "half-up" method used in Hive.
public static double round(double d) {
if (d > 0.0) {
@@ -32,15 +36,15 @@ public class MathExpr {
return (double) ((long) (d - 0.5d));
}
}
-
+
public static double log2(double d) {
return Math.log(d) / Math.log(2);
}
-
+
public static long abs(long v) {
return v >= 0 ? v : -v;
}
-
+
public static double sign(double v) {
return v >= 0 ? 1.0 : -1.0;
}
@@ -48,4 +52,126 @@ public class MathExpr {
public static double sign(long v) {
return v >= 0 ? 1.0 : -1.0;
}
+
+ // for casting integral types to boolean
+ public static long toBool(long v) {
+ return v == 0 ? 0 : 1;
+ }
+
+ // for casting floating point types to boolean
+ public static long toBool(double v) {
+ return v == 0.0D ? 0L : 1L;
+ }
+
+ /* Convert an integer value in milliseconds since the epoch to a timestamp value
+ * for use in a long column vector, which is represented in nanoseconds since the epoch.
+ */
+ public static long longToTimestamp(long v) {
+ return v * 1000000;
+ }
+
+ // Convert seconds since the epoch (with fraction) to nanoseconds, as a long integer.
+ public static long doubleToTimestamp(double v) {
+ return (long)( v * 1000000000.0);
+ }
+
+ /* Convert an integer value representing a timestamp in nanoseconds to one
+ * that represents a timestamp in seconds (since the epoch).
+ */
+ public static long fromTimestamp(long v) {
+ return v / 1000000000;
+ }
+
+ /* Convert an integer value representing a timestamp in nanoseconds to one
+ * that represents a timestamp in seconds, with fraction, since the epoch.
+ */
+ public static double fromTimestampToDouble(long v) {
+ return ((double) v) / 1000000000.0;
+ }
+
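
All four converters above fix nanoseconds-since-the-epoch as the in-vector representation of a timestamp. A quick check of the unit arithmetic:

    public class TimestampUnitsSketch {
      public static void main(String[] args) {
        long millis = 1500L;                                 // 1.5 seconds since the epoch
        System.out.println(millis * 1000000);                // longToTimestamp: 1500000000 ns

        double seconds = 1.5;
        System.out.println((long) (seconds * 1000000000.0)); // doubleToTimestamp: 1500000000 ns

        long nanos = 1500000000L;
        System.out.println(nanos / 1000000000);              // fromTimestamp: 1 s
        System.out.println(((double) nanos) / 1000000000.0); // fromTimestampToDouble: 1.5 s
      }
    }
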
+ /* Convert a long to a string. The string is output into the argument
+ * byte array, beginning at character 0. The length is returned.
+ */
+ public static int writeLongToUTF8(byte[] result, long i) {
+ if (i == 0) {
+ result[0] = '0';
+ return 1;
+ }
+
+ int current = 0;
+
+ if (i < 0) {
+ result[current++] ='-';
+ } else {
+ // negative range is bigger than positive range, so there is no risk
+ // of overflow here.
+ i = -i;
+ }
+
+ long start = 1000000000000000000L;
+ while (i / start == 0) {
+ start /= 10;
+ }
+
+ while (start > 0) {
+ result[current++] = (byte) ('0' - (i / start % 10));
+ start /= 10;
+ }
+
+ return current;
+ }
+
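
One detail of writeLongToUTF8 is easy to miss: the value is folded into the negative range (because -Long.MIN_VALUE overflows while Long.MIN_VALUE itself is representable), so every remainder is <= 0 and '0' - digit produces the right character. A standalone copy with a small driver (assumes the buffer is at least 20 bytes, enough for any long):

    public class WriteLongSketch {
      static int writeLongToUTF8(byte[] result, long i) {
        if (i == 0) {
          result[0] = '0';
          return 1;
        }
        int current = 0;
        if (i < 0) {
          result[current++] = '-';
        } else {
          i = -i; // fold into the negative range; no overflow possible
        }
        long start = 1000000000000000000L;
        while (i / start == 0) {
          start /= 10;
        }
        while (start > 0) {
          result[current++] = (byte) ('0' - (i / start % 10)); // remainder is <= 0
          start /= 10;
        }
        return current;
      }

      public static void main(String[] args) {
        byte[] buf = new byte[20];
        int n = writeLongToUTF8(buf, Long.MIN_VALUE);
        System.out.println(new String(buf, 0, n)); // -9223372036854775808
      }
    }
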
+ // Convert all NaN values in vector v to NULL. Should only be used if n > 0.
+ public static void NaNToNull(DoubleColumnVector v, int[] sel, boolean selectedInUse, int n) {
+
+ // handle repeating case
+ if (v.isRepeating) {
+ if (Double.isNaN(v.vector[0])){
+ v.isNull[0] = true;
+ v.noNulls = false;
+ }
+ return;
+ }
+
+ if (v.noNulls) {
+ if (selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ if (Double.isNaN(v.vector[i])) {
+ v.isNull[i] = true;
+ v.noNulls = false;
+ } else {
+
+ // Must set isNull[i] to false to make sure
+ // it gets initialized, in case we set noNulls to true.
+ v.isNull[i] = false;
+ }
+ }
+ } else {
+ for(int i = 0; i != n; i++) {
+ if (Double.isNaN(v.vector[i])) {
+ v.isNull[i] = true;
+ v.noNulls = false;
+ } else {
+ v.isNull[i] = false;
+ }
+ }
+ }
+ } else { // there are nulls, so null array entries are already initialized
+ if (selectedInUse) {
+ for (int j = 0; j != n; j++) {
+ int i = sel[j];
+ if(Double.isNaN(v.vector[i])) {
+ v.isNull[i] = true;
+ }
+ }
+ } else {
+ for (int i = 0; i != n; i++) {
+ if(Double.isNaN(v.vector[i])) {
+ v.isNull[i] = true;
+ }
+ }
+ }
+ }
+ }
}
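
NaNToNull exists because vectorized math evaluates every row first and patches up error cases afterwards: an input like log(-1) yields NaN in the double vector, and this post-pass converts it to a SQL NULL to match row-mode behavior. A scalar sketch of the core loop (without the isRepeating and selectedInUse variants):

    public class NaNToNullSketch {
      public static void main(String[] args) {
        double[] vector = { 1.0, Math.log(-1.0), 4.0 }; // log(-1) is NaN
        boolean[] isNull = new boolean[vector.length];
        boolean noNulls = true;
        for (int i = 0; i < vector.length; i++) {
          if (Double.isNaN(vector[i])) {
            isNull[i] = true;
            noNulls = false;
          }
        }
        System.out.println(java.util.Arrays.toString(isNull)); // [false, true, false]
        System.out.println(noNulls);                           // false
      }
    }
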
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathFuncDoubleToDouble.java Tue Oct 22 17:58:59 2013
@@ -27,7 +27,7 @@ import org.apache.hadoop.hive.ql.exec.ve
* constant argument(s)) and returns long.
* May be used for functions like ROUND(d, N), Pow(a, p) etc.
*
- * Do NOT use this for simple math functions lone sin/cos/exp etc. that just take
+ * Do NOT use this for simple math functions like sin/cos/exp etc. that just take
* a single argument. For those, modify the template ColumnUnaryFunc.txt
* and expand the template to generate needed classes.
*/
@@ -38,14 +38,14 @@ public abstract class MathFuncDoubleToDo
private int outputColumn;
// Subclasses must override this with a function that implements the desired logic.
- abstract double func(double d);
+ protected abstract double func(double d);
- MathFuncDoubleToDouble(int colNum, int outputColumn) {
+ public MathFuncDoubleToDouble(int colNum, int outputColumn) {
this.colNum = colNum;
this.outputColumn = outputColumn;
}
- MathFuncDoubleToDouble() {
+ public MathFuncDoubleToDouble() {
}
@Override
@@ -103,6 +103,13 @@ public abstract class MathFuncDoubleToDo
}
outputColVector.isRepeating = false;
}
+ cleanup(outputColVector, sel, batch.selectedInUse, n);
+ }
+
+ // override this with a no-op if subclass doesn't need to treat NaN as null
+ protected void cleanup(DoubleColumnVector outputColVector, int[] sel,
+ boolean selectedInUse, int n) {
+ MathExpr.NaNToNull(outputColVector, sel, selectedInUse, n);
}
@Override
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java Tue Oct 22 17:58:59 2013
@@ -110,6 +110,10 @@ public class StringSubstrColStartLen ext
substrStart = length + substrStart;
}
+ if (substrLength == 0) {
+ return;
+ }
+
int endIdx = substrStart + substrLength - 1;
for (int i = start; i != end; ++i) {
if ((utf8String[i] & 0xc0) != 0x80) {
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java Tue Oct 22 17:58:59 2013
@@ -75,7 +75,7 @@ public final class VectorExpressionWrite
}
if (null == objectInspector) {
throw new HiveException(String.format(
- "Failed to initialize VectorExpressionWriter for expr: %s",
+ "Failed to initialize VectorExpressionWriter for expr: %s",
nodeDesc.getExprString()));
}
return this;
@@ -378,18 +378,22 @@ public final class VectorExpressionWrite
* A poor man's Java closure. Works around the problem of having to return multiple objects
* from one function call.
*/
- public static interface Closure {
+ public static interface SingleOIDClosure {
void assign(VectorExpressionWriter[] writers, ObjectInspector objectInspector);
}
+ public static interface ListOIDClosure {
+ void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids);
+ }
+
/**
* Creates the value writers for a column vector expression list.
* Creates an appropriate output object inspector.
*/
public static void processVectorExpressions(
List<ExprNodeDesc> nodesDesc,
- List<String> outputColumnNames,
- Closure closure)
+ List<String> columnNames,
+ SingleOIDClosure closure)
throws HiveException {
VectorExpressionWriter[] writers = getExpressionWriters(nodesDesc);
List<ObjectInspector> oids = new ArrayList<ObjectInspector>(writers.length);
@@ -397,10 +401,26 @@ public final class VectorExpressionWrite
oids.add(writers[i].getObjectInspector());
}
ObjectInspector objectInspector = ObjectInspectorFactory.
- getStandardStructObjectInspector(outputColumnNames,oids);
+ getStandardStructObjectInspector(columnNames,oids);
closure.assign(writers, objectInspector);
}
+ /**
+ * Creates the value writers for a column vector expression list.
+ * Creates an appropriate output object inspector.
+ */
+ public static void processVectorExpressions(
+ List<ExprNodeDesc> nodesDesc,
+ ListOIDClosure closure)
+ throws HiveException {
+ VectorExpressionWriter[] writers = getExpressionWriters(nodesDesc);
+ List<ObjectInspector> oids = new ArrayList<ObjectInspector>(writers.length);
+ for(int i=0; i<writers.length; ++i) {
+ oids.add(writers[i].getObjectInspector());
+ }
+ closure.assign(writers, oids);
+ }
+
/**
* Returns {@link VectorExpressionWriter} objects for the fields in the given
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java Tue Oct 22 17:58:59 2013
@@ -92,9 +92,9 @@ public class VectorUDFAdaptor extends Ve
// Initialize transient fields. To be called after deserialization of other fields.
public void init() throws HiveException, UDFArgumentException {
genericUDF = expr.getGenericUDF();
- deferredChildren = new GenericUDF.DeferredObject[expr.getChildExprs().size()];
- childrenOIs = new ObjectInspector[expr.getChildExprs().size()];
- writers = VectorExpressionWriterFactory.getExpressionWriters(expr.getChildExprs());
+ deferredChildren = new GenericUDF.DeferredObject[expr.getChildren().size()];
+ childrenOIs = new ObjectInspector[expr.getChildren().size()];
+ writers = VectorExpressionWriterFactory.getExpressionWriters(expr.getChildren());
for (int i = 0; i < childrenOIs.length; i++) {
childrenOIs[i] = writers[i].getObjectInspector();
}
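The rename from getChildExprs() to getChildren() follows the ExprNodeDesc accessor; the arrays sized here feed the standard GenericUDF lifecycle. A one-line sketch of the step that presumably follows in init(); the initialize call is an assumption, not shown in this hunk:

// Assumed continuation of init(): hand the child OIs to the wrapped UDF.
ObjectInspector outputOI = genericUDF.initialize(childrenOIs);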
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java Tue Oct 22 17:58:59 2013
@@ -26,6 +26,8 @@ import java.util.Map;
import java.util.Set;
import java.util.Stack;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
@@ -44,12 +46,10 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.ReflectionUtils;
/**
@@ -62,7 +62,7 @@ import org.apache.hadoop.util.Reflection
public class IndexPredicateAnalyzer
{
private static final Log LOG = LogFactory.getLog(IndexPredicateAnalyzer.class.getName());
- private Set<String> udfNames;
+ private final Set<String> udfNames;
private Set<String> allowedColumnNames;
@@ -135,7 +135,7 @@ public class IndexPredicateAnalyzer
}
}
- return analyzeExpr((ExprNodeDesc) nd, searchConditions, nodeOutputs);
+ return analyzeExpr((ExprNodeGenericFuncDesc) nd, searchConditions, nodeOutputs);
}
};
@@ -155,13 +155,11 @@ public class IndexPredicateAnalyzer
}
private ExprNodeDesc analyzeExpr(
- ExprNodeDesc expr,
+ ExprNodeGenericFuncDesc expr,
List<IndexSearchCondition> searchConditions,
Object... nodeOutputs) {
- if (!(expr instanceof ExprNodeGenericFuncDesc)) {
- return expr;
- }
+ expr = (ExprNodeGenericFuncDesc) expr;
if (FunctionRegistry.isOpAnd(expr)) {
assert(nodeOutputs.length == 2);
ExprNodeDesc residual1 = (ExprNodeDesc) nodeOutputs[0];
@@ -182,12 +180,11 @@ public class IndexPredicateAnalyzer
}
String udfName;
- ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) expr;
- if (funcDesc.getGenericUDF() instanceof GenericUDFBridge) {
- GenericUDFBridge func = (GenericUDFBridge) funcDesc.getGenericUDF();
+ if (expr.getGenericUDF() instanceof GenericUDFBridge) {
+ GenericUDFBridge func = (GenericUDFBridge) expr.getGenericUDF();
udfName = func.getUdfName();
} else {
- udfName = funcDesc.getGenericUDF().getClass().getName();
+ udfName = expr.getGenericUDF().getClass().getName();
}
if (!udfNames.contains(udfName)) {
return expr;
@@ -255,7 +252,7 @@ public class IndexPredicateAnalyzer
}
}
- for (ExprNodeDesc child : func.getChildExprs()) {
+ for (ExprNodeDesc child : func.getChildren()) {
if (child instanceof ExprNodeConstantDesc) {
continue;
} else if (child instanceof ExprNodeGenericFuncDesc) {
@@ -283,12 +280,12 @@ public class IndexPredicateAnalyzer
*
* @param searchConditions (typically produced by analyzePredicate)
*
- * @return ExprNodeDesc form of search conditions
+ * @return ExprNodeGenericFuncDesc form of search conditions
*/
- public ExprNodeDesc translateSearchConditions(
+ public ExprNodeGenericFuncDesc translateSearchConditions(
List<IndexSearchCondition> searchConditions) {
- ExprNodeDesc expr = null;
+ ExprNodeGenericFuncDesc expr = null;
for (IndexSearchCondition searchCondition : searchConditions) {
if (expr == null) {
expr = searchCondition.getComparisonExpr();
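Narrowing analyzeExpr and translateSearchConditions to ExprNodeGenericFuncDesc removes the instanceof guard and the downcasts that callers previously needed. A sketch of a storage-handler-style decompose flow under the tightened types; the analyzer methods are from this class, while the wrapper class and setup values are illustrative:

import java.util.List;
import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;

class DecomposeSketch {
  static ExprNodeGenericFuncDesc push(ExprNodeDesc predicate,
      List<IndexSearchCondition> conditions) {
    IndexPredicateAnalyzer analyzer = new IndexPredicateAnalyzer();
    analyzer.addComparisonOp(GenericUDFOPEqual.class.getName());
    analyzer.allowColumnName("key"); // illustrative column
    // residual = what could not be pushed; it stays in the filter operator
    ExprNodeDesc residual = analyzer.analyzePredicate(predicate, conditions);
    // now returns the narrower type directly, no unchecked cast needed
    return analyzer.translateSearchConditions(conditions);
  }
}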
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/index/IndexSearchCondition.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/index/IndexSearchCondition.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/index/IndexSearchCondition.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/index/IndexSearchCondition.java Tue Oct 22 17:58:59 2013
@@ -19,7 +19,7 @@ package org.apache.hadoop.hive.ql.index;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
/**
* IndexSearchCondition represents an individual search condition
@@ -31,7 +31,7 @@ public class IndexSearchCondition
private ExprNodeColumnDesc columnDesc;
private String comparisonOp;
private ExprNodeConstantDesc constantDesc;
- private ExprNodeDesc comparisonExpr;
+ private ExprNodeGenericFuncDesc comparisonExpr;
/**
* Constructs a search condition, which takes the form
@@ -50,7 +50,7 @@ public class IndexSearchCondition
ExprNodeColumnDesc columnDesc,
String comparisonOp,
ExprNodeConstantDesc constantDesc,
- ExprNodeDesc comparisonExpr) {
+ ExprNodeGenericFuncDesc comparisonExpr) {
this.columnDesc = columnDesc;
this.comparisonOp = comparisonOp;
@@ -82,11 +82,11 @@ public class IndexSearchCondition
return constantDesc;
}
- public void setComparisonExpr(ExprNodeDesc comparisonExpr) {
+ public void setComparisonExpr(ExprNodeGenericFuncDesc comparisonExpr) {
this.comparisonExpr = comparisonExpr;
}
- public ExprNodeDesc getComparisonExpr() {
+ public ExprNodeGenericFuncDesc getComparisonExpr() {
return comparisonExpr;
}
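With the comparison expression narrowed, a condition now carries the full generic-func node end to end. An illustrative construction matching the constructor order above; all values are made up:

IndexSearchCondition cond = new IndexSearchCondition(
    keyColumn,                           // ExprNodeColumnDesc for "key"
    GenericUDFOPEqual.class.getName(),   // comparison operator name
    literal,                             // ExprNodeConstantDesc for 10
    comparison);                         // ExprNodeGenericFuncDesc for key = 10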
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java Tue Oct 22 17:58:59 2013
@@ -316,7 +316,8 @@ public class CompactIndexHandler extends
IndexPredicateAnalyzer analyzer = getIndexPredicateAnalyzer(index, queryPartitions);
List<IndexSearchCondition> searchConditions = new ArrayList<IndexSearchCondition>();
// split predicate into pushed (what we can handle), and residual (what we can't handle)
- ExprNodeDesc residualPredicate = analyzer.analyzePredicate(predicate, searchConditions);
+ ExprNodeGenericFuncDesc residualPredicate = (ExprNodeGenericFuncDesc)analyzer.
+ analyzePredicate(predicate, searchConditions);
if (searchConditions.size() == 0) {
return null;
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Tue Oct 22 17:58:59 2013
@@ -37,7 +37,7 @@ import org.apache.hadoop.hive.ql.exec.Op
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.log.PerfLogger;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -347,7 +347,7 @@ public class HiveInputFormat<K extends W
Utilities.setColumnNameList(jobConf, tableScan);
Utilities.setColumnTypeList(jobConf, tableScan);
// push down filters
- ExprNodeDesc filterExpr = scanDesc.getFilterExpr();
+ ExprNodeGenericFuncDesc filterExpr = (ExprNodeGenericFuncDesc)scanDesc.getFilterExpr();
if (filterExpr == null) {
return;
}
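The cast relies on the planner having stored a generic-func filter in the scan descriptor; the same expression later reaches the readers through the job configuration. A hedged sketch of that handoff, with serializeExpression assumed as the counterpart of the deserializeExpression call visible in the OrcInputFormat hunk below:

// Assumed continuation of the push-down path: publish the predicate.
if (filterExpr != null) {
  jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR,
      Utilities.serializeExpression(filterExpr));
}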
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java Tue Oct 22 17:58:59 2013
@@ -510,13 +510,13 @@ class ColumnStatisticsImpl implements Co
super(stats);
OrcProto.DecimalStatistics dec = stats.getDecimalStatistics();
if (dec.hasMaximum()) {
- maximum = new HiveDecimal(dec.getMaximum());
+ maximum = HiveDecimal.create(dec.getMaximum());
}
if (dec.hasMinimum()) {
- minimum = new HiveDecimal(dec.getMinimum());
+ minimum = HiveDecimal.create(dec.getMinimum());
}
if (dec.hasSum()) {
- sum = new HiveDecimal(dec.getSum());
+ sum = HiveDecimal.create(dec.getSum());
} else {
sum = null;
}
@@ -541,11 +541,7 @@ class ColumnStatisticsImpl implements Co
maximum = value;
}
if (sum != null) {
- try {
- sum = sum.add(value);
- } catch (NumberFormatException nfe) {
- sum = null;
- }
+ sum = sum.add(value);
}
}
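The dropped try/catch matches the move from the mutable HiveDecimal constructor, which threw NumberFormatException when a value did not fit, to the create() factory. Under the new API an unrepresentable value presumably surfaces as null rather than as an exception; a defensive caller written under that assumption:

// Sketch, assuming an overflowing add() surfaces as null under the new API.
if (sum != null) {
  sum = sum.add(value);
  // if add() returned null, the sum is recorded as unknown, which is the
  // same end state the old NumberFormatException catch produced
}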
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Tue Oct 22 17:58:59 2013
@@ -27,6 +27,7 @@ import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -90,38 +91,12 @@ public class OrcInputFormat implements
OrcRecordReader(Reader file, Configuration conf,
long offset, long length) throws IOException {
- String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
- String columnNamesString =
- conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
- String[] columnNames = null;
- SearchArgument sarg = null;
List<OrcProto.Type> types = file.getTypes();
- if (types.size() == 0) {
- numColumns = 0;
- } else {
- numColumns = types.get(0).getSubtypesCount();
- }
- columnNames = new String[types.size()];
- LOG.info("included column ids = " +
- conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "null"));
- LOG.info("included columns names = " +
- conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "null"));
- boolean[] includeColumn = findIncludedColumns(types, conf);
- if (serializedPushdown != null && columnNamesString != null) {
- sarg = SearchArgument.FACTORY.create
- (Utilities.deserializeExpression(serializedPushdown, conf));
- LOG.info("ORC pushdown predicate: " + sarg);
- String[] neededColumnNames = columnNamesString.split(",");
- int i = 0;
- for(int columnId: types.get(0).getSubtypesList()) {
- if (includeColumn == null || includeColumn[columnId]) {
- columnNames[columnId] = neededColumnNames[i++];
- }
- }
- } else {
- LOG.info("No ORC pushdown predicate");
- }
- this.reader = file.rows(offset, length, includeColumn, sarg, columnNames);
+ numColumns = (types.size() == 0) ? 0 : types.get(0).getSubtypesCount();
+ boolean[] includedColumns = findIncludedColumns(types, conf);
+ String[] columnNames = getIncludedColumnNames(types, includedColumns, conf);
+ SearchArgument sarg = createSarg(types, conf);
+ this.reader = file.rows(offset, length, includedColumns, sarg, columnNames);
this.offset = offset;
this.length = length;
}
@@ -187,14 +162,45 @@ public class OrcInputFormat implements
}
}
+ public static SearchArgument createSarg(List<OrcProto.Type> types, Configuration conf) {
+ String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
+ if (serializedPushdown == null
+ || conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR) == null) {
+ LOG.info("No ORC pushdown predicate");
+ return null;
+ }
+ SearchArgument sarg = SearchArgument.FACTORY.create
+ (Utilities.deserializeExpression(serializedPushdown));
+ LOG.info("ORC pushdown predicate: " + sarg);
+ return sarg;
+ }
+
+ public static String[] getIncludedColumnNames(
+ List<OrcProto.Type> types, boolean[] includedColumns, Configuration conf) {
+ String columnNamesString = conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR);
+ LOG.info("included columns names = " + columnNamesString);
+ if (columnNamesString == null || conf.get(TableScanDesc.FILTER_EXPR_CONF_STR) == null) {
+ return null;
+ }
+ String[] neededColumnNames = columnNamesString.split(",");
+ int i = 0;
+ String[] columnNames = new String[types.size()];
+ for(int columnId: types.get(0).getSubtypesList()) {
+ if (includedColumns == null || includedColumns[columnId]) {
+ columnNames[columnId] = neededColumnNames[i++];
+ }
+ }
+ return columnNames;
+ }
+
/**
* Take the configuration and figure out which columns we need to include.
* @param types the types of the file
* @param conf the configuration
* @return true for each column that should be included
*/
- static boolean[] findIncludedColumns(List<OrcProto.Type> types,
- Configuration conf) {
+ public static boolean[] findIncludedColumns(List<OrcProto.Type> types, Configuration conf) {
+ LOG.info("included column ids = " + conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR));
if (ColumnProjectionUtils.isReadAllColumns(conf)) {
return null;
} else {
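Extracting createSarg, getIncludedColumnNames, and findIncludedColumns into public statics makes the null contracts explicit and lets the vectorized reader (next file) reuse them. Wiring them together mirrors the trimmed constructor above; 'file' is an ORC Reader and the variable names are illustrative:

List<OrcProto.Type> types = file.getTypes();
boolean[] included = OrcInputFormat.findIncludedColumns(types, conf);   // null => read all columns
String[] names = OrcInputFormat.getIncludedColumnNames(types, included, conf); // null => no projection names
SearchArgument sarg = OrcInputFormat.createSarg(types, conf);           // null => no pushdown predicate
RecordReader rows = file.rows(offset, length, included, sarg, names);   // org.apache.hadoop.hive.ql.io.orc.RecordReader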
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java Tue Oct 22 17:58:59 2013
@@ -32,18 +32,14 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
import org.apache.hadoop.io.Writable;
final class OrcStruct implements Writable {
@@ -487,12 +483,6 @@ final class OrcStruct implements Writabl
case STRING:
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
case VARCHAR:
- // For varchar we need to retrieve the string length from the TypeInfo.
- VarcharTypeParams varcharParams = (VarcharTypeParams)
- ParameterizedPrimitiveTypeUtils.getTypeParamsFromTypeInfo(info);
- if (varcharParams == null) {
- throw new IllegalArgumentException("varchar type used without type params");
- }
return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
(PrimitiveTypeInfo) info);
case TIMESTAMP:
@@ -546,11 +536,8 @@ final class OrcStruct implements Writabl
throw new UnsupportedOperationException(
"Illegal use of varchar type without length in ORC type definition.");
}
- VarcharTypeParams varcharParams = new VarcharTypeParams();
- varcharParams.setLength(type.getMaximumLength());
return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
- PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs(
- PrimitiveCategory.VARCHAR, varcharParams));
+ TypeInfoFactory.getVarcharTypeInfo(type.getMaximumLength()));
case TIMESTAMP:
return PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
case DATE:
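Both OrcStruct hunks replace the VarcharTypeParams plumbing with the typeinfo API, so the varchar length round-trips through VarcharTypeInfo. A sketch using the factory methods imported above; the surrounding variables are illustrative:

// Reading: build a writable varchar OI from the max length in the file.
VarcharTypeInfo vcInfo = TypeInfoFactory.getVarcharTypeInfo(type.getMaximumLength());
ObjectInspector oi =
    PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(vcInfo);
// Writing (see the WriterImpl hunk below): the length comes straight back.
int maxLen = vcInfo.getLength();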
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Tue Oct 22 17:58:59 2013
@@ -1057,7 +1057,7 @@ class RecordReaderImpl implements Record
Object next(Object previous) throws IOException {
super.next(previous);
if (valuePresent) {
- return new HiveDecimal(SerializationUtils.readBigInteger(valueStream),
+ return HiveDecimal.create(SerializationUtils.readBigInteger(valueStream),
(int) scaleStream.next());
}
return null;
@@ -1726,6 +1726,7 @@ class RecordReaderImpl implements Record
"NextVector is not supported operation for List type");
}
+ @Override
void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
(encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
@@ -1819,6 +1820,7 @@ class RecordReaderImpl implements Record
"NextVector is not supported operation for Map type");
}
+ @Override
void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
if ((encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) &&
(encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT_V2)) {
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java Tue Oct 22 17:58:59 2013
@@ -31,8 +31,8 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.ql.io.InputFormatChecker;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
@@ -58,12 +58,14 @@ public class VectorizedOrcInputFormat ex
VectorizedOrcRecordReader(Reader file, Configuration conf,
FileSplit fileSplit) throws IOException {
+ List<OrcProto.Type> types = file.getTypes();
+ boolean[] includedColumns = OrcInputFormat.findIncludedColumns(types, conf);
+ String[] columnNames = OrcInputFormat.getIncludedColumnNames(types, includedColumns, conf);
+ SearchArgument sarg = OrcInputFormat.createSarg(types, conf);
this.offset = fileSplit.getStart();
this.length = fileSplit.getLength();
- this.reader = file.rows(offset, length,
- findIncludedColumns(file.getTypes(), conf));
-
+ this.reader = file.rows(offset, length, includedColumns, sarg, columnNames);
try {
rbCtx = new VectorizedRowBatchCtx();
rbCtx.init(conf, fileSplit);
@@ -134,63 +136,6 @@ public class VectorizedOrcInputFormat ex
setMinSplitSize(16 * 1024);
}
- /**
- * Recurse down into a type subtree turning on all of the sub-columns.
- *
- * @param types
- * the types of the file
- * @param result
- * the global view of columns that should be included
- * @param typeId
- * the root of tree to enable
- */
- private static void includeColumnRecursive(List<OrcProto.Type> types,
- boolean[] result,
- int typeId) {
- result[typeId] = true;
- OrcProto.Type type = types.get(typeId);
- int children = type.getSubtypesCount();
- for (int i = 0; i < children; ++i) {
- includeColumnRecursive(types, result, type.getSubtypes(i));
- }
- }
-
- /**
- * Take the configuration and figure out which columns we need to include.
- *
- * @param types
- * the types of the file
- * @param conf
- * the configuration
- * @return true for each column that should be included
- */
- private static boolean[] findIncludedColumns(List<OrcProto.Type> types,
- Configuration conf) {
- String includedStr =
- conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
- if (includedStr == null || includedStr.trim().length() == 0) {
- return null;
- } else {
- int numColumns = types.size();
- boolean[] result = new boolean[numColumns];
- result[0] = true;
- OrcProto.Type root = types.get(0);
- List<Integer> included = ColumnProjectionUtils.getReadColumnIDs(conf);
- for (int i = 0; i < root.getSubtypesCount(); ++i) {
- if (included.contains(i)) {
- includeColumnRecursive(types, result, root.getSubtypes(i));
- }
- }
- // if we are filtering at least one column, return the boolean array
- for (boolean include : result) {
- if (!include) {
- return result;
- }
- }
- return null;
- }
- }
-
@Override
public RecordReader<NullWritable, VectorizedRowBatch>
getRecordReader(InputSplit inputSplit, JobConf conf,
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Tue Oct 22 17:58:59 2013
@@ -59,8 +59,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
@@ -1609,14 +1608,9 @@ class WriterImpl implements Writer, Memo
case VARCHAR:
// The varchar length needs to be written to file and should be available
// from the object inspector
- VarcharTypeParams varcharParams = (VarcharTypeParams)
- ParameterizedPrimitiveTypeUtils.getTypeParamsFromPrimitiveObjectInspector(
- (PrimitiveObjectInspector) treeWriter.inspector);
- if (varcharParams == null) {
- throw new IllegalArgumentException("No varchar length specified in ORC type");
- }
+ VarcharTypeInfo typeInfo = (VarcharTypeInfo) ((PrimitiveObjectInspector) treeWriter.inspector).getTypeInfo();
type.setKind(Type.Kind.VARCHAR);
- type.setMaximumLength(varcharParams.getLength());
+ type.setMaximumLength(typeInfo.getLength());
break;
case BINARY:
type.setKind(OrcProto.Type.Kind.BINARY);
Modified: hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
URL: http://svn.apache.org/viewvc/hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java?rev=1534711&r1=1534710&r2=1534711&view=diff
==============================================================================
--- hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java (original)
+++ hive/branches/maven/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java Tue Oct 22 17:58:59 2013
@@ -18,10 +18,10 @@
package org.apache.hadoop.hive.ql.io.sarg;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-
import java.util.List;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+
/**
* Primary interface for <a href="http://en.wikipedia.org/wiki/Sargable">
* SearchArgument</a>, which are the subset of predicates
@@ -170,7 +170,7 @@ public interface SearchArgument {
* in interfaces. *DOH*
*/
public static class Factory {
- public SearchArgument create(ExprNodeDesc expression) {
+ public SearchArgument create(ExprNodeGenericFuncDesc expression) {
return new SearchArgumentImpl(expression);
}
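With Factory.create narrowed to ExprNodeGenericFuncDesc, the pushdown chain is typed end to end: HiveInputFormat publishes a generic-func filter, OrcInputFormat.createSarg deserializes it, and this factory builds the SearchArgument. A condensed consumer-side sketch mirroring createSarg above, assuming deserializeExpression yields the generic-func node:

String serialized = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
SearchArgument sarg = (serialized == null)
    ? null  // nothing was pushed down
    : SearchArgument.FACTORY.create(Utilities.deserializeExpression(serialized));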