You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2013/10/23 22:50:43 UTC
svn commit: r1535174 [3/8] - in /hive/branches/tez: ./
ant/src/org/apache/hadoop/hive/ant/ common/src/java/conf/
hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/
hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/ hc...
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1535174&r1=1535173&r2=1535174&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Wed Oct 23 20:50:38 2013
@@ -34,26 +34,9 @@ import org.apache.hadoop.hive.ql.exec.Ex
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.UDF;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.ConstantVectorExpression;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterConstantBooleanVectorExpression;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncRand;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.ISetDoubleArg;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.ISetLongArg;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColRegExpStringScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColCol;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatScalarCol;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringSubstrColStart;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.StringSubstrColStartLen;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampLong;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.Mode;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.*;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar;
@@ -76,6 +59,8 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToDouble;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastTimestampToDoubleViaLongToDouble;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -84,85 +69,21 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
-import org.apache.hadoop.hive.ql.udf.UDFAcos;
-import org.apache.hadoop.hive.ql.udf.UDFAsin;
-import org.apache.hadoop.hive.ql.udf.UDFAtan;
-import org.apache.hadoop.hive.ql.udf.UDFBin;
-import org.apache.hadoop.hive.ql.udf.UDFCeil;
import org.apache.hadoop.hive.ql.udf.UDFConv;
-import org.apache.hadoop.hive.ql.udf.UDFCos;
-import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
-import org.apache.hadoop.hive.ql.udf.UDFDegrees;
-import org.apache.hadoop.hive.ql.udf.UDFExp;
-import org.apache.hadoop.hive.ql.udf.UDFFloor;
import org.apache.hadoop.hive.ql.udf.UDFHex;
-import org.apache.hadoop.hive.ql.udf.UDFHour;
-import org.apache.hadoop.hive.ql.udf.UDFLTrim;
-import org.apache.hadoop.hive.ql.udf.UDFLength;
-import org.apache.hadoop.hive.ql.udf.UDFLike;
-import org.apache.hadoop.hive.ql.udf.UDFLn;
-import org.apache.hadoop.hive.ql.udf.UDFLog;
-import org.apache.hadoop.hive.ql.udf.UDFLog10;
-import org.apache.hadoop.hive.ql.udf.UDFLog2;
-import org.apache.hadoop.hive.ql.udf.UDFMinute;
-import org.apache.hadoop.hive.ql.udf.UDFMonth;
-import org.apache.hadoop.hive.ql.udf.UDFOPDivide;
-import org.apache.hadoop.hive.ql.udf.UDFOPMinus;
-import org.apache.hadoop.hive.ql.udf.UDFOPMod;
-import org.apache.hadoop.hive.ql.udf.UDFOPMultiply;
import org.apache.hadoop.hive.ql.udf.UDFOPNegative;
-import org.apache.hadoop.hive.ql.udf.UDFOPPlus;
import org.apache.hadoop.hive.ql.udf.UDFOPPositive;
-import org.apache.hadoop.hive.ql.udf.UDFPosMod;
-import org.apache.hadoop.hive.ql.udf.UDFPower;
-import org.apache.hadoop.hive.ql.udf.UDFRegExp;
-import org.apache.hadoop.hive.ql.udf.UDFRTrim;
-import org.apache.hadoop.hive.ql.udf.UDFRadians;
-import org.apache.hadoop.hive.ql.udf.UDFRand;
-import org.apache.hadoop.hive.ql.udf.UDFRound;
-import org.apache.hadoop.hive.ql.udf.UDFSecond;
-import org.apache.hadoop.hive.ql.udf.UDFSign;
-import org.apache.hadoop.hive.ql.udf.UDFSin;
-import org.apache.hadoop.hive.ql.udf.UDFSqrt;
-import org.apache.hadoop.hive.ql.udf.UDFSubstr;
-import org.apache.hadoop.hive.ql.udf.UDFTan;
import org.apache.hadoop.hive.ql.udf.UDFToBoolean;
import org.apache.hadoop.hive.ql.udf.UDFToByte;
+import org.apache.hadoop.hive.ql.udf.UDFToDouble;
+import org.apache.hadoop.hive.ql.udf.UDFToFloat;
import org.apache.hadoop.hive.ql.udf.UDFToInteger;
import org.apache.hadoop.hive.ql.udf.UDFToLong;
import org.apache.hadoop.hive.ql.udf.UDFToShort;
-import org.apache.hadoop.hive.ql.udf.UDFToFloat;
-import org.apache.hadoop.hive.ql.udf.UDFToDouble;
import org.apache.hadoop.hive.ql.udf.UDFToString;
-import org.apache.hadoop.hive.ql.udf.UDFTrim;
-import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
-import org.apache.hadoop.hive.ql.udf.UDFYear;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.ql.udf.generic.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
/**
* Context class for vectorization execution.
@@ -171,25 +92,20 @@ import org.apache.hadoop.io.Text;
*
*/
public class VectorizationContext {
+
private static final Log LOG = LogFactory.getLog(
VectorizationContext.class.getName());
+ VectorExpressionDescriptor vMap;
+
//columnName to column position map
private final Map<String, Integer> columnMap;
private final int firstOutputColumnIndex;
+ private final Mode operatorMode = Mode.PROJECTION;
- private OperatorType opType;
//Map column number to type
private final OutputColumnManager ocm;
- // Package where custom (hand-built) vector expression classes are located.
- private static final String CUSTOM_EXPR_PACKAGE =
- "org.apache.hadoop.hive.ql.exec.vector.expressions";
-
- // Package where vector expression packages generated from templates are located.
- private static final String GENERATED_EXPR_PACKAGE =
- "org.apache.hadoop.hive.ql.exec.vector.expressions.gen";
-
private String fileKey = null;
public VectorizationContext(Map<String, Integer> columnMap,
@@ -197,6 +113,7 @@ public class VectorizationContext {
this.columnMap = columnMap;
this.ocm = new OutputColumnManager(initialOutputCol);
this.firstOutputColumnIndex = initialOutputCol;
+ vMap = new VectorExpressionDescriptor();
}
public String getFileKey() {
@@ -214,6 +131,10 @@ public class VectorizationContext {
return columnMap.get(name);
}
+ private int getInputColumnIndex(ExprNodeColumnDesc colExpr) {
+ return columnMap.get(colExpr.getColumn());
+ }
+
private class OutputColumnManager {
private final int initialOutputCol;
private int outputColCount = 0;
@@ -269,23 +190,16 @@ public class VectorizationContext {
}
}
- public void setOperatorType(OperatorType opType) {
- this.opType = opType;
- }
-
- private VectorExpression getVectorExpression(ExprNodeColumnDesc
- exprDesc) {
+ private VectorExpression getColumnVectorExpression(ExprNodeColumnDesc
+ exprDesc, Mode mode) {
int columnNum = getInputColumnIndex(exprDesc.getColumn());
VectorExpression expr = null;
- switch (opType) {
+ switch (mode) {
case FILTER:
//Important: It will come here only if the column is being used as a boolean
expr = new SelectColumnIsTrue(columnNum);
break;
- case MAPJOIN:
- case SELECT:
- case GROUPBY:
- case REDUCESINK:
+ case PROJECTION:
expr = new IdentityExpression(columnNum, exprDesc.getTypeString());
break;
}
@@ -293,38 +207,49 @@ public class VectorizationContext {
}
public VectorExpression[] getVectorExpressions(List<ExprNodeDesc> exprNodes) throws HiveException {
+ return getVectorExpressions(exprNodes, Mode.PROJECTION);
+ }
+
+ public VectorExpression[] getVectorExpressions(List<ExprNodeDesc> exprNodes, Mode mode)
+ throws HiveException {
+
int i = 0;
if (null == exprNodes) {
return new VectorExpression[0];
}
VectorExpression[] ret = new VectorExpression[exprNodes.size()];
for (ExprNodeDesc e : exprNodes) {
- ret[i++] = getVectorExpression(e);
+ ret[i++] = getVectorExpression(e, mode);
}
return ret;
}
+ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc) throws HiveException {
+ return getVectorExpression(exprDesc, Mode.PROJECTION);
+ }
+
/**
* Returns a vector expression for a given expression
* description.
* @param exprDesc, Expression description
+ * @param mode
* @return {@link VectorExpression}
* @throws HiveException
*/
- public VectorExpression getVectorExpression(ExprNodeDesc exprDesc) throws HiveException {
+ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) throws HiveException {
VectorExpression ve = null;
if (exprDesc instanceof ExprNodeColumnDesc) {
- ve = getVectorExpression((ExprNodeColumnDesc) exprDesc);
+ ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode);
} else if (exprDesc instanceof ExprNodeGenericFuncDesc) {
ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc;
if (isCustomUDF(expr) || isLegacyPathUDF(expr)) {
ve = getCustomUDFExpression(expr);
} else {
- ve = getVectorExpression(expr.getGenericUDF(),
- expr.getChildren());
+ ve = getGenericUdfVectorExpression(expr.getGenericUDF(),
+ expr.getChildren(), mode);
}
} else if (exprDesc instanceof ExprNodeConstantDesc) {
- ve = getConstantVectorExpression((ExprNodeConstantDesc) exprDesc);
+ ve = getConstantVectorExpression((ExprNodeConstantDesc) exprDesc, mode);
}
if (ve == null) {
throw new HiveException("Could not vectorize expression: "+exprDesc.getName());
@@ -419,10 +344,8 @@ public class VectorizationContext {
Class<? extends UDF> cl = ((GenericUDFBridge) gudf).getUdfClass();
- ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) exprDesc.getChildren().get(0);
-
if (cl.equals(UDFOPNegative.class) || cl.equals(UDFOPPositive.class)) {
- ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(exprDesc);
+ ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprDesc);
ObjectInspector output = evaluator.initialize(null);
Object constant = evaluator.evaluate(null);
@@ -433,11 +356,14 @@ public class VectorizationContext {
}
}
- private VectorExpression getConstantVectorExpression(ExprNodeConstantDesc exprDesc)
+ private VectorExpression getConstantVectorExpression(ExprNodeConstantDesc exprDesc, Mode mode)
throws HiveException {
String type = exprDesc.getTypeString();
- String colVectorType = this.getOutputColType(type, "constant");
- int outCol = ocm.allocateOutputColumn(colVectorType);
+ String colVectorType = getNormalizedTypeName(type);
+ int outCol = -1;
+ if (mode == Mode.PROJECTION) {
+ outCol = ocm.allocateOutputColumn(colVectorType);
+ }
if (type.equalsIgnoreCase("long") || type.equalsIgnoreCase("int") ||
type.equalsIgnoreCase("short") || type.equalsIgnoreCase("byte")) {
return new ConstantVectorExpression(outCol,
@@ -447,7 +373,7 @@ public class VectorizationContext {
} else if (type.equalsIgnoreCase("string")) {
return new ConstantVectorExpression(outCol, ((String) exprDesc.getValue()).getBytes());
} else if (type.equalsIgnoreCase("boolean")) {
- if (this.opType == OperatorType.FILTER) {
+ if (mode == Mode.FILTER) {
if (((Boolean) exprDesc.getValue()).booleanValue()) {
return new FilterConstantBooleanVectorExpression(1);
} else {
@@ -465,7 +391,12 @@ public class VectorizationContext {
}
}
- private VectorExpression getUnaryMinusExpression(List<ExprNodeDesc> childExprList)
+ /**
+ * Used as a fast path for operations that don't modify their input, like unary +
+ * and casting boolean to long. IdentityExpression and its children are always
+ * projections.
+ */
+ private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList)
throws HiveException {
ExprNodeDesc childExpr = childExprList.get(0);
int inputCol;
@@ -482,328 +413,161 @@ public class VectorizationContext {
} else {
throw new HiveException("Expression not supported: "+childExpr);
}
- String outputColumnType = getNormalizedTypeName(colType);
- int outputCol = ocm.allocateOutputColumn(outputColumnType);
- String className = "org.apache.hadoop.hive.ql.exec.vector.expressions.gen."
- + outputColumnType + "ColUnaryMinus";
- VectorExpression expr;
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol, outputCol);
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
+ VectorExpression expr = new IdentityExpression(inputCol, colType);
if (v1 != null) {
expr.setChildExpressions(new VectorExpression [] {v1});
- ocm.freeOutputColumn(v1.getOutputColumn());
}
return expr;
}
- /* For functions that take one argument, and can be translated using a vector
- * expression class of the form
- * <packagePrefix>.<classPrefix><argumentType>To<resultType>
- * The argumentType is inferred from the input expression.
- */
- private VectorExpression getUnaryFunctionExpression(
- String classPrefix,
- String resultType,
- List<ExprNodeDesc> childExprList,
- String packagePrefix)
+ private VectorExpression getVectorExpressionForUdf(Class<?> udf, List<ExprNodeDesc> childExpr, Mode mode)
throws HiveException {
- ExprNodeDesc childExpr = childExprList.get(0);
- int inputCol;
- String colType;
- VectorExpression v1 = null;
- if (childExpr instanceof ExprNodeGenericFuncDesc) {
- v1 = getVectorExpression(childExpr);
- inputCol = v1.getOutputColumn();
- colType = v1.getOutputType();
- } else if (childExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
- inputCol = getInputColumnIndex(colDesc.getColumn());
- colType = colDesc.getTypeString();
- } else {
- throw new HiveException("Expression not supported: "+childExpr);
+ int numChildren = (childExpr == null) ? 0 : childExpr.size();
+ VectorExpressionDescriptor.Builder builder = new VectorExpressionDescriptor.Builder();
+ builder.setNumArguments(numChildren);
+ builder.setMode(mode);
+ for (int i = 0; i < numChildren; i++) {
+ ExprNodeDesc child = childExpr.get(i);
+ builder.setArgumentType(i, child.getTypeString());
+ if ((child instanceof ExprNodeGenericFuncDesc) || (child instanceof ExprNodeColumnDesc)) {
+ builder.setInputExpressionType(i, InputExpressionType.COLUMN);
+ } else if (child instanceof ExprNodeConstantDesc) {
+ builder.setInputExpressionType(i, InputExpressionType.SCALAR);
+ } else {
+ throw new HiveException("Cannot handle expression type: " + child.getClass().getSimpleName());
+ }
}
- String funcInputColType = getNormalizedTypeName(colType);
- int outputCol = ocm.allocateOutputColumn(resultType);
- String className = packagePrefix + "."
- + classPrefix + funcInputColType + "To" + resultType;
- VectorExpression expr;
+ Class<?> vclass = this.vMap.getVectorExpressionClass(udf, builder.build());
+ if (vclass == null) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("No vector udf found for "+udf.getSimpleName());
+ }
+ return null;
+ }
+ Mode childrenMode = getChildrenMode(mode, udf);
+ return createVectorExpression(vclass, childExpr, childrenMode);
+ }
+
+ private VectorExpression createVectorExpression(Class<?> vectorClass, List<ExprNodeDesc> childExpr,
+ Mode childrenMode) throws HiveException {
+ int numChildren = childExpr == null ? 0: childExpr.size();
+ List<VectorExpression> children = new ArrayList<VectorExpression>();
+ Object[] arguments = new Object[numChildren];
try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol, outputCol);
+ for (int i = 0; i < numChildren; i++) {
+ ExprNodeDesc child = childExpr.get(i);
+ if (child instanceof ExprNodeGenericFuncDesc) {
+ VectorExpression vChild = getVectorExpression(child, childrenMode);
+ children.add(vChild);
+ arguments[i] = vChild.getOutputColumn();
+ } else if (child instanceof ExprNodeColumnDesc) {
+ int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child);
+ if (childrenMode == Mode.FILTER) {
+ // In filter mode, the column must be a boolean
+ children.add(new SelectColumnIsTrue(colIndex));
+ }
+ arguments[i] = colIndex;
+ } else if (child instanceof ExprNodeConstantDesc) {
+ arguments[i] = getScalarValue((ExprNodeConstantDesc) child);
+ } else {
+ throw new HiveException("Cannot handle expression type: "
+ + child.getClass().getSimpleName());
+ }
+ }
+ VectorExpression vectorExpression = instantiateExpression(vectorClass, arguments);
+ if ((vectorExpression != null) && !children.isEmpty()) {
+ vectorExpression.setChildExpressions(children.toArray(new VectorExpression[0]));
+ }
+ return vectorExpression;
} catch (Exception ex) {
throw new HiveException(ex);
+ } finally {
+ for (VectorExpression ve : children) {
+ ocm.freeOutputColumn(ve.getOutputColumn());
+ }
}
- if (v1 != null) {
- expr.setChildExpressions(new VectorExpression [] {v1});
- ocm.freeOutputColumn(v1.getOutputColumn());
+ }
+
+ private Mode getChildrenMode(Mode mode, Class<?> udf) {
+ if (mode.equals(Mode.FILTER) && (udf.equals(GenericUDFOPAnd.class) || udf.equals(GenericUDFOPOr.class))) {
+ return Mode.FILTER;
}
- return expr;
+ return Mode.PROJECTION;
}
- // Used as a fast path for operations that don't modify their input, like unary +
- // and casting boolean to long.
- private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList)
+ private VectorExpression instantiateExpression(Class<?> vclass, Object...args)
throws HiveException {
- ExprNodeDesc childExpr = childExprList.get(0);
- int inputCol;
- String colType;
- VectorExpression v1 = null;
- if (childExpr instanceof ExprNodeGenericFuncDesc) {
- v1 = getVectorExpression(childExpr);
- inputCol = v1.getOutputColumn();
- colType = v1.getOutputType();
- } else if (childExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
- inputCol = getInputColumnIndex(colDesc.getColumn());
- colType = colDesc.getTypeString();
- } else {
- throw new HiveException("Expression not supported: "+childExpr);
- }
- VectorExpression expr = new IdentityExpression(inputCol, colType);
- if (v1 != null) {
- expr.setChildExpressions(new VectorExpression [] {v1});
+ Constructor<?> ctor = getConstructor(vclass);
+ int numParams = ctor.getParameterTypes().length;
+ int argsLength = (args == null) ? 0 : args.length;
+ try {
+ if (numParams == 0) {
+ return (VectorExpression) ctor.newInstance();
+ } else if (numParams == argsLength) {
+ return (VectorExpression) ctor.newInstance(args);
+ } else if (numParams == argsLength + 1) {
+ // Additional argument is needed, which is the outputcolumn.
+ String outType = ((VectorExpression) vclass.newInstance()).getOutputType();
+ int outputCol = ocm.allocateOutputColumn(outType);
+ Object [] newArgs = Arrays.copyOf(args, numParams);
+ newArgs[numParams-1] = outputCol;
+ return (VectorExpression) ctor.newInstance(newArgs);
+ }
+ } catch (Exception ex) {
+ throw new HiveException("Could not instantiate " + vclass.getSimpleName(), ex);
}
- return expr;
+ return null;
}
- private VectorExpression getVectorExpression(GenericUDF udf,
- List<ExprNodeDesc> childExpr) throws HiveException {
- if (udf instanceof GenericUDFOPLessThan) {
- return getVectorBinaryComparisonFilterExpression("Less", childExpr);
- } else if (udf instanceof GenericUDFOPEqualOrLessThan) {
- return getVectorBinaryComparisonFilterExpression("LessEqual", childExpr);
- } else if (udf instanceof GenericUDFOPEqual) {
- return getVectorBinaryComparisonFilterExpression("Equal", childExpr);
- } else if (udf instanceof GenericUDFOPGreaterThan) {
- return getVectorBinaryComparisonFilterExpression("Greater", childExpr);
- } else if (udf instanceof GenericUDFOPEqualOrGreaterThan) {
- return getVectorBinaryComparisonFilterExpression("GreaterEqual", childExpr);
- } else if (udf instanceof GenericUDFOPNotEqual) {
- return getVectorBinaryComparisonFilterExpression("NotEqual", childExpr);
- } else if (udf instanceof GenericUDFOPNotNull) {
- return getVectorExpression((GenericUDFOPNotNull) udf, childExpr);
- } else if (udf instanceof GenericUDFOPNull) {
- return getVectorExpression((GenericUDFOPNull) udf, childExpr);
- } else if (udf instanceof GenericUDFOPAnd) {
- return getVectorExpression((GenericUDFOPAnd) udf, childExpr);
- } else if (udf instanceof GenericUDFOPNot) {
- return getVectorExpression((GenericUDFOPNot) udf, childExpr);
- } else if (udf instanceof GenericUDFOPOr) {
- return getVectorExpression((GenericUDFOPOr) udf, childExpr);
- } else if (udf instanceof GenericUDFBridge) {
- return getVectorExpression((GenericUDFBridge) udf, childExpr);
- } else if(udf instanceof GenericUDFToUnixTimeStamp) {
- return getVectorExpression((GenericUDFToUnixTimeStamp) udf, childExpr);
- } else if (udf instanceof GenericUDFLower) {
- return getUnaryStringExpression("StringLower", "String", childExpr);
- } else if (udf instanceof GenericUDFUpper) {
- return getUnaryStringExpression("StringUpper", "String", childExpr);
- } else if (udf instanceof GenericUDFConcat) {
- return getConcatExpression(childExpr);
- } else if (udf instanceof GenericUDFAbs) {
- return getUnaryAbsExpression(childExpr);
- } else if (udf instanceof GenericUDFTimestamp) {
- return getCastToTimestamp(childExpr);
+ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
+ List<ExprNodeDesc> childExpr, Mode mode) throws HiveException {
+ //First handle special cases
+ if (udf instanceof GenericUDFBridge) {
+ VectorExpression v = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode);
+ if (v != null) {
+ return v;
+ }
}
- throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
- }
+ // Now do a general lookup
+ Class<?> udfClass = udf.getClass();
+ if (udf instanceof GenericUDFBridge) {
+ udfClass = ((GenericUDFBridge) udf).getUdfClass();
+ }
- private VectorExpression getUnaryAbsExpression(List<ExprNodeDesc> childExpr)
- throws HiveException {
- String argType = childExpr.get(0).getTypeString();
- if (isIntFamily(argType)) {
- return getUnaryFunctionExpression("FuncAbs", "Long", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (isFloatFamily(argType)) {
- return getUnaryFunctionExpression("FuncAbs", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- }
-
- throw new HiveException("Udf: Abs() not supported for argument type " + argType);
- }
-
- private VectorExpression getVectorExpression(GenericUDFToUnixTimeStamp udf,
- List<ExprNodeDesc> childExpr) throws HiveException {
- ExprNodeDesc leftExpr = childExpr.get(0);
- leftExpr = foldConstantsForUnaryExpression(leftExpr);
- VectorExpression v1 = getVectorExpression(leftExpr);
- String colType = v1.getOutputType();
- String outputType = "long";
- if(colType.equalsIgnoreCase("timestamp")) {
- int inputCol = v1.getOutputColumn();
- int outputCol = ocm.allocateOutputColumn(outputType);
- try {
- VectorExpression v2 = new VectorUDFUnixTimeStampLong(inputCol, outputCol);
- return v2;
- } catch(Exception e) {
- e.printStackTrace();
- throw new HiveException("Udf: Vector"+udf+", could not be initialized for " + colType, e);
+ List<ExprNodeDesc> constantFoldedChildren = new ArrayList<ExprNodeDesc>();
+ if (childExpr != null) {
+ for (ExprNodeDesc expr : childExpr) {
+ expr = this.foldConstantsForUnaryExpression(expr);
+ constantFoldedChildren.add(expr);
}
}
- throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
+ VectorExpression ve = getVectorExpressionForUdf(udfClass, constantFoldedChildren, mode);
+ if (ve == null) {
+ throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
+ }
+ return ve;
}
- private VectorExpression getVectorExpression(GenericUDFBridge udf,
- List<ExprNodeDesc> childExpr) throws HiveException {
+ /**
+ * Invoke special handling for expressions that can't be vectorized by regular
+ * descriptor based lookup.
+ */
+ private VectorExpression getGenericUDFBridgeVectorExpression(GenericUDFBridge udf,
+ List<ExprNodeDesc> childExpr, Mode mode) throws HiveException {
Class<? extends UDF> cl = udf.getUdfClass();
- // (UDFBaseNumericOp.class.isAssignableFrom(cl)) == true
- if (cl.equals(UDFOPPlus.class)) {
- return getBinaryArithmeticExpression("Add", childExpr);
- } else if (cl.equals(UDFOPMinus.class)) {
- return getBinaryArithmeticExpression("Subtract", childExpr);
- } else if (cl.equals(UDFOPMultiply.class)) {
- return getBinaryArithmeticExpression("Multiply", childExpr);
- } else if (cl.equals(UDFOPDivide.class)) {
- return getBinaryArithmeticExpression("Divide", childExpr);
- } else if (cl.equals(UDFOPMod.class)) {
- return getBinaryArithmeticExpression("Modulo", childExpr);
- } else if (cl.equals(UDFOPNegative.class)) {
- return getUnaryMinusExpression(childExpr);
- } else if (cl.equals(UDFOPPositive.class)) {
+ if (cl.equals(UDFOPPositive.class)) {
return getIdentityExpression(childExpr);
- } else if (cl.equals(UDFYear.class) ||
- cl.equals(UDFMonth.class) ||
- cl.equals(UDFWeekOfYear.class) ||
- cl.equals(UDFDayOfMonth.class) ||
- cl.equals(UDFHour.class) ||
- cl.equals(UDFMinute.class) ||
- cl.equals(UDFSecond.class)) {
- return getTimestampFieldExpression(cl.getSimpleName(), childExpr);
- } else if (cl.equals(UDFLike.class)) {
- return getLikeExpression(childExpr, true);
- } else if (cl.equals(UDFRegExp.class)) {
- return getLikeExpression(childExpr, false);
- } else if (cl.equals(UDFLength.class)) {
- return getUnaryStringExpression("StringLength", "Long", childExpr);
- } else if (cl.equals(UDFSubstr.class)) {
- return getSubstrExpression(childExpr);
- } else if (cl.equals(UDFLTrim.class)) {
- return getUnaryStringExpression("StringLTrim", "String", childExpr);
- } else if (cl.equals(UDFRTrim.class)) {
- return getUnaryStringExpression("StringRTrim", "String", childExpr);
- } else if (cl.equals(UDFTrim.class)) {
- return getUnaryStringExpression("StringTrim", "String", childExpr);
- } else if (cl.equals(UDFSin.class)) {
- return getUnaryFunctionExpression("FuncSin", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFCos.class)) {
- return getUnaryFunctionExpression("FuncCos", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFTan.class)) {
- return getUnaryFunctionExpression("FuncTan", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFAsin.class)) {
- return getUnaryFunctionExpression("FuncASin", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFAcos.class)) {
- return getUnaryFunctionExpression("FuncACos", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFAtan.class)) {
- return getUnaryFunctionExpression("FuncATan", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFFloor.class)) {
- return getUnaryFunctionExpression("FuncFloor", "Long", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFCeil.class)) {
- return getUnaryFunctionExpression("FuncCeil", "Long", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFDegrees.class)) {
- return getUnaryFunctionExpression("FuncDegrees", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFRadians.class)) {
- return getUnaryFunctionExpression("FuncRadians", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFLn.class)) {
- return getUnaryFunctionExpression("FuncLn", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFLog2.class)) {
- return getUnaryFunctionExpression("FuncLog2", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFLog10.class)) {
- return getUnaryFunctionExpression("FuncLog10", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFSign.class)) {
- return getUnaryFunctionExpression("FuncSign", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFSqrt.class)) {
- return getUnaryFunctionExpression("FuncSqrt", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFExp.class)) {
- return getUnaryFunctionExpression("FuncExp", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (cl.equals(UDFLog.class)) {
- return getLogWithBaseExpression(childExpr);
- } else if (cl.equals(UDFPower.class)) {
- return getPowerExpression(childExpr);
- } else if (cl.equals(UDFRound.class)) {
- return getRoundExpression(childExpr);
- } else if (cl.equals(UDFRand.class)) {
- return getRandExpression(childExpr);
- } else if (cl.equals(UDFBin.class)) {
- return getUnaryStringExpression("FuncBin", "String", childExpr);
} else if (isCastToIntFamily(cl)) {
return getCastToLongExpression(childExpr);
} else if (cl.equals(UDFToBoolean.class)) {
return getCastToBoolean(childExpr);
} else if (isCastToFloatFamily(cl)) {
- return getCastToDoubleExpression(childExpr);
+ return getCastToDoubleExpression(cl, childExpr);
} else if (cl.equals(UDFToString.class)) {
return getCastToString(childExpr);
- } else if (cl.equals(UDFPosMod.class)) {
- return getPosModExpression(childExpr);
}
-
- throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
- }
-
- private VectorExpression getPosModExpression(List<ExprNodeDesc> childExpr)
- throws HiveException {
- String inputType = childExpr.get(0).getTypeString();
-
- if (isIntFamily(inputType)) {
- // Try to get the second argument (the modulo divisor)
- long divisor = getLongScalar(childExpr.get(1));
-
- // Use the standard logic for a unary function to handle the first argument.
- VectorExpression e = getUnaryFunctionExpression("PosMod", "Long", childExpr,
- CUSTOM_EXPR_PACKAGE);
-
- // Set second argument for this special case
- ((ISetLongArg) e).setArg(divisor);
- return e;
- } else if (isFloatFamily(inputType)) {
-
- // Try to get the second argument (the modulo divisor)
- double divisor = getDoubleScalar(childExpr.get(1));
-
- // Use the standard logic for a unary function to handle the first argument.
- VectorExpression e = getUnaryFunctionExpression("PosMod", "Double", childExpr,
- CUSTOM_EXPR_PACKAGE);
-
- // Set second argument for this special case
- ((ISetDoubleArg) e).setArg(divisor);
- return e;
- }
-
- throw new HiveException("Unhandled input type for PMOD(): " + inputType);
- }
-
- private VectorExpression getCastToTimestamp(List<ExprNodeDesc> childExpr)
- throws HiveException {
- String inputType = childExpr.get(0).getTypeString();
- if (isIntFamily(inputType)) {
- return getUnaryFunctionExpression("CastLongToTimestampVia", "Long", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (isFloatFamily(inputType)) {
- return getUnaryFunctionExpression("CastDoubleToTimestampVia", "Long", childExpr,
- GENERATED_EXPR_PACKAGE);
- }
- // The string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF.
-
- throw new HiveException("Unhandled cast input type: " + inputType);
+ return null;
}
private VectorExpression getCastToString(List<ExprNodeDesc> childExpr)
@@ -811,11 +575,9 @@ public class VectorizationContext {
String inputType = childExpr.get(0).getTypeString();
if (inputType.equals("boolean")) {
// Boolean must come before the integer family. It's a special case.
- return getUnaryFunctionExpression("CastBooleanToStringVia", "String", childExpr,
- CUSTOM_EXPR_PACKAGE);
+ return createVectorExpression(CastBooleanToStringViaLongToString.class, childExpr, Mode.PROJECTION);
} else if (isIntFamily(inputType)) {
- return getUnaryFunctionExpression("Cast", "String", childExpr,
- CUSTOM_EXPR_PACKAGE);
+ return createVectorExpression(CastLongToString.class, childExpr, Mode.PROJECTION);
}
/* The string type is deliberately omitted -- the planner removes string to string casts.
* Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF.
@@ -824,15 +586,13 @@ public class VectorizationContext {
throw new HiveException("Unhandled cast input type: " + inputType);
}
- private VectorExpression getCastToDoubleExpression(List<ExprNodeDesc> childExpr)
+ private VectorExpression getCastToDoubleExpression(Class<?> udf, List<ExprNodeDesc> childExpr)
throws HiveException {
String inputType = childExpr.get(0).getTypeString();
if (isIntFamily(inputType)) {
- return getUnaryFunctionExpression("Cast", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
+ return createVectorExpression(CastLongToDouble.class, childExpr, Mode.PROJECTION);
} else if (inputType.equals("timestamp")) {
- return getUnaryFunctionExpression("CastTimestampToDoubleVia", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
+ return createVectorExpression(CastTimestampToDoubleViaLongToDouble.class, childExpr, Mode.PROJECTION);
} else if (isFloatFamily(inputType)) {
// float types require no conversion, so use a no-op
@@ -846,16 +606,11 @@ public class VectorizationContext {
private VectorExpression getCastToBoolean(List<ExprNodeDesc> childExpr)
throws HiveException {
String inputType = childExpr.get(0).getTypeString();
- if (isFloatFamily(inputType)) {
- return getUnaryFunctionExpression("CastDoubleToBooleanVia", "Long", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (isIntFamily(inputType) || inputType.equals("timestamp")) {
- return getUnaryFunctionExpression("CastLongToBooleanVia", "Long", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (inputType.equals("string")) {
-
+ // Long and double are handled using descriptors, string needs to be specially handled.
+ if (inputType.equals("string")) {
// string casts to false if it is 0 characters long, otherwise true
- VectorExpression lenExpr = getUnaryStringExpression("StringLength", "Long", childExpr);
+ VectorExpression lenExpr = createVectorExpression(StringLength.class, childExpr,
+ Mode.PROJECTION);
int outputCol = ocm.allocateOutputColumn("integer");
VectorExpression lenToBoolExpr =
@@ -866,240 +621,21 @@ public class VectorizationContext {
}
// cast(booleanExpr as boolean) case is omitted because planner removes it as a no-op
- throw new HiveException("Unhandled cast input type: " + inputType);
+ return null;
}
private VectorExpression getCastToLongExpression(List<ExprNodeDesc> childExpr)
throws HiveException {
String inputType = childExpr.get(0).getTypeString();
- if (isFloatFamily(inputType)) {
- return getUnaryFunctionExpression("Cast", "Long", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (inputType.equals("timestamp")) {
- return getUnaryFunctionExpression("CastTimestampToLongVia", "Long", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (isIntFamily(inputType)) {
-
+ // Float family, timestamp are handled via descriptor based lookup, int family needs
+ // special handling.
+ if (isIntFamily(inputType)) {
// integer and boolean types require no conversion, so use a no-op
return getIdentityExpression(childExpr);
}
// string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF.
- throw new HiveException("Unhandled cast input type: " + inputType);
- }
-
- private VectorExpression getRandExpression(List<ExprNodeDesc> childExpr)
- throws HiveException {
-
- // prepare one output column
- int outputCol = ocm.allocateOutputColumn("Double");
- if (childExpr == null || childExpr.size() == 0) {
-
- // make no-argument vectorized Rand expression
- return new FuncRand(outputCol);
- } else if (childExpr.size() == 1) {
-
- // Make vectorized Rand expression with seed
- long seed = getLongScalar(childExpr.get(0));
- return new FuncRand(seed, outputCol);
- }
-
- throw new HiveException("Vectorization error. Rand has more than 1 argument.");
- }
-
- private VectorExpression getRoundExpression(List<ExprNodeDesc> childExpr)
- throws HiveException {
-
- // Handle one-argument case
- if (childExpr.size() == 1) {
- return getUnaryFunctionExpression("FuncRound", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- }
-
- // Handle two-argument case
-
- // Try to get the second argument (the number of digits)
- long numDigits = getLongScalar(childExpr.get(1));
-
- // Use the standard logic for a unary function to handle the first argument.
- VectorExpression e = getUnaryFunctionExpression("RoundWithNumDigits", "Double", childExpr,
- CUSTOM_EXPR_PACKAGE);
-
- // Set second argument for this special case
- ((ISetLongArg) e).setArg(numDigits);
- return e;
- }
-
- private VectorExpression getPowerExpression(List<ExprNodeDesc> childExpr)
- throws HiveException {
- String argType = childExpr.get(0).getTypeString();
-
- // Try to get the second argument, typically a constant value (the power).
- double power = getDoubleScalar(childExpr.get(1));
-
- // Use the standard logic for a unary function to handle the first argument.
- VectorExpression e = getUnaryFunctionExpression("FuncPower", "Double", childExpr,
- CUSTOM_EXPR_PACKAGE);
-
- // Set the second argument for this special case
- ((ISetDoubleArg) e).setArg(power);
- return e;
- }
-
- private VectorExpression getLogWithBaseExpression(List<ExprNodeDesc> childExpr)
- throws HiveException {
- if (childExpr.size() == 1) {
-
- // No base provided, so this is equivalent to Ln
- return getUnaryFunctionExpression("FuncLn", "Double", childExpr,
- GENERATED_EXPR_PACKAGE);
- } else if (childExpr.size() == 2) {
-
- // Get the type of the (normally variable) input expression
- String argType = childExpr.get(1).getTypeString();
-
- // Try to get the first argument, typically a constant value (the base)
- double base = getDoubleScalar(childExpr.get(0));
-
- // Use the standard logic for a unary function to handle the second argument.
- VectorExpression e = getUnaryFunctionExpression("FuncLogWithBase", "Double",
- childExpr.subList(1, 2), // pass the second argument as the first
- CUSTOM_EXPR_PACKAGE);
-
- // set the first argument (the base) for this special case
- ((ISetDoubleArg) e).setArg(base);
- return e;
- }
-
- throw new HiveException("Udf: Log could not be vectorized");
- }
-
- private double getDoubleScalar(ExprNodeDesc expr) throws HiveException {
- if (!(expr instanceof ExprNodeConstantDesc)) {
- throw new HiveException("Constant value expected for UDF argument. " +
- "Non-constant argument not supported for vectorization.");
- }
- ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) expr;
- Object obj = getScalarValue(constExpr);
- if (obj instanceof Double) {
- return ((Double) obj).doubleValue();
- } else if (obj instanceof DoubleWritable) {
- return ((DoubleWritable) obj).get();
- } else if (obj instanceof Integer) {
- return (double) ((Integer) obj).longValue();
- } else if (obj instanceof IntWritable) {
- return (double) ((IntWritable) obj).get();
- }
-
- throw new HiveException("Udf: unhandled constant type for scalar argument."
- + "Expecting double or integer");
- }
-
- private long getLongScalar(ExprNodeDesc expr) throws HiveException {
- if (!(expr instanceof ExprNodeConstantDesc)) {
- throw new HiveException("Constant value expected for UDF argument. " +
- "Non-constant argument not supported for vectorization.");
- }
- ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) expr;
- Object obj = getScalarValue(constExpr);
- if (obj instanceof Integer) {
- return (long) ((Integer) obj).longValue();
- } else if (obj instanceof IntWritable) {
- return (long) ((IntWritable) obj).get();
- } else if (obj instanceof Long) {
- return ((Long) obj).longValue();
- } else if (obj instanceof LongWritable) {
- return ((LongWritable) obj).get();
- }
-
- throw new HiveException("Udf: unhandled constant type for scalar argument."
- + "Expecting integer or bigint");
- }
-
- /* Return a vector expression for string concatenation, including the column-scalar,
- * scalar-column, and column-column cases.
- */
- private VectorExpression getConcatExpression(List<ExprNodeDesc> childExprList)
- throws HiveException {
- ExprNodeDesc left = childExprList.get(0);
- ExprNodeDesc right = childExprList.get(1);
- int inputColLeft = -1;
- int inputColRight = -1;
- VectorExpression vLeft = null;
- VectorExpression vRight = null;
- VectorExpression expr = null;
-
- // Generate trees to evaluate non-leaf inputs, if there are any.
- if (left instanceof ExprNodeGenericFuncDesc) {
- vLeft = getVectorExpression(left);
- inputColLeft = vLeft.getOutputColumn();
- }
-
- if (right instanceof ExprNodeGenericFuncDesc) {
- vRight = getVectorExpression(right);
- inputColRight = vRight.getOutputColumn();
- }
-
- // Handle case for left input a column and right input a constant
- if ((left instanceof ExprNodeColumnDesc || inputColLeft != -1) &&
- right instanceof ExprNodeConstantDesc) {
- if (inputColLeft == -1) {
- inputColLeft = getInputColumnIndex(((ExprNodeColumnDesc) left).getColumn());
- }
- int outputCol = ocm.allocateOutputColumn("String");
- byte[] constant = (byte[]) getScalarValue((ExprNodeConstantDesc) right);
- expr = new StringConcatColScalar(inputColLeft, outputCol, constant);
- if (vLeft != null) {
- expr.setChildExpressions(new VectorExpression [] {vLeft});
- }
- }
-
- // Handle case for left input a constant and right input a column
- else if ((left instanceof ExprNodeConstantDesc) &&
- (right instanceof ExprNodeColumnDesc || inputColRight != -1)) {
- if (inputColRight == -1) {
- inputColRight = getInputColumnIndex(((ExprNodeColumnDesc) right).getColumn());
- }
- int outputCol = ocm.allocateOutputColumn("String");
- byte[] constant = (byte[]) getScalarValue((ExprNodeConstantDesc) left);
- expr = new StringConcatScalarCol(constant, inputColRight, outputCol);
- if (vRight != null) {
- expr.setChildExpressions(new VectorExpression [] {vRight});
- }
- }
-
- // Handle case where both left and right inputs are columns
- else if ((left instanceof ExprNodeColumnDesc || inputColLeft != -1) &&
- (right instanceof ExprNodeColumnDesc || inputColRight != -1)) {
- if (inputColLeft == -1) {
- inputColLeft = getInputColumnIndex(((ExprNodeColumnDesc) left).getColumn());
- }
- if (inputColRight == -1) {
- inputColRight = getInputColumnIndex(((ExprNodeColumnDesc) right).getColumn());
- }
- int outputCol = ocm.allocateOutputColumn("String");
- expr = new StringConcatColCol(inputColLeft, inputColRight, outputCol);
- if (vLeft == null && vRight != null) {
- expr.setChildExpressions(new VectorExpression [] {vRight});
- } else if (vLeft != null && vRight == null) {
- expr.setChildExpressions(new VectorExpression [] {vLeft});
- } else if (vLeft != null && vRight != null) {
-
- // Both left and right have child expressions
- expr.setChildExpressions(new VectorExpression [] {vLeft, vRight});
- }
- } else {
- throw new HiveException("Failed to vectorize CONCAT()");
- }
-
- // Free output columns if inputs have non-leaf expression trees.
- if (vLeft != null) {
- ocm.freeOutputColumn(vLeft.getOutputColumn());
- }
- if (vRight != null) {
- ocm.freeOutputColumn(vRight.getOutputColumn());
- }
- return expr;
+ return null;
}
/*
@@ -1129,7 +665,7 @@ public class VectorizationContext {
for (int i = 0; i < childExprList.size(); i++) {
ExprNodeDesc child = childExprList.get(i);
if (child instanceof ExprNodeGenericFuncDesc) {
- VectorExpression e = getVectorExpression(child);
+ VectorExpression e = getVectorExpression(child, Mode.PROJECTION);
vectorExprs.add(e);
variableArgPositions.add(i);
exprResultColumnNums.add(e.getOutputColumn());
@@ -1148,25 +684,12 @@ public class VectorizationContext {
// Allocate output column and get column number;
int outputCol = -1;
- String resultColVectorType;
String resultType = expr.getTypeInfo().getTypeName();
- if (resultType.equalsIgnoreCase("string")) {
- resultColVectorType = "String";
- } else if (isIntFamily(resultType)) {
- resultColVectorType = "Long";
- } else if (isFloatFamily(resultType)) {
- resultColVectorType = "Double";
- } else if (resultType.equalsIgnoreCase("timestamp")) {
- resultColVectorType = "Long";
- } else {
- throw new HiveException("Unable to vectorize due to unsupported custom UDF return type "
- + resultType);
- }
+ String resultColVectorType = getNormalizedTypeName(resultType);
outputCol = ocm.allocateOutputColumn(resultColVectorType);
// Make vectorized operator
- VectorExpression ve;
- ve = new VectorUDFAdaptor(expr, outputCol, resultColVectorType, argDescs);
+ VectorExpression ve = new VectorUDFAdaptor(expr, outputCol, resultColVectorType, argDescs);
// Set child expressions
VectorExpression[] childVEs = null;
@@ -1224,451 +747,6 @@ public class VectorizationContext {
throw new HiveException("Unsuported type for vectorization: " + javaType);
}
- /* Return a unary string vector expression. This is used for functions like
- * UPPER() and LOWER().
- */
- private VectorExpression getUnaryStringExpression(String vectorExprClassName,
- String resultType, // result type name
- List<ExprNodeDesc> childExprList) throws HiveException {
-
- return getUnaryExpression(vectorExprClassName, resultType, childExprList,
- CUSTOM_EXPR_PACKAGE);
- }
-
- private VectorExpression getUnaryExpression(String vectorExprClassName,
- String resultType, // result type name
- List<ExprNodeDesc> childExprList,
- String packagePathPrefix // prefix of package path name
- ) throws HiveException {
-
- /* Create an instance of the class vectorExprClassName for the input column or expression result
- * and return it.
- */
-
- ExprNodeDesc childExpr = childExprList.get(0);
- int inputCol;
- VectorExpression v1 = null;
- if (childExpr instanceof ExprNodeGenericFuncDesc) {
- v1 = getVectorExpression(childExpr);
- inputCol = v1.getOutputColumn();
- } else if (childExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
- inputCol = getInputColumnIndex(colDesc.getColumn());
- } else {
- // constant argument case not supported
- throw new HiveException("Expression not supported: "+childExpr);
- }
- String outputColumnType = getNormalizedTypeName(resultType);
- int outputCol = ocm.allocateOutputColumn(outputColumnType);
- String className = packagePathPrefix + "." + vectorExprClassName;
- VectorExpression expr;
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol, outputCol);
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- if (v1 != null) {
- expr.setChildExpressions(new VectorExpression [] {v1});
- ocm.freeOutputColumn(v1.getOutputColumn());
- }
- return expr;
- }
-
-
- private VectorExpression getSubstrExpression(
- List<ExprNodeDesc> childExprList) throws HiveException {
-
- ExprNodeDesc childExpr = childExprList.get(0);
- ExprNodeDesc startExpr = childExprList.get(1);
- startExpr = foldConstantsForUnaryExpression(startExpr);
-
- // Get second and optionally third arguments
- int start;
- if (startExpr instanceof ExprNodeConstantDesc) {
- ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) startExpr;
- start = ((Integer) constDesc.getValue()).intValue();
- } else {
- throw new HiveException("Cannot vectorize non-constant start argument for SUBSTR");
- }
- ExprNodeDesc lengthExpr = null;
- int length = 0;
- if (childExprList.size() == 3) {
- lengthExpr = childExprList.get(2);
- lengthExpr = foldConstantsForUnaryExpression(lengthExpr);
- if (lengthExpr instanceof ExprNodeConstantDesc) {
- ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) lengthExpr;
- length = ((Integer) constDesc.getValue()).intValue();
- } else {
- throw new HiveException("Cannot vectorize non-constant length argument for SUBSTR");
- }
- }
-
- // Prepare first argument (whether it is a column or an expression)
- int inputCol;
- VectorExpression v1 = null;
- if (childExpr instanceof ExprNodeGenericFuncDesc) {
- v1 = getVectorExpression(childExpr);
- inputCol = v1.getOutputColumn();
- } else if (childExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
- inputCol = getInputColumnIndex(colDesc.getColumn());
- } else {
- throw new HiveException("Expression not supported: " + childExpr);
- }
- int outputCol = ocm.allocateOutputColumn("String");
-
- // Create appropriate vector expression for 2 or 3 argument version of SUBSTR()
- VectorExpression expr = null;
- if (childExprList.size() == 2) {
- expr = new StringSubstrColStart(inputCol, start, outputCol);
- } else if (childExprList.size() == 3) {
- expr = new StringSubstrColStartLen(inputCol, start, length, outputCol);
- } else {
- throw new HiveException("Invalid number of arguments for SUBSTR()");
- }
-
- if (v1 != null) {
- expr.setChildExpressions(new VectorExpression [] {v1});
- ocm.freeOutputColumn(v1.getOutputColumn());
- }
- return expr;
- }
-
- /**
- * Returns a vector expression for a LIKE or REGEXP expression
- * @param childExpr A list of child expressions
- * @param isLike {@code true}: the expression is LIKE.
- * {@code false}: the expression is REGEXP.
- * @return A {@link FilterStringColLikeStringScalar} or
- * a {@link FilterStringColRegExpStringScalar}
- * @throws HiveException
- */
- private VectorExpression getLikeExpression(List<ExprNodeDesc> childExpr, boolean isLike) throws HiveException {
- ExprNodeDesc leftExpr = childExpr.get(0);
- ExprNodeDesc rightExpr = childExpr.get(1);
-
- VectorExpression v1 = null;
- VectorExpression expr = null;
- int inputCol;
- ExprNodeConstantDesc constDesc;
-
- if ((leftExpr instanceof ExprNodeColumnDesc) &&
- (rightExpr instanceof ExprNodeConstantDesc) ) {
- ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
- constDesc = (ExprNodeConstantDesc) rightExpr;
- inputCol = getInputColumnIndex(leftColDesc.getColumn());
- if (isLike) {
- expr = (VectorExpression) new FilterStringColLikeStringScalar(inputCol,
- new Text((byte[]) getScalarValue(constDesc)));
- } else {
- expr = (VectorExpression) new FilterStringColRegExpStringScalar(inputCol,
- new Text((byte[]) getScalarValue(constDesc)));
- }
- } else if ((leftExpr instanceof ExprNodeGenericFuncDesc) &&
- (rightExpr instanceof ExprNodeConstantDesc)) {
- v1 = getVectorExpression(leftExpr);
- inputCol = v1.getOutputColumn();
- constDesc = (ExprNodeConstantDesc) rightExpr;
- if (isLike) {
- expr = (VectorExpression) new FilterStringColLikeStringScalar(inputCol,
- new Text((byte[]) getScalarValue(constDesc)));
- } else {
- expr = (VectorExpression) new FilterStringColRegExpStringScalar(inputCol,
- new Text((byte[]) getScalarValue(constDesc)));
- }
- }
- // TODO add logic to handle cases where left input is an expression.
- if (expr == null) {
- throw new HiveException("Vector LIKE filter expression could not be initialized");
- }
- if (v1 != null) {
- expr.setChildExpressions(new VectorExpression [] {v1});
- ocm.freeOutputColumn(v1.getOutputColumn());
- }
- return expr;
- }
-
- private VectorExpression getTimestampFieldExpression(String udf,
- List<ExprNodeDesc> childExpr) throws HiveException {
- ExprNodeDesc leftExpr = childExpr.get(0);
- leftExpr = foldConstantsForUnaryExpression(leftExpr);
- VectorExpression v1 = getVectorExpression(leftExpr);
- String colType = v1.getOutputType();
- String outputType = "long";
- if(colType.equalsIgnoreCase("timestamp")) {
- int inputCol = v1.getOutputColumn();
- int outputCol = ocm.allocateOutputColumn(outputType);
- String pkg = "org.apache.hadoop.hive.ql.exec.vector.expressions";
- // org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFYearLong
- String vectorUDF = pkg + ".Vector"+udf+"Long";
- try {
- VectorExpression v2 = (VectorExpression) getConstructor(vectorUDF).
- newInstance(inputCol,outputCol);
- return v2;
- } catch(Exception e) {
- e.printStackTrace();
- throw new HiveException("Udf: Vector"+udf+", could not be initialized for " + colType, e);
- }
- }
- throw new HiveException("Udf: "+udf+", is not supported for " + colType);
- }
-
- private VectorExpression getBinaryArithmeticExpression(String method,
- List<ExprNodeDesc> childExpr) throws HiveException {
- ExprNodeDesc leftExpr = childExpr.get(0);
- ExprNodeDesc rightExpr = childExpr.get(1);
-
- // TODO: Remove this when constant folding is fixed in the optimizer.
- leftExpr = foldConstantsForUnaryExpression(leftExpr);
- rightExpr = foldConstantsForUnaryExpression(rightExpr);
-
- VectorExpression v1 = null;
- VectorExpression v2 = null;
-
- VectorExpression expr = null;
- if ( (leftExpr instanceof ExprNodeColumnDesc) &&
- (rightExpr instanceof ExprNodeConstantDesc) ) {
- ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
- ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr;
- int inputCol = getInputColumnIndex(leftColDesc.getColumn());
- String colType = leftColDesc.getTypeString();
- String scalarType = constDesc.getTypeString();
- String className = getBinaryColumnScalarExpressionClassName(colType,
- scalarType, method);
- int outputCol = ocm.allocateOutputColumn(getOutputColType(colType,
- scalarType, method));
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol,
- getScalarValue(constDesc), outputCol);
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- } else if ( (leftExpr instanceof ExprNodeConstantDesc) &&
- (rightExpr instanceof ExprNodeColumnDesc) ) {
- ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr;
- ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr;
- int inputCol = getInputColumnIndex(rightColDesc.getColumn());
- String colType = rightColDesc.getTypeString();
- String scalarType = constDesc.getTypeString();
- String className = getBinaryScalarColumnExpressionClassName(colType,
- scalarType, method);
- String outputColType = getOutputColType(colType, scalarType, method);
- int outputCol = ocm.allocateOutputColumn(outputColType);
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(getScalarValue(constDesc),
- inputCol, outputCol);
- } catch (Exception ex) {
- throw new HiveException("Could not instantiate: "+className, ex);
- }
- } else if ( (rightExpr instanceof ExprNodeColumnDesc) &&
- (leftExpr instanceof ExprNodeColumnDesc) ) {
- ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
- ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr;
- int inputCol1 = getInputColumnIndex(leftColDesc.getColumn());
- int inputCol2 = getInputColumnIndex(rightColDesc.getColumn());
- String colType1 = leftColDesc.getTypeString();
- String colType2 = rightColDesc.getTypeString();
- String outputColType = getOutputColType(colType1, colType2, method);
- String className = getBinaryColumnColumnExpressionClassName(colType1,
- colType2, method);
- int outputCol = ocm.allocateOutputColumn(outputColType);
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2,
- outputCol);
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- } else if ((leftExpr instanceof ExprNodeGenericFuncDesc)
- && (rightExpr instanceof ExprNodeColumnDesc)) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) rightExpr;
- v1 = getVectorExpression(leftExpr);
- int inputCol1 = v1.getOutputColumn();
- int inputCol2 = getInputColumnIndex(colDesc.getColumn());
- String colType1 = v1.getOutputType();
- String colType2 = colDesc.getTypeString();
- String outputColType = getOutputColType(colType1, colType2, method);
- String className = getBinaryColumnColumnExpressionClassName(colType1,
- colType2, method);
- int outputCol = ocm.allocateOutputColumn(outputColType);
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2,
- outputCol);
- } catch (Exception ex) {
- throw new HiveException((ex));
- }
- expr.setChildExpressions(new VectorExpression [] {v1});
- } else if ((leftExpr instanceof ExprNodeGenericFuncDesc)
- && (rightExpr instanceof ExprNodeConstantDesc)) {
- ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr;
- v1 = getVectorExpression(leftExpr);
- int inputCol1 = v1.getOutputColumn();
- String colType1 = v1.getOutputType();
- String scalarType = constDesc.getTypeString();
- String outputColType = getOutputColType(colType1, scalarType, method);
- int outputCol = ocm.allocateOutputColumn(outputColType);
- String className = getBinaryColumnScalarExpressionClassName(colType1,
- scalarType, method);
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol1,
- getScalarValue(constDesc), outputCol);
- } catch (Exception ex) {
- throw new HiveException((ex));
- }
- expr.setChildExpressions(new VectorExpression [] {v1});
- } else if ((leftExpr instanceof ExprNodeColumnDesc)
- && (rightExpr instanceof ExprNodeGenericFuncDesc)) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leftExpr;
- v2 = getVectorExpression(rightExpr);
- int inputCol1 = getInputColumnIndex(colDesc.getColumn());
- int inputCol2 = v2.getOutputColumn();
- String colType1 = colDesc.getTypeString();
- String colType2 = v2.getOutputType();
- String outputColType = getOutputColType(colType1, colType2, method);
- int outputCol = ocm.allocateOutputColumn(outputColType);
- String className = getBinaryColumnColumnExpressionClassName(colType1,
- colType2, method);
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2,
- outputCol);
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- expr.setChildExpressions(new VectorExpression [] {v2});
- } else if ((leftExpr instanceof ExprNodeConstantDesc)
- && (rightExpr instanceof ExprNodeGenericFuncDesc)) {
- ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr;
- v2 = getVectorExpression(rightExpr);
- int inputCol2 = v2.getOutputColumn();
- String colType2 = v2.getOutputType();
- String scalarType = constDesc.getTypeString();
- String outputColType = getOutputColType(colType2, scalarType, method);
- int outputCol = ocm.allocateOutputColumn(outputColType);
- String className = getBinaryScalarColumnExpressionClassName(colType2,
- scalarType, method);
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(getScalarValue(constDesc),
- inputCol2, outputCol);
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- expr.setChildExpressions(new VectorExpression [] {v2});
- } else if ((leftExpr instanceof ExprNodeGenericFuncDesc)
- && (rightExpr instanceof ExprNodeGenericFuncDesc)) {
- //For arithmetic expression, the child expressions must be materializing
- //columns
- v1 = getVectorExpression(leftExpr);
- v2 = getVectorExpression(rightExpr);
- int inputCol1 = v1.getOutputColumn();
- int inputCol2 = v2.getOutputColumn();
- String colType1 = v1.getOutputType();
- String colType2 = v2.getOutputType();
- String outputColType = getOutputColType(colType1, colType2, method);
- int outputCol = ocm.allocateOutputColumn(outputColType);
- String className = getBinaryColumnColumnExpressionClassName(colType1,
- colType2, method);
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2,
- outputCol);
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- expr.setChildExpressions(new VectorExpression [] {v1, v2});
- }
- //Reclaim output columns of children to be re-used later
- if (v1 != null) {
- ocm.freeOutputColumn(v1.getOutputColumn());
- }
- if (v2 != null) {
- ocm.freeOutputColumn(v2.getOutputColumn());
- }
- return expr;
- }
-
- private VectorExpression getVectorExpression(GenericUDFOPOr udf,
- List<ExprNodeDesc> childExpr) throws HiveException {
- ExprNodeDesc leftExpr = childExpr.get(0);
- ExprNodeDesc rightExpr = childExpr.get(1);
-
- VectorExpression ve1;
- VectorExpression ve2;
- if (leftExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leftExpr;
- int inputCol = getInputColumnIndex(colDesc.getColumn());
- ve1 = new SelectColumnIsTrue(inputCol);
- } else {
- ve1 = getVectorExpression(leftExpr);
- }
-
- if (rightExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) rightExpr;
- int inputCol = getInputColumnIndex(colDesc.getColumn());
- ve2 = new SelectColumnIsTrue(inputCol);
- } else {
- ve2 = getVectorExpression(rightExpr);
- }
-
- return new FilterExprOrExpr(ve1,ve2);
- }
-
- private VectorExpression getVectorExpression(GenericUDFOPNot udf,
- List<ExprNodeDesc> childExpr) throws HiveException {
- throw new HiveException("Not is not supported");
- }
-
- private VectorExpression getVectorExpression(GenericUDFOPAnd udf,
- List<ExprNodeDesc> childExpr) throws HiveException {
-
- ExprNodeDesc leftExpr = childExpr.get(0);
- ExprNodeDesc rightExpr = childExpr.get(1);
-
- VectorExpression ve1;
- VectorExpression ve2;
- if (leftExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leftExpr;
- int inputCol = getInputColumnIndex(colDesc.getColumn());
- ve1 = new SelectColumnIsTrue(inputCol);
- } else {
- ve1 = getVectorExpression(leftExpr);
- }
-
- if (rightExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) rightExpr;
- int inputCol = getInputColumnIndex(colDesc.getColumn());
- ve2 = new SelectColumnIsTrue(inputCol);
- } else {
- ve2 = getVectorExpression(rightExpr);
- }
-
- return new FilterExprAndExpr(ve1,ve2);
- }
-
- private VectorExpression getVectorExpression(GenericUDFOPNull udf,
- List<ExprNodeDesc> childExpr) throws HiveException {
- ExprNodeDesc expr = childExpr.get(0);
- VectorExpression ve = null;
- if (expr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) expr;
- int inputCol = getInputColumnIndex(colDesc.getColumn());
- ve = new SelectColumnIsNull(inputCol);
- } else {
- throw new HiveException("Not supported");
- }
- return ve;
- }
-
- private VectorExpression getVectorExpression(GenericUDFOPNotNull udf,
- List<ExprNodeDesc> childExpr) throws HiveException {
- ExprNodeDesc expr = childExpr.get(0);
- if (expr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) expr;
- int inputCol = getInputColumnIndex(colDesc.getColumn());
- VectorExpression ve = new SelectColumnIsNotNull(inputCol);
- return ve;
- } else {
- throw new HiveException("Not supported");
- }
- }
-
private Object getScalarValue(ExprNodeConstantDesc constDesc)
throws HiveException {
if (constDesc.getTypeString().equalsIgnoreCase("String")) {
@@ -1689,161 +767,12 @@ public class VectorizationContext {
}
}
- private VectorExpression getVectorBinaryComparisonFilterExpression(String
- opName, List<ExprNodeDesc> childExpr) throws HiveException {
-
- ExprNodeDesc leftExpr = childExpr.get(0);
- ExprNodeDesc rightExpr = childExpr.get(1);
-
- // TODO: Remove this when constant folding is fixed in the optimizer.
- leftExpr = foldConstantsForUnaryExpression(leftExpr);
- rightExpr = foldConstantsForUnaryExpression(rightExpr);
-
- VectorExpression expr = null;
- VectorExpression v1 = null;
- VectorExpression v2 = null;
- if ( (leftExpr instanceof ExprNodeColumnDesc) &&
- (rightExpr instanceof ExprNodeConstantDesc) ) {
- ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
- ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr;
- int inputCol = getInputColumnIndex(leftColDesc.getColumn());
- String colType = leftColDesc.getTypeString();
- String scalarType = constDesc.getTypeString();
- String className = getFilterColumnScalarExpressionClassName(colType,
- scalarType, opName);
- try {
- Constructor<?> ctor = getConstructor(className);
- expr = (VectorExpression) ctor.newInstance(inputCol,
- getScalarValue(constDesc));
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- } else if ((leftExpr instanceof ExprNodeConstantDesc) &&
- (rightExpr instanceof ExprNodeColumnDesc)) {
- ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr;
- ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr;
- int inputCol = getInputColumnIndex(rightColDesc.getColumn());
- String colType = rightColDesc.getTypeString();
- String scalarType = constDesc.getTypeString();
- String className = getFilterScalarColumnExpressionClassName(colType,
- scalarType, opName);
- try {
- //Constructor<?>
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol,
- getScalarValue(constDesc));
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- } else if ( (rightExpr instanceof ExprNodeColumnDesc) &&
- (leftExpr instanceof ExprNodeColumnDesc) ) {
- ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
- ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr;
- int inputCol1 = getInputColumnIndex(leftColDesc.getColumn());
- int inputCol2 = getInputColumnIndex(rightColDesc.getColumn());
- String colType1 = leftColDesc.getTypeString();
- String colType2 = rightColDesc.getTypeString();
- String className = getFilterColumnColumnExpressionClassName(colType1,
- colType2, opName);
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2);
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- } else if ( (leftExpr instanceof ExprNodeGenericFuncDesc) &&
- (rightExpr instanceof ExprNodeColumnDesc) ) {
- v1 = getVectorExpression((ExprNodeGenericFuncDesc) leftExpr);
- ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr;
- int inputCol1 = v1.getOutputColumn();
- int inputCol2 = getInputColumnIndex(rightColDesc.getColumn());
- String colType1 = v1.getOutputType();
- String colType2 = rightColDesc.getTypeString();
- String className = getFilterColumnColumnExpressionClassName(colType1,
- colType2, opName);
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2);
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- expr.setChildExpressions(new VectorExpression [] {v1});
- } else if ( (leftExpr instanceof ExprNodeColumnDesc) &&
- (rightExpr instanceof ExprNodeGenericFuncDesc) ) {
- ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) leftExpr;
- v2 = getVectorExpression((ExprNodeGenericFuncDesc) rightExpr);
- int inputCol1 = getInputColumnIndex(rightColDesc.getColumn());
- int inputCol2 = v2.getOutputColumn();
- String colType1 = rightColDesc.getTypeString();
- String colType2 = v2.getOutputType();
- String className = getFilterColumnColumnExpressionClassName(colType1,
- colType2, opName);
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2);
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- expr.setChildExpressions(new VectorExpression [] {v2});
- } else if ( (leftExpr instanceof ExprNodeGenericFuncDesc) &&
- (rightExpr instanceof ExprNodeConstantDesc) ) {
- v1 = getVectorExpression((ExprNodeGenericFuncDesc) leftExpr);
- ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr;
- int inputCol1 = v1.getOutputColumn();
- String colType1 = v1.getOutputType();
- String scalarType = constDesc.getTypeString();
- String className = getFilterColumnScalarExpressionClassName(colType1,
- scalarType, opName);
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol1,
- getScalarValue(constDesc));
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- expr.setChildExpressions(new VectorExpression [] {v1});
- } else if ( (leftExpr instanceof ExprNodeConstantDesc) &&
- (rightExpr instanceof ExprNodeGenericFuncDesc) ) {
- ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr;
- v2 = getVectorExpression((ExprNodeGenericFuncDesc) rightExpr);
- int inputCol2 = v2.getOutputColumn();
- String scalarType = constDesc.getTypeString();
- String colType = v2.getOutputType();
- String className = getFilterScalarColumnExpressionClassName(colType,
- scalarType, opName);
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol2,
- getScalarValue(constDesc));
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- expr.setChildExpressions(new VectorExpression [] {v2});
- } else {
- //For comparison expression, the child expressions must be materializing
- //columns
- v1 = getVectorExpression(leftExpr);
- v2 = getVectorExpression(rightExpr);
- int inputCol1 = v1.getOutputColumn();
- int inputCol2 = v2.getOutputColumn();
- String colType1 = v1.getOutputType();
- String colType2 = v2.getOutputType();
- String className = getFilterColumnColumnExpressionClassName(colType1,
- colType2, opName);
- try {
- expr = (VectorExpression) getConstructor(className).newInstance(inputCol1, inputCol2);
- } catch (Exception ex) {
- throw new HiveException(ex);
- }
- expr.setChildExpressions(new VectorExpression [] {v1, v2});
- }
- if (v1 != null) {
- ocm.freeOutputColumn(v1.getOutputColumn());
- }
- if (v2 != null) {
- ocm.freeOutputColumn(v2.getOutputColumn());
- }
- return expr;
- }
-
- private Constructor<?> getConstructor(String className) throws HiveException {
+ private Constructor<?> getConstructor(Class<?> cl) throws HiveException {
try {
- Class<?> cl = Class.forName(className);
Constructor<?> [] ctors = cl.getDeclaredConstructors();
+ if (ctors.length == 1) {
+ return ctors[0];
+ }
Constructor<?> defaultCtor = cl.getConstructor();
for (Constructor<?> ctor : ctors) {
if (!ctor.equals(defaultCtor)) {
@@ -1856,8 +785,7 @@ public class VectorizationContext {
}
}
- private String getNormalizedTypeName(String colType) throws HiveException {
- validateInputType(colType);
+ static String getNormalizedTypeName(String colType) {
String normalizedType = null;
if (colType.equalsIgnoreCase("Double") || colType.equalsIgnoreCase("Float")) {
normalizedType = "Double";
@@ -1869,153 +797,6 @@ public class VectorizationContext {
return normalizedType;
}
- private String getFilterColumnColumnExpressionClassName(String colType1,
- String colType2, String opName) throws HiveException {
- StringBuilder b = new StringBuilder();
- b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen.");
- if (opType.equals(OperatorType.FILTER)) {
- b.append("Filter");
- }
- b.append(getNormalizedTypeName(colType1));
- b.append("Col");
- b.append(opName);
- b.append(getNormalizedTypeName(colType2));
- b.append("Column");
- return b.toString();
- }
-
- private String getFilterColumnScalarExpressionClassName(String colType, String
- scalarType, String opName) throws HiveException {
- StringBuilder b = new StringBuilder();
- b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen.");
- if (opType.equals(OperatorType.FILTER)) {
- b.append("Filter");
- }
- b.append(getNormalizedTypeName(colType));
- b.append("Col");
- b.append(opName);
- b.append(getNormalizedTypeName(scalarType));
- b.append("Scalar");
- return b.toString();
- }
-
- private String getFilterScalarColumnExpressionClassName(String colType, String
- scalarType, String opName) throws HiveException {
- StringBuilder b = new StringBuilder();
- b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen.");
- if (opType.equals(OperatorType.FILTER)) {
- b.append("Filter");
- }
- b.append(getNormalizedTypeName(scalarType));
- b.append("Scalar");
- b.append(opName);
- b.append(getNormalizedTypeName(colType));
- b.append("Column");
- return b.toString();
- }
-
- private String getBinaryColumnScalarExpressionClassName(String colType,
- String scalarType, String method) throws HiveException {
- StringBuilder b = new StringBuilder();
- String normColType = getNormalizedTypeName(colType);
- String normScalarType = getNormalizedTypeName(scalarType);
- if (normColType.equalsIgnoreCase("long") && normScalarType.equalsIgnoreCase("long")
- && method.equalsIgnoreCase("divide")) {
- b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.");
- } else {
- b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen.");
- }
- b.append(normColType);
- b.append("Col");
- b.append(method);
- b.append(normScalarType);
- b.append("Scalar");
- return b.toString();
- }
-
- private String getBinaryScalarColumnExpressionClassName(String colType,
- String scalarType, String method) throws HiveException {
- StringBuilder b = new StringBuilder();
- String normColType = getNormalizedTypeName(colType);
- String normScalarType = getNormalizedTypeName(scalarType);
- if (normColType.equalsIgnoreCase("long") && normScalarType.equalsIgnoreCase("long")
- && method.equalsIgnoreCase("divide")) {
- b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.");
- } else {
- b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen.");
- }
- b.append(normScalarType);
- b.append("Scalar");
- b.append(method);
- b.append(normColType);
- b.append("Column");
- return b.toString();
- }
-
- private String getBinaryColumnColumnExpressionClassName(String colType1,
- String colType2, String method) throws HiveException {
- StringBuilder b = new StringBuilder();
- String normColType1 = getNormalizedTypeName(colType1);
- String normColType2 = getNormalizedTypeName(colType2);
- if (normColType1.equalsIgnoreCase("long") && normColType2.equalsIgnoreCase("long")
- && method.equalsIgnoreCase("divide")) {
- b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.");
- } else {
- b.append("org.apache.hadoop.hive.ql.exec.vector.expressions.gen.");
- }
- b.append(normColType1);
- b.append("Col");
- b.append(method);
- b.append(normColType2);
- b.append("Column");
- return b.toString();
- }
-
- private String getOutputColType(String inputType1, String inputType2, String method)
- throws HiveException {
- validateInputType(inputType1);
- validateInputType(inputType2);
- if (method.equalsIgnoreCase("divide") || inputType1.equalsIgnoreCase("double") ||
- inputType2.equalsIgnoreCase("double") || inputType1.equalsIgnoreCase("float") ||
- inputType2.equalsIgnoreCase("float")) {
- return "double";
- } else {
- if (inputType1.equalsIgnoreCase("string") || inputType2.equalsIgnoreCase("string")) {
- return "string";
- } else {
- return "long";
- }
- }
- }
-
- private void validateInputType(String inputType) throws HiveException {
- if (! (inputType.equalsIgnoreCase("float") ||
- inputType.equalsIgnoreCase("double") ||
- inputType.equalsIgnoreCase("string") ||
- inputType.equalsIgnoreCase("tinyint") ||
- inputType.equalsIgnoreCase("smallint") ||
- inputType.equalsIgnoreCase("short") ||
- inputType.equalsIgnoreCase("byte") ||
- inputType.equalsIgnoreCase("int") ||
- inputType.equalsIgnoreCase("long") ||
- inputType.equalsIgnoreCase("bigint") ||
- inputType.equalsIgnoreCase("boolean") ||
- inputType.equalsIgnoreCase("timestamp") ) ) {
- throw new HiveException("Unsupported input type: "+inputType);
- }
- }
-
- private String getOutputColType(String inputType, String method) throws HiveException {
- validateInputType(inputType);
- if (inputType.equalsIgnoreCase("float") || inputType.equalsIgnoreCase("double")) {
- return "double";
- } else if (inputType.equalsIgnoreCase("string")) {
- return "string";
- } else {
- return "long";
- }
- }
-
static Object[][] aggregatesDefinition = {
{"min", "Long", VectorUDAFMinLong.class},
{"min", "Double", VectorUDAFMinDouble.class},
@@ -2055,7 +836,7 @@ public class VectorizationContext {
for (int i = 0; i< paramDescList.size(); ++i) {
ExprNodeDesc exprDesc = paramDescList.get(i);
- vectorParams[i] = this.getVectorExpression(exprDesc);
+ vectorParams[i] = this.getVectorExpression(exprDesc, Mode.PROJECTION);
}
String aggregateName = desc.getGenericUDAFName();
@@ -2080,12 +861,7 @@ public class VectorizationContext {
vectorParams.length > 0 ? vectorParams[0] : null);
aggExpr.init(desc);
return aggExpr;
- }
- // TODO: change to 1.7 syntax when possible
- //catch (InvocationTargetException | IllegalAccessException
- // | NoSuchMethodException | InstantiationException)
- catch (Exception e)
- {
+ } catch (Exception e) {
throw new HiveException("Internal exception for vector aggregate : \"" +
aggregateName + "\" for type: \"" + inputType + "", e);
}
@@ -2150,4 +926,3 @@ public class VectorizationContext {
this.addToColumnMap(columnName, columnIndex);
}
}
-
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java?rev=1535174&r1=1535173&r2=1535174&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java Wed Oct 23 20:50:38 2013
@@ -18,10 +18,6 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
-import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.io.Text;
-
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
@@ -32,6 +28,10 @@ import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
/**
* An abstract class for LIKE and REGEXP expressions. LIKE and REGEXP expression share similar
* functions, but they have different grammars. AbstractFilterStringColLikeStringScalar class
@@ -409,4 +409,18 @@ public abstract class AbstractFilterStri
public void setPattern(String pattern) {
this.pattern = pattern;
}
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder())
+ .setMode(
+ VectorExpressionDescriptor.Mode.FILTER)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.getType("string"),
+ VectorExpressionDescriptor.ArgumentType.getType("string"))
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
+ }
}
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java?rev=1535174&r1=1535173&r2=1535174&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java Wed Oct 23 20:50:38 2013
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
/**
@@ -164,4 +165,18 @@ public class ColAndCol extends VectorExp
public void setOutputColumn(int outputColumn) {
this.outputColumn = outputColumn;
}
+
+ @Override
+ public VectorExpressionDescriptor.Descriptor getDescriptor() {
+ return (new VectorExpressionDescriptor.Builder())
+ .setMode(
+ VectorExpressionDescriptor.Mode.PROJECTION)
+ .setNumArguments(2)
+ .setArgumentTypes(
+ VectorExpressionDescriptor.ArgumentType.getType("long"),
+ VectorExpressionDescriptor.ArgumentType.getType("long"))
+ .setInputExpressionTypes(
+ VectorExpressionDescriptor.InputExpressionType.COLUMN,
+ VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
+ }
}