You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/10/16 00:36:49 UTC
svn commit: r1532575 - in /hive/trunk: ant/src/org/apache/hadoop/hive/ant/
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/ ql/sr...
Author: hashutosh
Date: Tue Oct 15 22:36:48 2013
New Revision: 1532575
URL: http://svn.apache.org/r1532575
Log:
HIVE-4821 : Implement vectorized type casting for all types (Eric Hanson via Ashutosh Chauhan)
Added:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToString.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
Modified:
hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
Modified: hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
URL: http://svn.apache.org/viewvc/hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java?rev=1532575&r1=1532574&r2=1532575&view=diff
==============================================================================
--- hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java (original)
+++ hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java Tue Oct 15 22:36:48 2013
@@ -356,6 +356,26 @@ public class GenVectorCode extends Task
{"ColumnUnaryFunc", "FuncSign", "double", "double", "MathExpr.sign", "", "", ""},
{"ColumnUnaryFunc", "FuncSign", "double", "long", "MathExpr.sign", "(double)", "", ""},
+ // Casts
+ {"ColumnUnaryFunc", "Cast", "long", "double", "", "", "(long)", ""},
+ {"ColumnUnaryFunc", "Cast", "double", "long", "", "", "(double)", ""},
+ {"ColumnUnaryFunc", "CastTimestampToLongVia", "long", "long", "MathExpr.fromTimestamp", "",
+ "", ""},
+ {"ColumnUnaryFunc", "CastTimestampToDoubleVia", "double", "long",
+ "MathExpr.fromTimestampToDouble", "", "", ""},
+ {"ColumnUnaryFunc", "CastDoubleToBooleanVia", "long", "double", "MathExpr.toBool", "",
+ "", ""},
+ {"ColumnUnaryFunc", "CastLongToBooleanVia", "long", "long", "MathExpr.toBool", "",
+ "", ""},
+ {"ColumnUnaryFunc", "CastLongToTimestampVia", "long", "long", "MathExpr.longToTimestamp", "",
+ "", ""},
+ {"ColumnUnaryFunc", "CastDoubleToTimestampVia", "long", "double",
+ "MathExpr.doubleToTimestamp", "", "", ""},
+
+ // Boolean to long is done with an IdentityExpression
+ // Boolean to double is done with standard Long to Double cast
+ // See org.apache.hadoop.hive.ql.exec.vector.expressions for remaining cast VectorExpression
+ // classes
{"ColumnUnaryMinus", "long"},
{"ColumnUnaryMinus", "double"},
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1532575&r1=1532574&r2=1532575&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Tue Oct 15 22:36:48 2013
@@ -39,11 +39,11 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColRegExpStringScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncRand;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ISetDoubleArg;
import org.apache.hadoop.hive.ql.exec.vector.expressions.ISetLongArg;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColRegExpStringScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull;
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull;
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue;
@@ -75,6 +75,7 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDouble;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -84,11 +85,27 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
+import org.apache.hadoop.hive.ql.udf.UDFAcos;
+import org.apache.hadoop.hive.ql.udf.UDFAsin;
+import org.apache.hadoop.hive.ql.udf.UDFAtan;
+import org.apache.hadoop.hive.ql.udf.UDFBin;
+import org.apache.hadoop.hive.ql.udf.UDFCeil;
+import org.apache.hadoop.hive.ql.udf.UDFConv;
+import org.apache.hadoop.hive.ql.udf.UDFCos;
import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
+import org.apache.hadoop.hive.ql.udf.UDFDegrees;
+import org.apache.hadoop.hive.ql.udf.UDFExp;
+import org.apache.hadoop.hive.ql.udf.UDFFloor;
+import org.apache.hadoop.hive.ql.udf.UDFHex;
import org.apache.hadoop.hive.ql.udf.UDFHour;
import org.apache.hadoop.hive.ql.udf.UDFLTrim;
import org.apache.hadoop.hive.ql.udf.UDFLength;
import org.apache.hadoop.hive.ql.udf.UDFLike;
+import org.apache.hadoop.hive.ql.udf.UDFLn;
+import org.apache.hadoop.hive.ql.udf.UDFLog;
+import org.apache.hadoop.hive.ql.udf.UDFLog10;
+import org.apache.hadoop.hive.ql.udf.UDFLog2;
import org.apache.hadoop.hive.ql.udf.UDFMinute;
import org.apache.hadoop.hive.ql.udf.UDFMonth;
import org.apache.hadoop.hive.ql.udf.UDFOPDivide;
@@ -98,31 +115,32 @@ import org.apache.hadoop.hive.ql.udf.UDF
import org.apache.hadoop.hive.ql.udf.UDFOPNegative;
import org.apache.hadoop.hive.ql.udf.UDFOPPlus;
import org.apache.hadoop.hive.ql.udf.UDFOPPositive;
+import org.apache.hadoop.hive.ql.udf.UDFPower;
import org.apache.hadoop.hive.ql.udf.UDFRegExp;
import org.apache.hadoop.hive.ql.udf.UDFRTrim;
+import org.apache.hadoop.hive.ql.udf.UDFRadians;
+import org.apache.hadoop.hive.ql.udf.UDFRand;
+import org.apache.hadoop.hive.ql.udf.UDFRound;
import org.apache.hadoop.hive.ql.udf.UDFSecond;
+import org.apache.hadoop.hive.ql.udf.UDFSign;
+import org.apache.hadoop.hive.ql.udf.UDFSin;
+import org.apache.hadoop.hive.ql.udf.UDFSqrt;
import org.apache.hadoop.hive.ql.udf.UDFSubstr;
+import org.apache.hadoop.hive.ql.udf.UDFTan;
+import org.apache.hadoop.hive.ql.udf.UDFToBoolean;
+import org.apache.hadoop.hive.ql.udf.UDFToByte;
+import org.apache.hadoop.hive.ql.udf.UDFToInteger;
+import org.apache.hadoop.hive.ql.udf.UDFToLong;
+import org.apache.hadoop.hive.ql.udf.UDFToShort;
+import org.apache.hadoop.hive.ql.udf.UDFToFloat;
+import org.apache.hadoop.hive.ql.udf.UDFToDouble;
+import org.apache.hadoop.hive.ql.udf.UDFToString;
import org.apache.hadoop.hive.ql.udf.UDFTrim;
import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
import org.apache.hadoop.hive.ql.udf.UDFYear;
-import org.apache.hadoop.hive.ql.udf.UDFAcos;
-import org.apache.hadoop.hive.ql.udf.UDFAsin;
-import org.apache.hadoop.hive.ql.udf.UDFAtan;
-import org.apache.hadoop.hive.ql.udf.UDFBin;
-import org.apache.hadoop.hive.ql.udf.UDFCeil;
-import org.apache.hadoop.hive.ql.udf.UDFConv;
-import org.apache.hadoop.hive.ql.udf.UDFCos;
-import org.apache.hadoop.hive.ql.udf.UDFDegrees;
-import org.apache.hadoop.hive.ql.udf.UDFExp;
-import org.apache.hadoop.hive.ql.udf.UDFFloor;
-import org.apache.hadoop.hive.ql.udf.UDFHex;
-import org.apache.hadoop.hive.ql.udf.UDFLn;
-import org.apache.hadoop.hive.ql.udf.UDFLog;
-import org.apache.hadoop.hive.ql.udf.UDFLog10;
-import org.apache.hadoop.hive.ql.udf.UDFLog2;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
@@ -135,18 +153,9 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper;
-import org.apache.hadoop.hive.ql.udf.UDFPower;
-import org.apache.hadoop.hive.ql.udf.UDFRadians;
-import org.apache.hadoop.hive.ql.udf.UDFRand;
-import org.apache.hadoop.hive.ql.udf.UDFRound;
-import org.apache.hadoop.hive.ql.udf.UDFSign;
-import org.apache.hadoop.hive.ql.udf.UDFSin;
-import org.apache.hadoop.hive.ql.udf.UDFSqrt;
-import org.apache.hadoop.hive.ql.udf.UDFTan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs;
-
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
@@ -321,13 +330,41 @@ public class VectorizationContext {
GenericUDFBridge bridge = (GenericUDFBridge) gudf;
Class<? extends UDF> udfClass = bridge.getUdfClass();
if (udfClass.equals(UDFHex.class)
- || udfClass.equals(UDFConv.class)) {
+ || udfClass.equals(UDFConv.class)
+ || isCastToIntFamily(udfClass) && arg0Type(expr).equals("string")
+ || isCastToFloatFamily(udfClass) && arg0Type(expr).equals("string")
+ || udfClass.equals(UDFToString.class) &&
+ (arg0Type(expr).equals("timestamp")
+ || arg0Type(expr).equals("double")
+ || arg0Type(expr).equals("float"))) {
return true;
}
+ } else if (gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string")) {
+ return true;
}
return false;
}
+ public static boolean isCastToIntFamily(Class<? extends UDF> udfClass) {
+ return udfClass.equals(UDFToByte.class)
+ || udfClass.equals(UDFToShort.class)
+ || udfClass.equals(UDFToInteger.class)
+ || udfClass.equals(UDFToLong.class);
+
+ // Boolean is purposely excluded.
+ }
+
+ public static boolean isCastToFloatFamily(Class<? extends UDF> udfClass) {
+ return udfClass.equals(UDFToDouble.class)
+ || udfClass.equals(UDFToFloat.class);
+ }
+
+ // Return the type string of the first argument (argument 0).
+ public static String arg0Type(ExprNodeGenericFuncDesc expr) {
+ String type = expr.getChildExprs().get(0).getTypeString();
+ return type;
+ }
+
// Return true if this is a custom UDF or custom GenericUDF.
// This is for use only in the planner. It will fail in a task.
public static boolean isCustomUDF(ExprNodeGenericFuncDesc expr) {
@@ -490,7 +527,9 @@ public class VectorizationContext {
return expr;
}
- private VectorExpression getUnaryPlusExpression(List<ExprNodeDesc> childExprList)
+ // Used as a fast path for operations that don't modify their input, like unary +
+ // and casting boolean to long.
+ private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList)
throws HiveException {
ExprNodeDesc childExpr = childExprList.get(0);
int inputCol;
@@ -550,6 +589,8 @@ public class VectorizationContext {
return getConcatExpression(childExpr);
} else if (udf instanceof GenericUDFAbs) {
return getUnaryAbsExpression(childExpr);
+ } else if (udf instanceof GenericUDFTimestamp) {
+ return getCastToTimestamp(childExpr);
}
throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
@@ -607,7 +648,7 @@ public class VectorizationContext {
} else if (cl.equals(UDFOPNegative.class)) {
return getUnaryMinusExpression(childExpr);
} else if (cl.equals(UDFOPPositive.class)) {
- return getUnaryPlusExpression(childExpr);
+ return getIdentityExpression(childExpr);
} else if (cl.equals(UDFYear.class) ||
cl.equals(UDFMonth.class) ||
cl.equals(UDFWeekOfYear.class) ||
@@ -688,11 +729,116 @@ public class VectorizationContext {
return getRandExpression(childExpr);
} else if (cl.equals(UDFBin.class)) {
return getUnaryStringExpression("FuncBin", "String", childExpr);
+ } else if (isCastToIntFamily(cl)) {
+ return getCastToLongExpression(childExpr);
+ } else if (cl.equals(UDFToBoolean.class)) {
+ return getCastToBoolean(childExpr);
+ } else if (isCastToFloatFamily(cl)) {
+ return getCastToDoubleExpression(childExpr);
+ } else if (cl.equals(UDFToString.class)) {
+ return getCastToString(childExpr);
}
throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
}
+ private VectorExpression getCastToTimestamp(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ String inputType = childExpr.get(0).getTypeString();
+ if (isIntFamily(inputType)) {
+ return getUnaryFunctionExpression("CastLongToTimestampVia", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (isFloatFamily(inputType)) {
+ return getUnaryFunctionExpression("CastDoubleToTimestampVia", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ }
+ // The string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF.
+
+ throw new HiveException("Unhandled cast input type: " + inputType);
+ }
+
+ private VectorExpression getCastToString(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ String inputType = childExpr.get(0).getTypeString();
+ if (inputType.equals("boolean")) {
+ // Boolean must come before the integer family. It's a special case.
+ return getUnaryFunctionExpression("CastBooleanToStringVia", "String", childExpr,
+ CUSTOM_EXPR_PACKAGE);
+ } else if (isIntFamily(inputType)) {
+ return getUnaryFunctionExpression("Cast", "String", childExpr,
+ CUSTOM_EXPR_PACKAGE);
+ }
+ /* The string type is deliberately omitted -- the planner removes string to string casts.
+ * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF.
+ */
+
+ throw new HiveException("Unhandled cast input type: " + inputType);
+ }
+
+ private VectorExpression getCastToDoubleExpression(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ String inputType = childExpr.get(0).getTypeString();
+ if (isIntFamily(inputType)) {
+ return getUnaryFunctionExpression("Cast", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (inputType.equals("timestamp")) {
+ return getUnaryFunctionExpression("CastTimestampToDoubleVia", "Double", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (isFloatFamily(inputType)) {
+
+ // float types require no conversion, so use a no-op
+ return getIdentityExpression(childExpr);
+ }
+ // The string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF.
+
+ throw new HiveException("Unhandled cast input type: " + inputType);
+ }
+
+ private VectorExpression getCastToBoolean(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ String inputType = childExpr.get(0).getTypeString();
+ if (isFloatFamily(inputType)) {
+ return getUnaryFunctionExpression("CastDoubleToBooleanVia", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (isIntFamily(inputType) || inputType.equals("timestamp")) {
+ return getUnaryFunctionExpression("CastLongToBooleanVia", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (inputType.equals("string")) {
+
+ // string casts to false if it is 0 characters long, otherwise true
+ VectorExpression lenExpr = getUnaryStringExpression("StringLength", "Long", childExpr);
+
+ int outputCol = ocm.allocateOutputColumn("integer");
+ VectorExpression lenToBoolExpr =
+ new CastLongToBooleanViaLongToLong(lenExpr.getOutputColumn(), outputCol);
+ lenToBoolExpr.setChildExpressions(new VectorExpression[] {lenExpr});
+ ocm.freeOutputColumn(lenExpr.getOutputColumn());
+ return lenToBoolExpr;
+ }
+ // cast(booleanExpr as boolean) case is omitted because planner removes it as a no-op
+
+ throw new HiveException("Unhandled cast input type: " + inputType);
+ }
+
+ private VectorExpression getCastToLongExpression(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ String inputType = childExpr.get(0).getTypeString();
+ if (isFloatFamily(inputType)) {
+ return getUnaryFunctionExpression("Cast", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (inputType.equals("timestamp")) {
+ return getUnaryFunctionExpression("CastTimestampToLongVia", "Long", childExpr,
+ GENERATED_EXPR_PACKAGE);
+ } else if (isIntFamily(inputType)) {
+
+ // integer and boolean types require no conversion, so use a no-op
+ return getIdentityExpression(childExpr);
+ }
+ // string type is deliberately omitted -- it's handled elsewhere. See isLegacyPathUDF.
+
+ throw new HiveException("Unhandled cast input type: " + inputType);
+ }
+
private VectorExpression getRandExpression(List<ExprNodeDesc> childExpr)
throws HiveException {
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java?rev=1532575&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java Tue Oct 15 22:36:48 2013
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+public class CastBooleanToStringViaLongToString extends LongToStringUnaryUDF {
+ private static final long serialVersionUID = 1L;
+ private transient byte[] temp; // scratch buffer allocated by the constructors; func() does not use it
+ private static final byte[][] dictionary = { {'F', 'A', 'L', 'S', 'E'}, {'T', 'R', 'U', 'E'} };
+
+ public CastBooleanToStringViaLongToString() {
+ super();
+ temp = new byte[8];
+ }
+
+ public CastBooleanToStringViaLongToString(int inputColumn, int outputColumn) {
+ super(inputColumn, outputColumn);
+ temp = new byte[8];
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, long[] vector, int i) {
+
+ /* 0 is false and 1 is true in the input vector, so a simple dictionary is used
+ * with two entries. 0 references FALSE and 1 references TRUE in the dictionary.
+ */
+ outV.setVal(i, dictionary[(int) vector[i]], 0, dictionary[(int) vector[i]].length);
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToString.java?rev=1532575&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToString.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToString.java Tue Oct 15 22:36:48 2013
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+public class CastLongToString extends LongToStringUnaryUDF {
+ private static final long serialVersionUID = 1L;
+ protected transient byte[] temp; // temporary location for building number string
+
+ public CastLongToString() {
+ super();
+ temp = new byte[20];
+ }
+
+ public CastLongToString(int inputColumn, int outputColumn) {
+ super(inputColumn, outputColumn);
+ temp = new byte[20];
+ }
+
+ @Override
+ protected void func(BytesColumnVector outV, long[] vector, int i) {
+ int len = MathExpr.writeLongToUTF8(temp, vector[i]);
+ outV.setVal(i, temp, 0, len);
+ }
+}
Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java?rev=1532575&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java Tue Oct 15 22:36:48 2013
@@ -0,0 +1,136 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * This is a superclass for unary long functions returning strings that operate directly on the
+ * input and set the output.
+ */
+abstract public class LongToStringUnaryUDF extends VectorExpression {
+ private static final long serialVersionUID = 1L;
+ int inputColumn;
+ int outputColumn;
+
+ public LongToStringUnaryUDF(int inputColumn, int outputColumn) {
+ this.inputColumn = inputColumn;
+ this.outputColumn = outputColumn;
+ }
+
+ public LongToStringUnaryUDF() {
+ super();
+ }
+
+ abstract protected void func(BytesColumnVector outV, long[] vector, int i);
+
+ @Override
+ public void evaluate(VectorizedRowBatch batch) {
+
+ if (childExpressions != null) {
+ super.evaluateChildren(batch);
+ }
+
+ LongColumnVector inputColVector = (LongColumnVector) batch.cols[inputColumn];
+ int[] sel = batch.selected;
+ int n = batch.size;
+ long[] vector = inputColVector.vector;
+ BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
+ outV.initBuffer();
+
+ if (n == 0) {
+ //Nothing to do
+ return;
+ }
+
+ if (inputColVector.noNulls) {
+ outV.noNulls = true;
+ if (inputColVector.isRepeating) {
+ outV.isRepeating = true;
+ func(outV, vector, 0);
+ } else if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ func(outV, vector, i);
+ }
+ outV.isRepeating = false;
+ } else {
+ for(int i = 0; i != n; i++) {
+ func(outV, vector, i);
+ }
+ outV.isRepeating = false;
+ }
+ } else {
+
+ // Handle case with nulls. Don't do function if the value is null,
+ // because the data may be undefined for a null value.
+ outV.noNulls = false;
+ if (inputColVector.isRepeating) {
+ outV.isRepeating = true;
+ outV.isNull[0] = inputColVector.isNull[0];
+ if (!inputColVector.isNull[0]) {
+ func(outV, vector, 0);
+ }
+ } else if (batch.selectedInUse) {
+ for(int j = 0; j != n; j++) {
+ int i = sel[j];
+ outV.isNull[i] = inputColVector.isNull[i];
+ if (!inputColVector.isNull[i]) {
+ func(outV, vector, i);
+ }
+ }
+ outV.isRepeating = false;
+ } else {
+ System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+ for(int i = 0; i != n; i++) {
+ if (!inputColVector.isNull[i]) {
+ func(outV, vector, i);
+ }
+ }
+ outV.isRepeating = false;
+ }
+ }
+ }
+
+
+ @Override
+ public int getOutputColumn() {
+ return outputColumn;
+ }
+
+ public void setOutputColumn(int outputColumn) {
+ this.outputColumn = outputColumn;
+ }
+
+ public int getInputColumn() {
+ return inputColumn;
+ }
+
+ public void setInputColumn(int inputColumn) {
+ this.inputColumn = inputColumn;
+ }
+
+ @Override
+ public String getOutputType() {
+ return "String";
+ }
+
+}
\ No newline at end of file
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java?rev=1532575&r1=1532574&r2=1532575&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java Tue Oct 15 22:36:48 2013
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector.expressions;
+import java.io.IOException;
+import java.io.OutputStream;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
/**
@@ -51,6 +53,74 @@ public class MathExpr {
return v >= 0 ? 1.0 : -1.0;
}
+ // for casting integral types to boolean
+ public static long toBool(long v) {
+ return v == 0 ? 0 : 1;
+ }
+
+ // for casting floating point types to boolean
+ public static long toBool(double v) {
+ return v == 0.0D ? 0L : 1L;
+ }
+
+ /* Convert an integer value in milliseconds since the epoch to a timestamp value
+ * for use in a long column vector, which is represented in nanoseconds since the epoch.
+ */
+ public static long longToTimestamp(long v) {
+ return v * 1000000;
+ }
+
+ // Convert seconds since the epoch (with fraction) to nanoseconds, as a long integer.
+ public static long doubleToTimestamp(double v) {
+ return (long)( v * 1000000000.0);
+ }
+
+ /* Convert an integer value representing a timestamp in nanoseconds to one
+ * that represents a timestamp in seconds (since the epoch).
+ */
+ public static long fromTimestamp(long v) {
+ return v / 1000000000;
+ }
+
+ /* Convert an integer value representing a timestamp in nanoseconds to one
+ * that represents a timestamp in seconds, with fraction, since the epoch.
+ */
+ public static double fromTimestampToDouble(long v) {
+ return ((double) v) / 1000000000.0;
+ }
+
+ /* Convert a long to a string. The string is output into the argument
+ * byte array, beginning at character 0. The length is returned.
+ */
+ public static int writeLongToUTF8(byte[] result, long i) {
+ if (i == 0) {
+ result[0] = '0';
+ return 1;
+ }
+
+ int current = 0;
+
+ if (i < 0) {
+ result[current++] ='-';
+ } else {
+ // negative range is bigger than positive range, so there is no risk
+ // of overflow here.
+ i = -i;
+ }
+
+ long start = 1000000000000000000L;
+ while (i / start == 0) {
+ start /= 10;
+ }
+
+ while (start > 0) {
+ result[current++] = (byte) ('0' - (i / start % 10));
+ start /= 10;
+ }
+
+ return current;
+ }
+
// Convert all NaN values in vector v to NULL. Should only be used if n > 0.
public static void NaNToNull(DoubleColumnVector v, int[] sel, boolean selectedInUse, int n) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1532575&r1=1532574&r2=1532575&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Oct 15 22:36:48 2013
@@ -107,8 +107,15 @@ import org.apache.hadoop.hive.ql.udf.UDF
import org.apache.hadoop.hive.ql.udf.UDFSqrt;
import org.apache.hadoop.hive.ql.udf.UDFSubstr;
import org.apache.hadoop.hive.ql.udf.UDFTan;
+import org.apache.hadoop.hive.ql.udf.UDFToBoolean;
+import org.apache.hadoop.hive.ql.udf.UDFToByte;
+import org.apache.hadoop.hive.ql.udf.UDFToDouble;
+import org.apache.hadoop.hive.ql.udf.UDFToFloat;
+import org.apache.hadoop.hive.ql.udf.UDFToInteger;
+import org.apache.hadoop.hive.ql.udf.UDFToLong;
+import org.apache.hadoop.hive.ql.udf.UDFToShort;
+import org.apache.hadoop.hive.ql.udf.UDFToString;
import org.apache.hadoop.hive.ql.udf.UDFTrim;
-import org.apache.hadoop.hive.ql.udf.UDFUnhex;
import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
import org.apache.hadoop.hive.ql.udf.UDFYear;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
@@ -127,6 +134,7 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper;
@@ -222,6 +230,17 @@ public class Vectorizer implements Physi
supportedGenericUDFs.add(GenericUDFConcat.class);
supportedGenericUDFs.add(GenericUDFAbs.class);
+ // For type casts
+ supportedGenericUDFs.add(UDFToLong.class);
+ supportedGenericUDFs.add(UDFToInteger.class);
+ supportedGenericUDFs.add(UDFToShort.class);
+ supportedGenericUDFs.add(UDFToByte.class);
+ supportedGenericUDFs.add(UDFToBoolean.class);
+ supportedGenericUDFs.add(UDFToFloat.class);
+ supportedGenericUDFs.add(UDFToDouble.class);
+ supportedGenericUDFs.add(UDFToString.class);
+ supportedGenericUDFs.add(GenericUDFTimestamp.class);
+
supportedAggregationUdfs.add("min");
supportedAggregationUdfs.add("max");
supportedAggregationUdfs.add("count");
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java?rev=1532575&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java Tue Oct 15 22:36:48 2013
@@ -0,0 +1,182 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.hbase.client.coprocessor.Batch;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.*;
+import org.junit.Test;
+
+/**
+ * Test VectorExpression classes for vectorized implementations of type casts.
+ */
+public class TestVectorTypeCasts {
+
+  // Number of nanoseconds in one second
+  private static final long NANOS_PER_SECOND = 1000000000;
+
+  // Number of microseconds in one second
+  private static final long MICROS_PER_SECOND = 1000000;
+
+  @Test
+  public void testVectorCastLongToDouble() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInDoubleOut();
+    DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1];
+    b.cols[0].noNulls = true; // flag column 0 as null-free before evaluating
+    VectorExpression expr = new CastLongToDouble(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(2.0, resultV.vector[4]); // spot-check a single row of column 1
+  }
+
+  @Test
+  public void testVectorCastDoubleToLong() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut();
+    LongColumnVector resultV = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastDoubleToLong(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(1, resultV.vector[6]); // spot-check a single row of column 1
+  }
+
+  @Test
+  public void testCastDoubleToBoolean() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut();
+    LongColumnVector resultV = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastDoubleToBooleanViaDoubleToLong(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(0, resultV.vector[3]); // boolean output is checked as a long: 0 here,
+    Assert.assertEquals(1, resultV.vector[4]); // and 1 here
+  }
+
+  @Test
+  public void testCastDoubleToTimestamp() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut();
+    LongColumnVector resultV = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastDoubleToTimestampViaDoubleToLong(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(0, resultV.vector[3]);
+    Assert.assertEquals((long) (0.5d * NANOS_PER_SECOND), resultV.vector[4]); // half a second in ns
+  }
+
+  @Test
+  public void testCastLongToBoolean() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut();
+    LongColumnVector inV = (LongColumnVector) b.cols[0];
+    inV.vector[0] = 0; // make one entry produce false in result
+    LongColumnVector resultV = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastLongToBooleanViaLongToLong(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(0, resultV.vector[0]);
+    Assert.assertEquals(1, resultV.vector[1]);
+  }
+
+  @Test
+  public void testCastLongToTimestamp() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut();
+    LongColumnVector resultV = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastLongToTimestampViaLongToLong(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(-2 * MICROS_PER_SECOND, resultV.vector[0]); // presumably input -2 scaled
+    Assert.assertEquals(2 * MICROS_PER_SECOND, resultV.vector[1]); // by 1,000,000 - TODO confirm
+  }
+
+  @Test
+  public void testCastTimestampToLong() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut();
+    LongColumnVector inV = (LongColumnVector) b.cols[0];
+    inV.vector[0] = NANOS_PER_SECOND; // Make one entry produce interesting result
+    // (1 sec after epoch).
+
+    LongColumnVector resultV = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastTimestampToLongViaLongToLong(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(1, resultV.vector[0]);
+  }
+
+  @Test
+  public void testCastTimestampToDouble() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInDoubleOut();
+    DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastTimestampToDoubleViaLongToDouble(0, 1);
+    expr.evaluate(b);
+    // Expected results are +/- 1E-9; presumably rows 1 and 3 hold -1 and 1 ns (TODO confirm).
+    Assert.assertEquals(-1E-9D, resultV.vector[1]);
+    Assert.assertEquals(1E-9D, resultV.vector[3]);
+  }
+
+  /** Returns the UTF-8 bytes of s, rethrowing any encoding failure unchecked with its cause. */
+  public byte[] toBytes(String s) {
+    try {
+      return s.getBytes("UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      // Chain the original exception so the failure stays diagnosable from the stack trace.
+      throw new RuntimeException("Could not convert string to UTF-8 byte array.", e);
+    }
+  }
+
+  @Test
+  public void testCastLongToString() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getBatchForStringMath();
+    BytesColumnVector resultV = (BytesColumnVector) b.cols[2];
+    b.cols[1].noNulls = true;
+    VectorExpression expr = new CastLongToString(1, 2);
+    expr.evaluate(b);
+    byte[] num255 = toBytes("255"); // bytes that row 1 of the output is compared against
+    Assert.assertEquals(0,
+        StringExpr.compare(num255, 0, num255.length,
+            resultV.vector[1], resultV.start[1], resultV.length[1]));
+  }
+
+  @Test
+  public void testCastBooleanToString() {
+    byte[] t = toBytes("TRUE");
+    byte[] f = toBytes("FALSE");
+    VectorizedRowBatch b = TestVectorMathFunctions.getBatchForStringMath();
+    LongColumnVector inV = (LongColumnVector) b.cols[1];
+    BytesColumnVector resultV = (BytesColumnVector) b.cols[2];
+    inV.vector[1] = 1; // force row 1 of the input to 1 so it is expected to yield "TRUE"
+    VectorExpression expr = new CastBooleanToStringViaLongToString(1, 2);
+    expr.evaluate(b); // NOTE(review): unlike sibling tests, noNulls is left as the fixture set it
+    Assert.assertEquals(0, // row 0 is expected to be "FALSE"; presumably the fixture holds 0 there
+        StringExpr.compare(f, 0, f.length,
+            resultV.vector[0], resultV.start[0], resultV.length[0]));
+    Assert.assertEquals(0,
+        StringExpr.compare(t, 0, t.length,
+            resultV.vector[1], resultV.start[1], resultV.length[1]));
+  }
+}