You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/10/16 00:36:49 UTC

svn commit: r1532575 - in /hive/trunk: ant/src/org/apache/hadoop/hive/ant/ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/ ql/sr...

Author: hashutosh
Date: Tue Oct 15 22:36:48 2013
New Revision: 1532575

URL: http://svn.apache.org/r1532575
Log:
HIVE-4821 : Implement vectorized type casting for all types (Eric Hanson via Ashutosh Chauhan)

Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToString.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
Modified:
    hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java

Modified: hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java
URL: http://svn.apache.org/viewvc/hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java?rev=1532575&r1=1532574&r2=1532575&view=diff
==============================================================================
--- hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java (original)
+++ hive/trunk/ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java Tue Oct 15 22:36:48 2013
@@ -356,6 +356,26 @@ public class GenVectorCode extends Task 
       {"ColumnUnaryFunc", "FuncSign", "double", "double", "MathExpr.sign", "", "", ""},
       {"ColumnUnaryFunc", "FuncSign", "double", "long", "MathExpr.sign", "(double)", "", ""},
 
+      // Casts
+      {"ColumnUnaryFunc", "Cast", "long", "double", "", "", "(long)", ""},
+      {"ColumnUnaryFunc", "Cast", "double", "long", "", "", "(double)", ""},
+      {"ColumnUnaryFunc", "CastTimestampToLongVia", "long", "long", "MathExpr.fromTimestamp", "",
+        "", ""},
+      {"ColumnUnaryFunc", "CastTimestampToDoubleVia", "double", "long",
+          "MathExpr.fromTimestampToDouble", "", "", ""},
+      {"ColumnUnaryFunc", "CastDoubleToBooleanVia", "long", "double", "MathExpr.toBool", "",
+        "", ""},
+      {"ColumnUnaryFunc", "CastLongToBooleanVia", "long", "long", "MathExpr.toBool", "",
+        "", ""},
+      {"ColumnUnaryFunc", "CastLongToTimestampVia", "long", "long", "MathExpr.longToTimestamp", "",
+          "", ""},
+      {"ColumnUnaryFunc", "CastDoubleToTimestampVia", "long", "double",
+         "MathExpr.doubleToTimestamp", "", "", ""},
+
+      // Boolean to long is done with an IdentityExpression
+      // Boolean to double is done with standard Long to Double cast
+      // See org.apache.hadoop.hive.ql.exec.vector.expressions for remaining cast VectorExpression
+      // classes
 
         {"ColumnUnaryMinus", "long"},
         {"ColumnUnaryMinus", "double"},

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1532575&r1=1532574&r2=1532575&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Tue Oct 15 22:36:48 2013
@@ -39,11 +39,11 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColLikeStringScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColRegExpStringScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncRand;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.ISetDoubleArg;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.ISetLongArg;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterStringColRegExpStringScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue;
@@ -75,6 +75,7 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopLong;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDouble;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampLong;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong;
 import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor;
 import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -84,11 +85,27 @@ import org.apache.hadoop.hive.ql.plan.Ex
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
+import org.apache.hadoop.hive.ql.udf.UDFAcos;
+import org.apache.hadoop.hive.ql.udf.UDFAsin;
+import org.apache.hadoop.hive.ql.udf.UDFAtan;
+import org.apache.hadoop.hive.ql.udf.UDFBin;
+import org.apache.hadoop.hive.ql.udf.UDFCeil;
+import org.apache.hadoop.hive.ql.udf.UDFConv;
+import org.apache.hadoop.hive.ql.udf.UDFCos;
 import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
+import org.apache.hadoop.hive.ql.udf.UDFDegrees;
+import org.apache.hadoop.hive.ql.udf.UDFExp;
+import org.apache.hadoop.hive.ql.udf.UDFFloor;
+import org.apache.hadoop.hive.ql.udf.UDFHex;
 import org.apache.hadoop.hive.ql.udf.UDFHour;
 import org.apache.hadoop.hive.ql.udf.UDFLTrim;
 import org.apache.hadoop.hive.ql.udf.UDFLength;
 import org.apache.hadoop.hive.ql.udf.UDFLike;
+import org.apache.hadoop.hive.ql.udf.UDFLn;
+import org.apache.hadoop.hive.ql.udf.UDFLog;
+import org.apache.hadoop.hive.ql.udf.UDFLog10;
+import org.apache.hadoop.hive.ql.udf.UDFLog2;
 import org.apache.hadoop.hive.ql.udf.UDFMinute;
 import org.apache.hadoop.hive.ql.udf.UDFMonth;
 import org.apache.hadoop.hive.ql.udf.UDFOPDivide;
@@ -98,31 +115,32 @@ import org.apache.hadoop.hive.ql.udf.UDF
 import org.apache.hadoop.hive.ql.udf.UDFOPNegative;
 import org.apache.hadoop.hive.ql.udf.UDFOPPlus;
 import org.apache.hadoop.hive.ql.udf.UDFOPPositive;
+import org.apache.hadoop.hive.ql.udf.UDFPower;
 import org.apache.hadoop.hive.ql.udf.UDFRegExp;
 import org.apache.hadoop.hive.ql.udf.UDFRTrim;
+import org.apache.hadoop.hive.ql.udf.UDFRadians;
+import org.apache.hadoop.hive.ql.udf.UDFRand;
+import org.apache.hadoop.hive.ql.udf.UDFRound;
 import org.apache.hadoop.hive.ql.udf.UDFSecond;
+import org.apache.hadoop.hive.ql.udf.UDFSign;
+import org.apache.hadoop.hive.ql.udf.UDFSin;
+import org.apache.hadoop.hive.ql.udf.UDFSqrt;
 import org.apache.hadoop.hive.ql.udf.UDFSubstr;
+import org.apache.hadoop.hive.ql.udf.UDFTan;
+import org.apache.hadoop.hive.ql.udf.UDFToBoolean;
+import org.apache.hadoop.hive.ql.udf.UDFToByte;
+import org.apache.hadoop.hive.ql.udf.UDFToInteger;
+import org.apache.hadoop.hive.ql.udf.UDFToLong;
+import org.apache.hadoop.hive.ql.udf.UDFToShort;
+import org.apache.hadoop.hive.ql.udf.UDFToFloat;
+import org.apache.hadoop.hive.ql.udf.UDFToDouble;
+import org.apache.hadoop.hive.ql.udf.UDFToString;
 import org.apache.hadoop.hive.ql.udf.UDFTrim;
 import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
 import org.apache.hadoop.hive.ql.udf.UDFYear;
-import org.apache.hadoop.hive.ql.udf.UDFAcos;
-import org.apache.hadoop.hive.ql.udf.UDFAsin;
-import org.apache.hadoop.hive.ql.udf.UDFAtan;
-import org.apache.hadoop.hive.ql.udf.UDFBin;
-import org.apache.hadoop.hive.ql.udf.UDFCeil;
-import org.apache.hadoop.hive.ql.udf.UDFConv;
-import org.apache.hadoop.hive.ql.udf.UDFCos;
-import org.apache.hadoop.hive.ql.udf.UDFDegrees;
-import org.apache.hadoop.hive.ql.udf.UDFExp;
-import org.apache.hadoop.hive.ql.udf.UDFFloor;
-import org.apache.hadoop.hive.ql.udf.UDFHex;
-import org.apache.hadoop.hive.ql.udf.UDFLn;
-import org.apache.hadoop.hive.ql.udf.UDFLog;
-import org.apache.hadoop.hive.ql.udf.UDFLog10;
-import org.apache.hadoop.hive.ql.udf.UDFLog2;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
@@ -135,18 +153,9 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper;
-import org.apache.hadoop.hive.ql.udf.UDFPower;
-import org.apache.hadoop.hive.ql.udf.UDFRadians;
-import org.apache.hadoop.hive.ql.udf.UDFRand;
-import org.apache.hadoop.hive.ql.udf.UDFRound;
-import org.apache.hadoop.hive.ql.udf.UDFSign;
-import org.apache.hadoop.hive.ql.udf.UDFSin;
-import org.apache.hadoop.hive.ql.udf.UDFSqrt;
-import org.apache.hadoop.hive.ql.udf.UDFTan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFAbs;
-
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
@@ -321,13 +330,41 @@ public class VectorizationContext {
       GenericUDFBridge bridge = (GenericUDFBridge) gudf;
       Class<? extends UDF> udfClass = bridge.getUdfClass();
       if (udfClass.equals(UDFHex.class)
-          || udfClass.equals(UDFConv.class)) {
+          || udfClass.equals(UDFConv.class)
+          || isCastToIntFamily(udfClass) && arg0Type(expr).equals("string")
+          || isCastToFloatFamily(udfClass) && arg0Type(expr).equals("string")
+          || udfClass.equals(UDFToString.class) &&
+               (arg0Type(expr).equals("timestamp")
+                   || arg0Type(expr).equals("double")
+                   || arg0Type(expr).equals("float"))) {
         return true;
       }
+    } else if (gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string")) {
+      return true;
     }
     return false;
   }
 
+  /**
+   * Tells whether a UDF class is one of the casts to an integer-family type:
+   * UDFToByte, UDFToShort, UDFToInteger or UDFToLong.
+   * UDFToBoolean is purposely excluded.
+   */
+  public static boolean isCastToIntFamily(Class<? extends UDF> udfClass) {
+    if (udfClass.equals(UDFToByte.class) || udfClass.equals(UDFToShort.class)) {
+      return true;
+    }
+    if (udfClass.equals(UDFToInteger.class)) {
+      return true;
+    }
+    return udfClass.equals(UDFToLong.class);
+  }
+
+  /**
+   * Tells whether a UDF class is a cast to a floating-point type, i.e. UDFToDouble
+   * or UDFToFloat.
+   */
+  public static boolean isCastToFloatFamily(Class<? extends UDF> udfClass) {
+    if (udfClass.equals(UDFToDouble.class)) {
+      return true;
+    }
+    return udfClass.equals(UDFToFloat.class);
+  }
+
+  /**
+   * Returns the type string of the first child expression (argument 0) of the
+   * given function call. The expression must have at least one child.
+   */
+  public static String arg0Type(ExprNodeGenericFuncDesc expr) {
+    return expr.getChildExprs().get(0).getTypeString();
+  }
+
   // Return true if this is a custom UDF or custom GenericUDF.
   // This is for use only in the planner. It will fail in a task.
   public static boolean isCustomUDF(ExprNodeGenericFuncDesc expr) {
@@ -490,7 +527,9 @@ public class VectorizationContext {
     return expr;
   }
 
-  private VectorExpression getUnaryPlusExpression(List<ExprNodeDesc> childExprList)
+  // Used as a fast path for operations that don't modify their input, like unary +
+  // and casting boolean to long.
+  private VectorExpression getIdentityExpression(List<ExprNodeDesc> childExprList)
       throws HiveException {
     ExprNodeDesc childExpr = childExprList.get(0);
     int inputCol;
@@ -550,6 +589,8 @@ public class VectorizationContext {
       return getConcatExpression(childExpr);
     } else if (udf instanceof GenericUDFAbs) {
       return getUnaryAbsExpression(childExpr);
+    } else if (udf instanceof GenericUDFTimestamp) {
+      return getCastToTimestamp(childExpr);
     }
 
     throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
@@ -607,7 +648,7 @@ public class VectorizationContext {
     } else if (cl.equals(UDFOPNegative.class)) {
       return getUnaryMinusExpression(childExpr);
     } else if (cl.equals(UDFOPPositive.class)) {
-      return getUnaryPlusExpression(childExpr);
+      return getIdentityExpression(childExpr);
     } else if (cl.equals(UDFYear.class) ||
         cl.equals(UDFMonth.class) ||
         cl.equals(UDFWeekOfYear.class) ||
@@ -688,11 +729,116 @@ public class VectorizationContext {
       return getRandExpression(childExpr);
     } else if (cl.equals(UDFBin.class)) {
       return getUnaryStringExpression("FuncBin", "String", childExpr);
+    } else if (isCastToIntFamily(cl)) {
+      return getCastToLongExpression(childExpr);
+    } else if (cl.equals(UDFToBoolean.class)) {
+      return getCastToBoolean(childExpr);
+    } else if (isCastToFloatFamily(cl)) {
+      return getCastToDoubleExpression(childExpr);
+    } else if (cl.equals(UDFToString.class)) {
+      return getCastToString(childExpr);
     }
 
     throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
   }
 
+  /**
+   * Builds the vector expression for a cast to timestamp, choosing the generated
+   * class by the type of the first argument.
+   *
+   * String input is deliberately not handled here -- it's handled elsewhere.
+   * See isLegacyPathUDF.
+   *
+   * @throws HiveException if the argument type is neither integer nor float family
+   */
+  private VectorExpression getCastToTimestamp(List<ExprNodeDesc> childExpr)
+      throws HiveException {
+    final String argType = childExpr.get(0).getTypeString();
+
+    final String classPrefix;
+    if (isIntFamily(argType)) {
+      classPrefix = "CastLongToTimestampVia";
+    } else if (isFloatFamily(argType)) {
+      classPrefix = "CastDoubleToTimestampVia";
+    } else {
+      throw new HiveException("Unhandled cast input type: " + argType);
+    }
+    return getUnaryFunctionExpression(classPrefix, "Long", childExpr, GENERATED_EXPR_PACKAGE);
+  }
+
+  /**
+   * Builds the vector expression for a cast to string, choosing the hand-written
+   * class by the type of the first argument.
+   *
+   * String input is deliberately not handled -- the planner removes string to string
+   * casts. Timestamp, float, and double inputs are handled by the legacy code path.
+   * See isLegacyPathUDF.
+   *
+   * @throws HiveException if the argument type is neither boolean nor integer family
+   */
+  private VectorExpression getCastToString(List<ExprNodeDesc> childExpr)
+      throws HiveException {
+    final String argType = childExpr.get(0).getTypeString();
+
+    // Boolean is a special case and must be tested before the integer family.
+    final String classPrefix;
+    if (argType.equals("boolean")) {
+      classPrefix = "CastBooleanToStringVia";
+    } else if (isIntFamily(argType)) {
+      classPrefix = "Cast";
+    } else {
+      throw new HiveException("Unhandled cast input type: " + argType);
+    }
+    return getUnaryFunctionExpression(classPrefix, "String", childExpr, CUSTOM_EXPR_PACKAGE);
+  }
+
+  /**
+   * Builds the vector expression for a cast to a floating-point type, chosen by the
+   * type of the first argument.
+   *
+   * String input is deliberately not handled here -- it's handled elsewhere.
+   * See isLegacyPathUDF.
+   *
+   * @throws HiveException if the argument type is not integer family, timestamp,
+   *     or float family
+   */
+  private VectorExpression getCastToDoubleExpression(List<ExprNodeDesc> childExpr)
+      throws HiveException {
+    final String argType = childExpr.get(0).getTypeString();
+
+    if (isIntFamily(argType)) {
+      return getUnaryFunctionExpression("Cast", "Double", childExpr, GENERATED_EXPR_PACKAGE);
+    }
+    if (argType.equals("timestamp")) {
+      return getUnaryFunctionExpression(
+          "CastTimestampToDoubleVia", "Double", childExpr, GENERATED_EXPR_PACKAGE);
+    }
+    if (isFloatFamily(argType)) {
+      // Float-family input needs no conversion, so the input is passed through as a no-op.
+      return getIdentityExpression(childExpr);
+    }
+
+    throw new HiveException("Unhandled cast input type: " + argType);
+  }
+
+  /**
+   * Builds the vector expression for a cast to boolean, chosen by the type of the
+   * first argument. Float-family, integer-family, timestamp and string inputs are
+   * handled; any other input type is rejected.
+   *
+   * @param childExpr arguments of the cast; only element 0 is inspected here
+   * @return the expression producing the boolean result in a long column
+   * @throws HiveException if the argument type is not one of the handled types
+   */
+  private VectorExpression getCastToBoolean(List<ExprNodeDesc> childExpr)
+      throws HiveException {
+    String inputType = childExpr.get(0).getTypeString();
+    if (isFloatFamily(inputType)) {
+      return getUnaryFunctionExpression("CastDoubleToBooleanVia", "Long", childExpr,
+          GENERATED_EXPR_PACKAGE);
+    } else if (isIntFamily(inputType) || inputType.equals("timestamp")) {
+      // Timestamps share the integer path: both use the long-to-boolean expression.
+      return getUnaryFunctionExpression("CastLongToBooleanVia", "Long", childExpr,
+          GENERATED_EXPR_PACKAGE);
+    } else if (inputType.equals("string")) {
+
+      // string casts to false if it is 0 characters long, otherwise true.
+      // This is done in two steps: compute the string length, then cast that length
+      // to boolean.
+      VectorExpression lenExpr = getUnaryStringExpression("StringLength", "Long", childExpr);
+
+      // The result column is allocated while the length column is still held, and the
+      // length column is released only afterwards.
+      // NOTE(review): presumably this keeps the two from sharing a scratch column -- confirm
+      // against the output column manager.
+      int outputCol = ocm.allocateOutputColumn("integer");
+      VectorExpression lenToBoolExpr =
+          new CastLongToBooleanViaLongToLong(lenExpr.getOutputColumn(), outputCol);
+      // The length expression becomes a child of the boolean cast that consumes it.
+      lenToBoolExpr.setChildExpressions(new VectorExpression[] {lenExpr});
+      ocm.freeOutputColumn(lenExpr.getOutputColumn());
+      return lenToBoolExpr;
+    }
+    // cast(booleanExpr as boolean) case is omitted because planner removes it as a no-op
+
+    throw new HiveException("Unhandled cast input type: " + inputType);
+  }
+
+  /**
+   * Builds the vector expression for a cast to an integer-family type, chosen by the
+   * type of the first argument.
+   *
+   * String input is deliberately not handled here -- it's handled elsewhere.
+   * See isLegacyPathUDF.
+   *
+   * @throws HiveException if the argument type is not float family, timestamp,
+   *     or integer family
+   */
+  private VectorExpression getCastToLongExpression(List<ExprNodeDesc> childExpr)
+      throws HiveException {
+    final String argType = childExpr.get(0).getTypeString();
+
+    if (isFloatFamily(argType)) {
+      return getUnaryFunctionExpression("Cast", "Long", childExpr, GENERATED_EXPR_PACKAGE);
+    }
+    if (argType.equals("timestamp")) {
+      return getUnaryFunctionExpression(
+          "CastTimestampToLongVia", "Long", childExpr, GENERATED_EXPR_PACKAGE);
+    }
+    if (isIntFamily(argType)) {
+      // Integer and boolean inputs need no conversion, so the input is passed through
+      // as a no-op.
+      return getIdentityExpression(childExpr);
+    }
+
+    throw new HiveException("Unhandled cast input type: " + argType);
+  }
+
   private VectorExpression getRandExpression(List<ExprNodeDesc> childExpr)
     throws HiveException {
 

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java?rev=1532575&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java Tue Oct 15 22:36:48 2013
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+/**
+ * Vectorized cast of a boolean column to string.
+ *
+ * Booleans are read from a long vector in which 0 means false and 1 means true. Each
+ * value is used directly as an index into a fixed two-entry table holding the bytes
+ * of "FALSE" and "TRUE", so no per-row scratch buffer is needed.
+ */
+public class CastBooleanToStringViaLongToString extends LongToStringUnaryUDF {
+  private static final long serialVersionUID = 1L;
+
+  // Entry 0 is the text for false, entry 1 the text for true.
+  private static final byte[][] dictionary = { {'F', 'A', 'L', 'S', 'E'}, {'T', 'R', 'U', 'E'} };
+
+  public CastBooleanToStringViaLongToString() {
+    super();
+  }
+
+  public CastBooleanToStringViaLongToString(int inputColumn, int outputColumn) {
+    super(inputColumn, outputColumn);
+  }
+
+  /**
+   * Sets entry i of the output to "FALSE" or "TRUE" according to vector[i].
+   *
+   * @param outV output string column
+   * @param vector input values; each must be 0 or 1, since any other value falls
+   *     outside the two-entry dictionary
+   * @param i index of the row to convert
+   */
+  @Override
+  protected void func(BytesColumnVector outV, long[] vector, int i) {
+
+    // Look the text up once and reuse it for both the data and its length.
+    byte[] text = dictionary[(int) vector[i]];
+    outV.setVal(i, text, 0, text.length);
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToString.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToString.java?rev=1532575&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToString.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToString.java Tue Oct 15 22:36:48 2013
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+
+/**
+ * Vectorized cast of a long column to its decimal string representation.
+ */
+public class CastLongToString extends LongToStringUnaryUDF {
+  private static final long serialVersionUID = 1L;
+
+  // Large enough for the longest value, Long.MIN_VALUE: a sign plus 19 digits.
+  private static final int SCRATCH_SIZE = 20;
+
+  // Temporary location for building the number string; reused for every row.
+  protected transient byte[] temp = new byte[SCRATCH_SIZE];
+
+  public CastLongToString() {
+    super();
+  }
+
+  public CastLongToString(int inputColumn, int outputColumn) {
+    super(inputColumn, outputColumn);
+  }
+
+  /**
+   * Formats vector[i] into the scratch buffer and stores the resulting bytes as
+   * entry i of the output column.
+   */
+  @Override
+  protected void func(BytesColumnVector outV, long[] vector, int i) {
+    outV.setVal(i, temp, 0, MathExpr.writeLongToUTF8(temp, vector[i]));
+  }
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java?rev=1532575&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongToStringUnaryUDF.java Tue Oct 15 22:36:48 2013
@@ -0,0 +1,136 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * This is a superclass for unary long functions returning strings that operate directly on the
+ * input and set the output.
+ *
+ * Subclasses implement func() to convert one row. evaluate() drives it over the batch,
+ * taking care of the selection vector, repeating inputs and null propagation.
+ */
+abstract public class LongToStringUnaryUDF extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+  // Index into batch.cols of the long input column.
+  int inputColumn;
+  // Index into batch.cols of the string (bytes) output column.
+  int outputColumn;
+
+  public LongToStringUnaryUDF(int inputColumn, int outputColumn) {
+    this.inputColumn = inputColumn;
+    this.outputColumn = outputColumn;
+  }
+
+  // No-argument constructor; the column indexes are left at their defaults and can be
+  // set later through setInputColumn/setOutputColumn.
+  public LongToStringUnaryUDF() {
+    super();
+  }
+
+  /**
+   * Converts a single row: reads vector[i] and stores the resulting string as
+   * entry i of outV. Only called for rows whose input is not null.
+   */
+  abstract protected void func(BytesColumnVector outV, long[] vector, int i);
+
+  /**
+   * Applies func() to every active row of the batch, reading from the input column
+   * and writing to the output column, and copies the input's null flags to the output.
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    // Child expressions, if any, are evaluated first so the input column is populated.
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector inputColVector = (LongColumnVector) batch.cols[inputColumn];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    long[] vector = inputColVector.vector;
+    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
+    // NOTE(review): presumably prepares the output's byte buffer for this batch -- confirm
+    // against BytesColumnVector. It is called even when the batch is empty.
+    outV.initBuffer();
+
+    if (n == 0) {
+      // Empty batch: nothing to do.
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      // No nulls in the input: every active row is converted unconditionally.
+      outV.noNulls = true;
+      if (inputColVector.isRepeating) {
+        // Repeating input: convert entry 0 only and mark the output as repeating too.
+        outV.isRepeating = true;
+        func(outV, vector, 0);
+      } else if (batch.selectedInUse) {
+        // Only the rows listed in the first n entries of sel are active.
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          func(outV, vector, i);
+        }
+        outV.isRepeating = false;
+      } else {
+        // Rows 0..n-1 are all active.
+        for(int i = 0; i != n; i++) {
+          func(outV, vector, i);
+        }
+        outV.isRepeating = false;
+      }
+    } else {
+
+      // Handle case with nulls. Don't do function if the value is null,
+      // because the data may be undefined for a null value.
+      outV.noNulls = false;
+      if (inputColVector.isRepeating) {
+        // Repeating input: entry 0 stands for every row, for both the null flag and the value.
+        outV.isRepeating = true;
+        outV.isNull[0] = inputColVector.isNull[0];
+        if (!inputColVector.isNull[0]) {
+          func(outV, vector, 0);
+        }
+      } else if (batch.selectedInUse) {
+        // Copy the null flag row by row for the selected rows, converting non-null values.
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          outV.isNull[i] = inputColVector.isNull[i];
+          if (!inputColVector.isNull[i]) {
+            func(outV, vector, i);
+          }
+        }
+        outV.isRepeating = false;
+      } else {
+        // Copy the null flags of the first n rows in bulk, then convert non-null values.
+        System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+        for(int i = 0; i != n; i++) {
+          if (!inputColVector.isNull[i]) {
+            func(outV, vector, i);
+          }
+        }
+        outV.isRepeating = false;
+      }
+    }
+  }
+
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public int getInputColumn() {
+    return inputColumn;
+  }
+
+  public void setInputColumn(int inputColumn) {
+    this.inputColumn = inputColumn;
+  }
+
+  // The output of every subclass is a string column.
+  @Override
+  public String getOutputType() {
+    return "String";
+  }
+
+}
\ No newline at end of file

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java?rev=1532575&r1=1532574&r2=1532575&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/MathExpr.java Tue Oct 15 22:36:48 2013
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
+import java.io.IOException;
+import java.io.OutputStream;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 
 /**
@@ -51,6 +53,74 @@ public class MathExpr {
     return v >= 0 ? 1.0 : -1.0;
   }
 
+  // Cast an integral value to boolean: 0 maps to 0 (false), anything else to 1 (true).
+  public static long toBool(long v) {
+    if (v != 0) {
+      return 1L;
+    }
+    return 0L;
+  }
+
+  // Cast a floating point value to boolean: a value equal to 0.0 maps to 0 (false),
+  // anything else to 1 (true).
+  public static long toBool(double v) {
+    if (v == 0.0D) {
+      return 0L;
+    }
+    return 1L;
+  }
+
+  /* Convert an integer value in milliseconds since the epoch to a timestamp value
+   * for use in a long column vector, which is represented in nanoseconds since the epoch.
+   */
+  public static long longToTimestamp(long v) {
+    final long nanosPerMilli = 1000000L;
+    return v * nanosPerMilli;
+  }
+
+  // Convert seconds since the epoch (with fraction) to nanoseconds since the epoch.
+  // The product is narrowed to a long with a plain cast.
+  public static long doubleToTimestamp(double v) {
+    final double nanos = v * 1000000000.0;
+    return (long) nanos;
+  }
+
+  /* Convert a timestamp held as nanoseconds since the epoch to whole seconds since
+   * the epoch. The fractional part is dropped by integer division.
+   */
+  public static long fromTimestamp(long v) {
+    final long nanosPerSecond = 1000000000L;
+    return v / nanosPerSecond;
+  }
+
+  /* Convert a timestamp held as nanoseconds since the epoch to seconds since the
+   * epoch, keeping the fraction, as a double.
+   */
+  public static double fromTimestampToDouble(long v) {
+    final double nanos = (double) v;
+    return nanos / 1000000000.0;
+  }
+
+  /* Write the decimal text of a long into the given byte array, starting at
+   * index 0, and return the number of bytes written. The array must be large
+   * enough for the text (up to 20 bytes: a sign plus 19 digits).
+   */
+  public static int writeLongToUTF8(byte[] result, long i) {
+    if (i == 0) {
+      result[0] = '0';
+      return 1;
+    }
+
+    int pos = 0;
+
+    // Work on the negated magnitude: the negative range is bigger than the positive
+    // range, so negating a positive value can never overflow, while negating
+    // Long.MIN_VALUE would.
+    long negValue = i;
+    if (negValue < 0) {
+      result[pos] = '-';
+      pos++;
+    } else {
+      negValue = -negValue;
+    }
+
+    // Find the power of ten that matches the most significant digit.
+    long divisor = 1000000000000000000L;
+    for (; negValue / divisor == 0; divisor /= 10) {
+      // no body: only the divisor is being scaled down
+    }
+
+    // Emit digits from most to least significant. Each remainder is zero or negative,
+    // so it is negated before being added to '0'.
+    for (; divisor > 0; divisor /= 10) {
+      long digit = -((negValue / divisor) % 10);
+      result[pos++] = (byte) ('0' + digit);
+    }
+
+    return pos;
+  }
+
   // Convert all NaN values in vector v to NULL. Should only be used if n > 0.
   public static void NaNToNull(DoubleColumnVector v, int[] sel, boolean selectedInUse, int n) {
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1532575&r1=1532574&r2=1532575&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Oct 15 22:36:48 2013
@@ -107,8 +107,15 @@ import org.apache.hadoop.hive.ql.udf.UDF
 import org.apache.hadoop.hive.ql.udf.UDFSqrt;
 import org.apache.hadoop.hive.ql.udf.UDFSubstr;
 import org.apache.hadoop.hive.ql.udf.UDFTan;
+import org.apache.hadoop.hive.ql.udf.UDFToBoolean;
+import org.apache.hadoop.hive.ql.udf.UDFToByte;
+import org.apache.hadoop.hive.ql.udf.UDFToDouble;
+import org.apache.hadoop.hive.ql.udf.UDFToFloat;
+import org.apache.hadoop.hive.ql.udf.UDFToInteger;
+import org.apache.hadoop.hive.ql.udf.UDFToLong;
+import org.apache.hadoop.hive.ql.udf.UDFToShort;
+import org.apache.hadoop.hive.ql.udf.UDFToString;
 import org.apache.hadoop.hive.ql.udf.UDFTrim;
-import org.apache.hadoop.hive.ql.udf.UDFUnhex;
 import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear;
 import org.apache.hadoop.hive.ql.udf.UDFYear;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
@@ -127,6 +134,7 @@ import org.apache.hadoop.hive.ql.udf.gen
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper;
 
@@ -222,6 +230,17 @@ public class Vectorizer implements Physi
     supportedGenericUDFs.add(GenericUDFConcat.class);
     supportedGenericUDFs.add(GenericUDFAbs.class);
 
+    // For type casts
+    supportedGenericUDFs.add(UDFToLong.class);
+    supportedGenericUDFs.add(UDFToInteger.class);
+    supportedGenericUDFs.add(UDFToShort.class);
+    supportedGenericUDFs.add(UDFToByte.class);
+    supportedGenericUDFs.add(UDFToBoolean.class);
+    supportedGenericUDFs.add(UDFToFloat.class);
+    supportedGenericUDFs.add(UDFToDouble.class);
+    supportedGenericUDFs.add(UDFToString.class);
+    supportedGenericUDFs.add(GenericUDFTimestamp.class);
+
     supportedAggregationUdfs.add("min");
     supportedAggregationUdfs.add("max");
     supportedAggregationUdfs.add("count");

Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java?rev=1532575&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java Tue Oct 15 22:36:48 2013
@@ -0,0 +1,182 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.hbase.client.coprocessor.Batch;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.*;
+import org.junit.Test;
+
+/**
+ * Test VectorExpression classes for vectorized implementations of type casts.
+ *
+ * Each test obtains a batch from a TestVectorMathFunctions fixture, evaluates
+ * one cast expression from an input column into an output column, and checks
+ * selected rows of the output vector.
+ */
+public class TestVectorTypeCasts {
+
+  // Number of nanoseconds in one second
+  private static final long NANOS_PER_SECOND = 1000000000;
+
+  // Number of microseconds in one second
+  private static final long MICROS_PER_SECOND = 1000000;
+
+  // CastLongToDouble, column 0 -> column 1: row 4 must come out as 2.0.
+  @Test
+  public void testVectorCastLongToDouble() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInDoubleOut();
+    DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastLongToDouble(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(2.0, resultV.vector[4]);
+  }
+
+  // CastDoubleToLong, column 0 -> column 1: row 6 must come out as 1.
+  @Test
+  public void testVectorCastDoubleToLong() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut();
+    LongColumnVector resultV = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastDoubleToLong(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(1, resultV.vector[6]);
+  }
+
+  // Double -> boolean (stored as long): row 3 must be 0 (false), row 4 must be 1 (true).
+  @Test
+  public void testCastDoubleToBoolean() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut();
+    LongColumnVector resultV = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastDoubleToBooleanViaDoubleToLong(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(0, resultV.vector[3]);
+    Assert.assertEquals(1, resultV.vector[4]);
+  }
+
+  // Double -> timestamp: row 3 must be 0 and row 4 must be half a second in nanoseconds.
+  @Test
+  public void testCastDoubleToTimestamp() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchDoubleInLongOut();
+    LongColumnVector resultV = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastDoubleToTimestampViaDoubleToLong(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(0, resultV.vector[3]);
+    Assert.assertEquals((long) (0.5d * NANOS_PER_SECOND), resultV.vector[4]);
+  }
+
+  // Long -> boolean (stored as long): a zero input must give 0; row 1 must give 1.
+  @Test
+  public void testCastLongToBoolean() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut();
+    LongColumnVector inV = (LongColumnVector) b.cols[0];
+    inV.vector[0] = 0;  // make one entry produce false in result
+    LongColumnVector resultV = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastLongToBooleanViaLongToLong(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(0, resultV.vector[0]);
+    Assert.assertEquals(1, resultV.vector[1]);
+  }
+
+  // Long -> timestamp: rows 0 and 1 must be -2 and 2 scaled by 1,000,000.
+  // NOTE(review): presumably the fixture holds -2 and 2 in those rows and the
+  // cast scales its input by 1,000,000 -- confirm against the fixture and cast.
+  @Test
+  public void testCastLongToTimestamp() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut();
+    LongColumnVector resultV = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastLongToTimestampViaLongToLong(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(-2 * MICROS_PER_SECOND, resultV.vector[0]);
+    Assert.assertEquals(2 * MICROS_PER_SECOND, resultV.vector[1]);
+  }
+
+  // Timestamp -> long: an input of NANOS_PER_SECOND must come out as 1.
+  @Test
+  public void testCastTimestampToLong() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInLongOut();
+    LongColumnVector inV = (LongColumnVector) b.cols[0];
+    inV.vector[0] = NANOS_PER_SECOND;  // Make one entry produce interesting result
+      // (1 sec after epoch).
+
+    LongColumnVector resultV = (LongColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastTimestampToLongViaLongToLong(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(1, resultV.vector[0]);
+  }
+
+  // Timestamp -> double: rows 1 and 3 must come out as -1E-9 and 1E-9.
+  // NOTE(review): presumably the fixture holds -1 and 1 in those rows -- confirm.
+  @Test
+  public void testCastTimestampToDouble() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInDoubleOut();
+    DoubleColumnVector resultV = (DoubleColumnVector) b.cols[1];
+    b.cols[0].noNulls = true;
+    VectorExpression expr = new CastTimestampToDoubleViaLongToDouble(0, 1);
+    expr.evaluate(b);
+    Assert.assertEquals(-1E-9D, resultV.vector[1]);
+    Assert.assertEquals(1E-9D, resultV.vector[3]);
+  }
+
+  // Encode s as UTF-8 bytes. An unsupported-encoding failure is rethrown as an
+  // unchecked exception that keeps the original exception as its cause.
+  public byte[] toBytes(String s) {
+    try {
+      return s.getBytes("UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException("Could not convert string to UTF-8 byte array.", e);
+    }
+  }
+
+  // Long -> string, column 1 -> column 2: row 1 must hold the bytes of "255".
+  @Test
+  public void testCastLongToString() {
+    VectorizedRowBatch b = TestVectorMathFunctions.getBatchForStringMath();
+    BytesColumnVector resultV = (BytesColumnVector) b.cols[2];
+    b.cols[1].noNulls = true;
+    VectorExpression expr = new CastLongToString(1, 2);
+    expr.evaluate(b);
+    byte[] num255 = toBytes("255");
+    Assert.assertEquals(0,
+        StringExpr.compare(num255, 0, num255.length,
+            resultV.vector[1], resultV.start[1], resultV.length[1]));
+  }
+
+  // Boolean -> string, column 1 -> column 2: row 0 must be "FALSE"; row 1 (set to 1) "TRUE".
+  // NOTE(review): unlike the other tests, noNulls is not set on the input column
+  // here -- confirm the fixture already provides it.
+  @Test
+  public void testCastBooleanToString() {
+    byte[] t = toBytes("TRUE");
+    byte[] f = toBytes("FALSE");
+    VectorizedRowBatch b = TestVectorMathFunctions.getBatchForStringMath();
+    LongColumnVector inV = (LongColumnVector) b.cols[1];
+    BytesColumnVector resultV = (BytesColumnVector) b.cols[2];
+    inV.vector[1] = 1;
+    VectorExpression expr = new CastBooleanToStringViaLongToString(1, 2);
+    expr.evaluate(b);
+    Assert.assertEquals(0,
+        StringExpr.compare(f, 0, f.length,
+            resultV.vector[0], resultV.start[0], resultV.length[0]));
+    Assert.assertEquals(0,
+        StringExpr.compare(t, 0, t.length,
+            resultV.vector[1], resultV.start[1], resultV.length[1]));
+  }
+}