You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/09/20 19:13:44 UTC

svn commit: r1525057 - in /hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql: exec/vector/ exec/vector/expressions/ optimizer/physical/

Author: hashutosh
Date: Fri Sep 20 17:13:44 2013
New Revision: 1525057

URL: http://svn.apache.org/r1525057
Log:
HIVE-4512 : The vectorized plan is not picking right expression class for string concatenation. (Eric Hanson via Ashutosh Chauhan)

Modified:
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatScalarCol.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1525057&r1=1525056&r2=1525057&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Fri Sep 20 17:13:44 2013
@@ -43,6 +43,9 @@ import org.apache.hadoop.hive.ql.exec.ve
 import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColCol;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatScalarCol;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampLong;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
@@ -75,6 +78,7 @@ import org.apache.hadoop.hive.ql.plan.Ex
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
 import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
 import org.apache.hadoop.hive.ql.udf.UDFHour;
 import org.apache.hadoop.hive.ql.udf.UDFLength;
@@ -448,6 +452,8 @@ public class VectorizationContext {
       return getUnaryStringExpression("StringLower", "String", childExpr);
     } else if (udf instanceof GenericUDFUpper) {
       return getUnaryStringExpression("StringUpper", "String", childExpr);
+    } else if (udf instanceof GenericUDFConcat) {
+      return getConcatExpression(childExpr);
     }
     throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
   }
@@ -508,6 +514,92 @@ public class VectorizationContext {
     throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
   }
 
+  /* Return a vector expression for two-argument string concatenation: the column-scalar,
+   * scalar-column, and column-column cases. A function-call input is evaluated by a child
+   * expression whose output column becomes the input. Any other argument count or operand
+   * mix throws HiveException, so arguments beyond the second are never silently dropped.
+   */
+  private VectorExpression getConcatExpression(List<ExprNodeDesc> childExprList)
+      throws HiveException {
+    if (childExprList == null || childExprList.size() != 2) {
+      throw new HiveException("Failed to vectorize CONCAT(): exactly 2 arguments required");
+    }
+    ExprNodeDesc left = childExprList.get(0);
+    ExprNodeDesc right = childExprList.get(1);
+    int inputColLeft = -1;
+    int inputColRight = -1;
+    VectorExpression vLeft = null;
+    VectorExpression vRight = null;
+    VectorExpression expr = null;
+
+    // Generate trees to evaluate non-leaf inputs, if there are any.
+    if (left instanceof ExprNodeGenericFuncDesc) {
+      vLeft = getVectorExpression(left);
+      inputColLeft = vLeft.getOutputColumn();
+    }
+    if (right instanceof ExprNodeGenericFuncDesc) {
+      vRight = getVectorExpression(right);
+      inputColRight = vRight.getOutputColumn();
+    }
+
+    // Handle case for left input a column and right input a constant
+    if ((left instanceof ExprNodeColumnDesc || inputColLeft != -1) &&
+        right instanceof ExprNodeConstantDesc) {
+      if (inputColLeft == -1) {
+        inputColLeft = getInputColumnIndex(((ExprNodeColumnDesc) left).getColumn());
+      }
+      int outputCol = ocm.allocateOutputColumn("String");
+      byte[] constant = (byte[]) getScalarValue((ExprNodeConstantDesc) right);
+      expr = new StringConcatColScalar(inputColLeft, outputCol, constant);
+      if (vLeft != null) {
+        expr.setChildExpressions(new VectorExpression [] {vLeft});
+      }
+    }
+    // Handle case for left input a constant and right input a column
+    else if ((left instanceof ExprNodeConstantDesc) &&
+        (right instanceof ExprNodeColumnDesc || inputColRight != -1)) {
+      if (inputColRight == -1) {
+        inputColRight = getInputColumnIndex(((ExprNodeColumnDesc) right).getColumn());
+      }
+      int outputCol = ocm.allocateOutputColumn("String");
+      byte[] constant = (byte[]) getScalarValue((ExprNodeConstantDesc) left);
+      expr = new StringConcatScalarCol(constant, inputColRight, outputCol);
+      if (vRight != null) {
+        expr.setChildExpressions(new VectorExpression [] {vRight});
+      }
+    }
+    // Handle case where both left and right inputs are columns
+    else if ((left instanceof ExprNodeColumnDesc || inputColLeft != -1) &&
+        (right instanceof ExprNodeColumnDesc || inputColRight != -1)) {
+      if (inputColLeft == -1) {
+        inputColLeft = getInputColumnIndex(((ExprNodeColumnDesc) left).getColumn());
+      }
+      if (inputColRight == -1) {
+        inputColRight = getInputColumnIndex(((ExprNodeColumnDesc) right).getColumn());
+      }
+      int outputCol = ocm.allocateOutputColumn("String");
+      expr = new StringConcatColCol(inputColLeft, inputColRight, outputCol);
+      if (vLeft != null && vRight != null) {
+        expr.setChildExpressions(new VectorExpression [] {vLeft, vRight});
+      } else if (vLeft != null) {
+        expr.setChildExpressions(new VectorExpression [] {vLeft});
+      } else if (vRight != null) {
+        expr.setChildExpressions(new VectorExpression [] {vRight});
+      }
+    } else {
+      throw new HiveException("Failed to vectorize CONCAT()");
+    }
+
+    // Free output columns if inputs have non-leaf expression trees.
+    if (vLeft != null) {
+      ocm.freeOutputColumn(vLeft.getOutputColumn());
+    }
+    if (vRight != null) {
+      ocm.freeOutputColumn(vRight.getOutputColumn());
+    }
+    return expr;
+  }
+
   /*
    * Return vector expression for a custom (i.e. not built-in) UDF.
    */
@@ -963,9 +1055,15 @@ public class VectorizationContext {
     }
   }
 
-  private Object getScalarValue(ExprNodeConstantDesc constDesc) {
+  private Object getScalarValue(ExprNodeConstantDesc constDesc)
+      throws HiveException {
     if (constDesc.getTypeString().equalsIgnoreCase("String")) {
-      return ((String) constDesc.getValue()).getBytes();
+      try {
+         byte[] bytes = ((String) constDesc.getValue()).getBytes("UTF-8");
+         return bytes;
+      } catch (Exception ex) {
+        throw new HiveException(ex);
+      }
     } else {
       return constDesc.getValue();
     }

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java?rev=1525057&r1=1525056&r2=1525057&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java Fri Sep 20 17:13:44 2013
@@ -31,7 +31,7 @@ public class StringConcatColScalar exten
   private int outputColumn;
   private byte[] value;
 
-  StringConcatColScalar(int colNum, int outputColumn, byte[] value) {
+  public StringConcatColScalar(int colNum, int outputColumn, byte[] value) {
     this();
     this.colNum = colNum;
     this.outputColumn = outputColumn;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatScalarCol.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatScalarCol.java?rev=1525057&r1=1525056&r2=1525057&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatScalarCol.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatScalarCol.java Fri Sep 20 17:13:44 2013
@@ -31,7 +31,7 @@ public class StringConcatScalarCol exten
   private int outputColumn;
   private byte[] value;
 
-  StringConcatScalarCol(byte[] value, int colNum, int outputColumn) {
+  public StringConcatScalarCol(byte[] value, int colNum, int outputColumn) {
     this();
     this.colNum = colNum;
     this.outputColumn = outputColumn;

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1525057&r1=1525056&r2=1525057&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Fri Sep 20 17:13:44 2013
@@ -85,6 +85,7 @@ import org.apache.hadoop.hive.ql.udf.UDF
 import org.apache.hadoop.hive.ql.udf.UDFYear;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
@@ -160,6 +161,7 @@ public class Vectorizer implements Physi
     supportedGenericUDFs.add(UDFLike.class);
     supportedGenericUDFs.add(GenericUDFLower.class);
     supportedGenericUDFs.add(GenericUDFUpper.class);
+    supportedGenericUDFs.add(GenericUDFConcat.class);
 
     supportedAggregationUdfs.add("min");
     supportedAggregationUdfs.add("max");