You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/09/20 19:13:44 UTC
svn commit: r1525057 - in
/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql:
exec/vector/ exec/vector/expressions/ optimizer/physical/
Author: hashutosh
Date: Fri Sep 20 17:13:44 2013
New Revision: 1525057
URL: http://svn.apache.org/r1525057
Log:
HIVE-4512 : The vectorized plan is not picking right expression class for string concatenation. (Eric Hanson via Ashutosh Chauhan)
Modified:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatScalarCol.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1525057&r1=1525056&r2=1525057&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Fri Sep 20 17:13:44 2013
@@ -43,6 +43,9 @@ import org.apache.hadoop.hive.ql.exec.ve
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNotNull;
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsNull;
import org.apache.hadoop.hive.ql.exec.vector.expressions.SelectColumnIsTrue;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColCol;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatScalarCol;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFUnixTimeStampLong;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
@@ -75,6 +78,7 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
import org.apache.hadoop.hive.ql.udf.UDFHour;
import org.apache.hadoop.hive.ql.udf.UDFLength;
@@ -448,6 +452,8 @@ public class VectorizationContext {
return getUnaryStringExpression("StringLower", "String", childExpr);
} else if (udf instanceof GenericUDFUpper) {
return getUnaryStringExpression("StringUpper", "String", childExpr);
+ } else if (udf instanceof GenericUDFConcat) {
+ return getConcatExpression(childExpr);
}
throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
}
@@ -508,6 +514,92 @@ public class VectorizationContext {
throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
}
+ /* Return a vector expression for string concatenation, including the column-scalar,
+ * scalar-column, and column-column cases.
+ *
+ * Only the first two entries of childExprList are read, as the left and right inputs.
+ * NOTE(review): nothing here checks the size of the list; presumably callers only pass
+ * two-argument CONCAT calls -- confirm.
+ *
+ * An input that is an ExprNodeGenericFuncDesc is first turned into its own vector
+ * expression via getVectorExpression(). That expression's output column is then used as
+ * the input column of the concat expression, and the expression is attached to the
+ * result through setChildExpressions().
+ *
+ * Returns a StringConcatColScalar, StringConcatScalarCol or StringConcatColCol that
+ * writes to an output column allocated with ocm.allocateOutputColumn("String").
+ *
+ * Throws HiveException when the two inputs match none of the supported combinations
+ * (for example, two constants). A HiveException raised by a helper called from here is
+ * not caught and propagates to the caller.
+ */
+ private VectorExpression getConcatExpression(List<ExprNodeDesc> childExprList)
+ throws HiveException {
+ ExprNodeDesc left = childExprList.get(0);
+ ExprNodeDesc right = childExprList.get(1);
+ int inputColLeft = -1; // -1 means the left input column has not been resolved yet
+ int inputColRight = -1; // -1 means the right input column has not been resolved yet
+ VectorExpression vLeft = null; // stays null unless left is a function expression
+ VectorExpression vRight = null; // stays null unless right is a function expression
+ VectorExpression expr = null;
+
+ // Generate trees to evaluate non-leaf inputs, if there are any; read from their output columns.
+ if (left instanceof ExprNodeGenericFuncDesc) {
+ vLeft = getVectorExpression(left);
+ inputColLeft = vLeft.getOutputColumn();
+ }
+
+ if (right instanceof ExprNodeGenericFuncDesc) {
+ vRight = getVectorExpression(right);
+ inputColRight = vRight.getOutputColumn();
+ }
+
+ // Case 1: the left input is a column (or a function result) and the right input is a constant.
+ if ((left instanceof ExprNodeColumnDesc || inputColLeft != -1) &&
+ right instanceof ExprNodeConstantDesc) {
+ if (inputColLeft == -1) { // left is a plain column reference; look up its index
+ inputColLeft = getInputColumnIndex(((ExprNodeColumnDesc) left).getColumn());
+ }
+ int outputCol = ocm.allocateOutputColumn("String");
+ byte[] constant = (byte[]) getScalarValue((ExprNodeConstantDesc) right);
+ expr = new StringConcatColScalar(inputColLeft, outputCol, constant);
+ if (vLeft != null) { // attach the expression that produces the left input column
+ expr.setChildExpressions(new VectorExpression [] {vLeft});
+ }
+ }
+
+ // Case 2: the left input is a constant and the right input is a column (or a function result).
+ else if ((left instanceof ExprNodeConstantDesc) &&
+ (right instanceof ExprNodeColumnDesc || inputColRight != -1)) {
+ if (inputColRight == -1) { // right is a plain column reference; look up its index
+ inputColRight = getInputColumnIndex(((ExprNodeColumnDesc) right).getColumn());
+ }
+ int outputCol = ocm.allocateOutputColumn("String");
+ byte[] constant = (byte[]) getScalarValue((ExprNodeConstantDesc) left);
+ expr = new StringConcatScalarCol(constant, inputColRight, outputCol);
+ if (vRight != null) { // attach the expression that produces the right input column
+ expr.setChildExpressions(new VectorExpression [] {vRight});
+ }
+ }
+
+ // Case 3: both inputs are columns (each either a plain column or a function result).
+ else if ((left instanceof ExprNodeColumnDesc || inputColLeft != -1) &&
+ (right instanceof ExprNodeColumnDesc || inputColRight != -1)) {
+ if (inputColLeft == -1) {
+ inputColLeft = getInputColumnIndex(((ExprNodeColumnDesc) left).getColumn());
+ }
+ if (inputColRight == -1) {
+ inputColRight = getInputColumnIndex(((ExprNodeColumnDesc) right).getColumn());
+ }
+ int outputCol = ocm.allocateOutputColumn("String");
+ expr = new StringConcatColCol(inputColLeft, inputColRight, outputCol);
+ if (vLeft == null && vRight != null) {
+ expr.setChildExpressions(new VectorExpression [] {vRight});
+ } else if (vLeft != null && vRight == null) {
+ expr.setChildExpressions(new VectorExpression [] {vLeft});
+ } else if (vLeft != null && vRight != null) {
+
+ // Both left and right have child expressions; they are attached in left, right order.
+ expr.setChildExpressions(new VectorExpression [] {vLeft, vRight});
+ }
+ } else {
+ throw new HiveException("Failed to vectorize CONCAT()");
+ }
+
+ // Free output columns if inputs have non-leaf expression trees (not reached if we threw above).
+ if (vLeft != null) {
+ ocm.freeOutputColumn(vLeft.getOutputColumn());
+ }
+ if (vRight != null) {
+ ocm.freeOutputColumn(vRight.getOutputColumn());
+ }
+ return expr;
+ }
+
/*
* Return vector expression for a custom (i.e. not built-in) UDF.
*/
@@ -963,9 +1055,15 @@ public class VectorizationContext {
}
}
- private Object getScalarValue(ExprNodeConstantDesc constDesc) {
+ private Object getScalarValue(ExprNodeConstantDesc constDesc)
+ throws HiveException {
if (constDesc.getTypeString().equalsIgnoreCase("String")) {
- return ((String) constDesc.getValue()).getBytes();
+ try {
+ byte[] bytes = ((String) constDesc.getValue()).getBytes("UTF-8");
+ return bytes;
+ } catch (Exception ex) {
+ throw new HiveException(ex);
+ }
} else {
return constDesc.getValue();
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java?rev=1525057&r1=1525056&r2=1525057&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java Fri Sep 20 17:13:44 2013
@@ -31,7 +31,7 @@ public class StringConcatColScalar exten
private int outputColumn;
private byte[] value;
- StringConcatColScalar(int colNum, int outputColumn, byte[] value) {
+ public StringConcatColScalar(int colNum, int outputColumn, byte[] value) {
this();
this.colNum = colNum;
this.outputColumn = outputColumn;
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatScalarCol.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatScalarCol.java?rev=1525057&r1=1525056&r2=1525057&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatScalarCol.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatScalarCol.java Fri Sep 20 17:13:44 2013
@@ -31,7 +31,7 @@ public class StringConcatScalarCol exten
private int outputColumn;
private byte[] value;
- StringConcatScalarCol(byte[] value, int colNum, int outputColumn) {
+ public StringConcatScalarCol(byte[] value, int colNum, int outputColumn) {
this();
this.colNum = colNum;
this.outputColumn = outputColumn;
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1525057&r1=1525056&r2=1525057&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Fri Sep 20 17:13:44 2013
@@ -85,6 +85,7 @@ import org.apache.hadoop.hive.ql.udf.UDF
import org.apache.hadoop.hive.ql.udf.UDFYear;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFConcat;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLower;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
@@ -160,6 +161,7 @@ public class Vectorizer implements Physi
supportedGenericUDFs.add(UDFLike.class);
supportedGenericUDFs.add(GenericUDFLower.class);
supportedGenericUDFs.add(GenericUDFUpper.class);
+ supportedGenericUDFs.add(GenericUDFConcat.class);
supportedAggregationUdfs.add("min");
supportedAggregationUdfs.add("max");