You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/05/03 16:39:29 UTC
svn commit: r1478816 - in /hive/branches/vectorization/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/
java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/
test/org/apache/hadoop/hive/ql/exec/vector/
test/org/apache/hadoop/hive/ql/exec...
Author: hashutosh
Date: Fri May 3 14:39:29 2013
New Revision: 1478816
URL: http://svn.apache.org/r1478816
Log:
HIVE-4481 : Vectorized row batch should be initialized with additional columns to hold intermediate output. (Jitendra Nath Pandey via Ashutosh Chauhan)
Added:
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
Modified:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java?rev=1478816&r1=1478815&r2=1478816&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java Fri May 3 14:39:29 2013
@@ -96,6 +96,8 @@ public class VectorMapOperator extends O
new HashMap<Operator<? extends OperatorDesc>, MapOpCtx>();
private ArrayList<Operator<? extends OperatorDesc>> extraChildrenToClose = null;
+ private VectorizationContext vectorizationContext = null;
+ // Set to true once the first VectorizedRowBatch seen by process() has been
+ // checked for (and, if needed, extended with) intermediate output columns.
+ private boolean outputColumnsInitialized = false;
private static class MapInputPath {
String path;
@@ -500,9 +502,7 @@ public class VectorMapOperator extends O
Path onepath = new Path(new Path(onefile).toUri().getPath());
List<String> aliases = conf.getPathToAliases().get(onefile);
- VectorizationContext vectorizationContext = new VectorizationContext
- (columnMap,
- columnCount);
+ vectorizationContext = new VectorizationContext(columnMap, columnCount);
for (String onealias : aliases) {
Operator<? extends OperatorDesc> op = conf.getAliasToWork().get(
@@ -785,6 +785,21 @@ public class VectorMapOperator extends O
// So, use tblOI (and not partOI) for forwarding
try {
if (value instanceof VectorizedRowBatch) {
+ if (!outputColumnsInitialized ) {
+ VectorizedRowBatch vrg = (VectorizedRowBatch) value;
+ Map<Integer, String> outputColumnTypes =
+ vectorizationContext.getOutputColumnTypeMap();
+ if (!outputColumnTypes.isEmpty()) {
+ int origNumCols = vrg.numCols;
+ int newNumCols = vrg.cols.length+outputColumnTypes.keySet().size();
+ vrg.cols = Arrays.copyOf(vrg.cols, newNumCols);
+ for (int i = origNumCols; i < newNumCols; i++) {
+ vrg.cols[i] = vectorizationContext.allocateColumnVector(outputColumnTypes.get(i),
+ VectorizedRowBatch.DEFAULT_SIZE);
+ }
+ }
+ outputColumnsInitialized = true;
+ }
forward(value, null);
} else {
Object row = null;
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1478816&r1=1478815&r2=1478816&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Fri May 3 14:39:29 2013
@@ -20,14 +20,16 @@ package org.apache.hadoop.hive.ql.exec.v
import java.lang.reflect.Constructor;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.UDF;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.ColumnExpression;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprOrExpr;
import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterNotExpr;
@@ -83,8 +85,6 @@ import org.apache.hadoop.hive.ql.udf.gen
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
/**
@@ -99,24 +99,80 @@ public class VectorizationContext {
//columnName to column position map
private final Map<String, Integer> columnMap;
- //Next column to be used for intermediate output
- private int nextOutputColumn;
+ private final int firstOutputColumnIndex;
+
private OperatorType opType;
//Map column number to type
- private final Map<Integer, String> outputColumnTypes;
+ private final OutputColumnManager ocm;
public VectorizationContext(Map<String, Integer> columnMap,
int initialOutputCol) {
this.columnMap = columnMap;
- this.nextOutputColumn = initialOutputCol;
- this.outputColumnTypes = new HashMap<Integer, String>();
+ this.ocm = new OutputColumnManager(initialOutputCol);
+ this.firstOutputColumnIndex = initialOutputCol;
}
-
- public int allocateOutputColumn (String columnName, String columnType) {
- int newColumnIndex = nextOutputColumn++;
- columnMap.put(columnName, newColumnIndex);
- outputColumnTypes.put(newColumnIndex, columnType);
- return newColumnIndex;
+
+
+ /**
+  * Hands out and recycles the columns used to hold intermediate expression
+  * output. Columns are identified internally by a zero-based slot; the
+  * index exposed to callers is the slot plus initialOutputCol.
+  * Declared static because it uses no state of the enclosing context.
+  */
+ private static class OutputColumnManager {
+   private final int initialOutputCol;
+   private int outputColCount = 0;
+
+   //The complete list of output columns. These should be added to the
+   //Vectorized row batch for processing. The index in the row batch is
+   //equal to the index in this array plus initialOutputCol.
+   //Start with size 100 and double when needed.
+   private String [] outputColumnsTypes = new String[100];
+
+   //Slots that are currently handed out and must not be re-used
+   private final Set<Integer> usedOutputColumns = new HashSet<Integer>();
+
+   OutputColumnManager(int initialOutputCol) {
+     this.initialOutputCol = initialOutputCol;
+   }
+
+   /**
+    * Allocates an output column of the given type.
+    * @param columnType type name of the column
+    * @return index of the column in the row batch
+    */
+   int allocateOutputColumn(String columnType) {
+     return initialOutputCol + allocateOutputColumnInternal(columnType);
+   }
+
+   /**
+    * Returns the slot of a free column of the requested type, creating a
+    * new slot when every existing slot of that type is in use.
+    */
+   private int allocateOutputColumnInternal(String columnType) {
+     //First try to re-use a freed column of the same type
+     for (int i = 0; i < outputColCount; i++) {
+       if (!usedOutputColumns.contains(i) &&
+           outputColumnsTypes[i].equals(columnType)) {
+         usedOutputColumns.add(i);
+         return i;
+       }
+     }
+     //Out of allocated columns: expand the array if it is full, then append
+     if (outputColCount == outputColumnsTypes.length) {
+       outputColumnsTypes = Arrays.copyOf(outputColumnsTypes, 2*outputColCount);
+     }
+     int newIndex = outputColCount++;
+     outputColumnsTypes[newIndex] = columnType;
+     usedOutputColumns.add(newIndex);
+     return newIndex;
+   }
+
+   /**
+    * Marks an output column as available for re-use. Indexes below
+    * initialOutputCol are ignored.
+    * @param index index of the column in the row batch
+    */
+   void freeOutputColumn(int index) {
+     int colIndex = index-initialOutputCol;
+     if (colIndex >= 0) {
+       usedOutputColumns.remove(Integer.valueOf(colIndex));
+     }
+   }
+
+   /**
+    * @param index index of the column in the row batch; must not be
+    *        below initialOutputCol
+    * @return type name recorded for that output column
+    */
+   String getOutputColumnType(int index) {
+     return outputColumnsTypes[index-initialOutputCol];
+   }
+
+   /**
+    * @return number of distinct output columns allocated so far,
+    *         whether currently in use or freed
+    */
+   int getNumOfOutputColumn() {
+     return outputColCount;
+   }
+ }
public void setOperatorType(OperatorType opType) {
@@ -151,23 +207,30 @@ public class VectorizationContext {
return ret;
}
+ /**
+ * Returns a vector expression for a given expression
+ * description.
+ * @param exprDesc expression description
+ * @return {@link VectorExpression}, or null if exprDesc is neither a column nor a generic function
+ */
public VectorExpression getVectorExpression(ExprNodeDesc exprDesc) {
+ VectorExpression ve = null;
if (exprDesc instanceof ExprNodeColumnDesc) {
- return getVectorExpression((ExprNodeColumnDesc) exprDesc);
+ ve = getVectorExpression((ExprNodeColumnDesc) exprDesc);
} else if (exprDesc instanceof ExprNodeGenericFuncDesc) {
ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc;
- return getVectorExpression(expr.getGenericUDF(),
+ ve = getVectorExpression(expr.getGenericUDF(),
expr.getChildExprs());
}
- return null;
+ //ve is still null for unsupported descriptions, so it must not be dereferenced here
+ return ve;
}
- public VectorExpression getUnaryMinusExpression(List<ExprNodeDesc> childExprList) {
+ private VectorExpression getUnaryMinusExpression(List<ExprNodeDesc> childExprList) {
ExprNodeDesc childExpr = childExprList.get(0);
int inputCol;
String colType;
VectorExpression v1 = null;
- int outputCol = this.nextOutputColumn++;
if (childExpr instanceof ExprNodeGenericFuncDesc) {
v1 = getVectorExpression(childExpr);
inputCol = v1.getOutputColumn();
@@ -179,8 +242,8 @@ public class VectorizationContext {
} else {
throw new RuntimeException("Expression not supported: "+childExpr);
}
+ int outputCol = ocm.allocateOutputColumn(colType);
String className = getNormalizedTypeName(colType) + "colUnaryMinus";
- this.nextOutputColumn = outputCol+1;
VectorExpression expr;
try {
expr = (VectorExpression) Class.forName(className).
@@ -190,11 +253,12 @@ public class VectorizationContext {
}
if (v1 != null) {
expr.setChildExpressions(new VectorExpression [] {v1});
+ ocm.freeOutputColumn(v1.getOutputColumn());
}
return expr;
}
- public VectorExpression getUnaryPlusExpression(List<ExprNodeDesc> childExprList) {
+ private VectorExpression getUnaryPlusExpression(List<ExprNodeDesc> childExprList) {
ExprNodeDesc childExpr = childExprList.get(0);
int inputCol;
String colType;
@@ -274,6 +338,9 @@ public class VectorizationContext {
ExprNodeDesc leftExpr = childExpr.get(0);
ExprNodeDesc rightExpr = childExpr.get(1);
+ VectorExpression v1 = null;
+ VectorExpression v2 = null;
+
VectorExpression expr = null;
if ( (leftExpr instanceof ExprNodeColumnDesc) &&
(rightExpr instanceof ExprNodeConstantDesc) ) {
@@ -284,7 +351,8 @@ public class VectorizationContext {
String scalarType = constDesc.getTypeString();
String className = getBinaryColumnScalarExpressionClassName(colType,
scalarType, method);
- int outputCol = this.nextOutputColumn++;
+ int outputCol = ocm.allocateOutputColumn(getOutputColType(colType,
+ scalarType, method));
try {
expr = (VectorExpression) Class.forName(className).
getDeclaredConstructors()[0].newInstance(inputCol,
@@ -301,7 +369,8 @@ public class VectorizationContext {
String scalarType = constDesc.getTypeString();
String className = getBinaryColumnScalarExpressionClassName(colType,
scalarType, method);
- int outputCol = this.nextOutputColumn++;
+ String outputColType = getOutputColType(colType, scalarType, method);
+ int outputCol = ocm.allocateOutputColumn(outputColType);
try {
expr = (VectorExpression) Class.forName(className).
getDeclaredConstructors()[0].newInstance(inputCol,
@@ -317,9 +386,10 @@ public class VectorizationContext {
int inputCol2 = columnMap.get(leftColDesc.getColumn());
String colType1 = rightColDesc.getTypeString();
String colType2 = leftColDesc.getTypeString();
+ String outputColType = getOutputColType(colType1, colType2, method);
String className = getBinaryColumnColumnExpressionClassName(colType1,
colType2, method);
- int outputCol = this.nextOutputColumn++;
+ int outputCol = ocm.allocateOutputColumn(outputColType);
try {
expr = (VectorExpression) Class.forName(className).
getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2,
@@ -330,15 +400,15 @@ public class VectorizationContext {
} else if ((leftExpr instanceof ExprNodeGenericFuncDesc)
&& (rightExpr instanceof ExprNodeColumnDesc)) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) rightExpr;
- int outputCol = this.nextOutputColumn++;
- VectorExpression v1 = getVectorExpression(leftExpr);
+ v1 = getVectorExpression(leftExpr);
int inputCol1 = v1.getOutputColumn();
int inputCol2 = columnMap.get(colDesc.getColumn());
String colType1 = v1.getOutputType();
String colType2 = colDesc.getTypeString();
+ String outputColType = getOutputColType(colType1, colType2, method);
String className = getBinaryColumnColumnExpressionClassName(colType1,
colType2, method);
- this.nextOutputColumn = outputCol+1;
+ int outputCol = ocm.allocateOutputColumn(outputColType);
try {
expr = (VectorExpression) Class.forName(className).
getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2,
@@ -350,14 +420,14 @@ public class VectorizationContext {
} else if ((leftExpr instanceof ExprNodeGenericFuncDesc)
&& (rightExpr instanceof ExprNodeConstantDesc)) {
ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr;
- int outputCol = this.nextOutputColumn++;
- VectorExpression v1 = getVectorExpression(leftExpr);
+ v1 = getVectorExpression(leftExpr);
int inputCol1 = v1.getOutputColumn();
String colType1 = v1.getOutputType();
String scalarType = constDesc.getTypeString();
+ String outputColType = getOutputColType(colType1, scalarType, method);
+ int outputCol = ocm.allocateOutputColumn(outputColType);
String className = getBinaryColumnScalarExpressionClassName(colType1,
scalarType, method);
- this.nextOutputColumn = outputCol+1;
try {
expr = (VectorExpression) Class.forName(className).
getDeclaredConstructors()[0].newInstance(inputCol1,
@@ -369,15 +439,15 @@ public class VectorizationContext {
} else if ((leftExpr instanceof ExprNodeColumnDesc)
&& (rightExpr instanceof ExprNodeGenericFuncDesc)) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leftExpr;
- int outputCol = this.nextOutputColumn++;
- VectorExpression v2 = getVectorExpression(rightExpr);
+ v2 = getVectorExpression(rightExpr);
int inputCol1 = columnMap.get(colDesc.getColumn());
int inputCol2 = v2.getOutputColumn();
String colType1 = colDesc.getTypeString();
String colType2 = v2.getOutputType();
+ String outputColType = getOutputColType(colType1, colType2, method);
+ int outputCol = ocm.allocateOutputColumn(outputColType);
String className = getBinaryColumnColumnExpressionClassName(colType1,
colType2, method);
- this.nextOutputColumn = outputCol+1;
try {
expr = (VectorExpression) Class.forName(className).
getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2,
@@ -389,14 +459,14 @@ public class VectorizationContext {
} else if ((leftExpr instanceof ExprNodeConstantDesc)
&& (rightExpr instanceof ExprNodeGenericFuncDesc)) {
ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr;
- int outputCol = this.nextOutputColumn++;
- VectorExpression v2 = getVectorExpression(rightExpr);
+ v2 = getVectorExpression(rightExpr);
int inputCol2 = v2.getOutputColumn();
String colType2 = v2.getOutputType();
String scalarType = constDesc.getTypeString();
+ String outputColType = getOutputColType(colType2, scalarType, method);
+ int outputCol = ocm.allocateOutputColumn(outputColType);
String className = getBinaryScalarColumnExpressionClassName(colType2,
scalarType, method);
- this.nextOutputColumn = outputCol+1;
try {
expr = (VectorExpression) Class.forName(className).
getDeclaredConstructors()[0].newInstance(inputCol2,
@@ -409,17 +479,16 @@ public class VectorizationContext {
&& (rightExpr instanceof ExprNodeGenericFuncDesc)) {
//For arithmetic expression, the child expressions must be materializing
//columns
- int outputCol = this.nextOutputColumn++;
- VectorExpression v1 = getVectorExpression(leftExpr);
- VectorExpression v2 = getVectorExpression(rightExpr);
+ v1 = getVectorExpression(leftExpr);
+ v2 = getVectorExpression(rightExpr);
int inputCol1 = v1.getOutputColumn();
int inputCol2 = v2.getOutputColumn();
String colType1 = v1.getOutputType();
String colType2 = v2.getOutputType();
+ String outputColType = getOutputColType(colType1, colType2, method);
+ int outputCol = ocm.allocateOutputColumn(outputColType);
String className = getBinaryColumnColumnExpressionClassName(colType1,
colType2, method);
- //Reclaim the output columns
- this.nextOutputColumn = outputCol+1;
try {
expr = (VectorExpression) Class.forName(className).
getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2,
@@ -429,8 +498,14 @@ public class VectorizationContext {
}
expr.setChildExpressions(new VectorExpression [] {v1, v2});
}
+ //Reclaim output columns of children to be re-used later
+ if (v1 != null) {
+ ocm.freeOutputColumn(v1.getOutputColumn());
+ }
+ if (v2 != null) {
+ ocm.freeOutputColumn(v2.getOutputColumn());
+ }
return expr;
-
}
private VectorExpression getVectorExpression(GenericUDFOPOr udf,
@@ -543,6 +618,8 @@ public class VectorizationContext {
ExprNodeDesc rightExpr = childExpr.get(1);
VectorExpression expr = null;
+ VectorExpression v1 = null;
+ VectorExpression v2 = null;
if ( (leftExpr instanceof ExprNodeColumnDesc) &&
(rightExpr instanceof ExprNodeConstantDesc) ) {
ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
@@ -593,14 +670,16 @@ public class VectorizationContext {
}
} else if ( (leftExpr instanceof ExprNodeGenericFuncDesc) &&
(rightExpr instanceof ExprNodeColumnDesc) ) {
- VectorExpression v1 = getVectorExpression((ExprNodeGenericFuncDesc) leftExpr);
- ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
+ v1 = getVectorExpression((ExprNodeGenericFuncDesc) leftExpr);
+ ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) rightExpr;
int inputCol1 = v1.getOutputColumn();
int inputCol2 = columnMap.get(leftColDesc.getColumn());
String colType1 = v1.getOutputType();
String colType2 = leftColDesc.getTypeString();
String className = getFilterColumnColumnExpressionClassName(colType1,
colType2, opName);
+ System.out.println("In the context, Input column 1: "+inputCol1+
+ ", column 2: "+inputCol2);
try {
expr = (VectorExpression) Class.forName(className).
getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2);
@@ -611,7 +690,7 @@ public class VectorizationContext {
} else if ( (leftExpr instanceof ExprNodeColumnDesc) &&
(rightExpr instanceof ExprNodeGenericFuncDesc) ) {
ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) leftExpr;
- VectorExpression v2 = getVectorExpression((ExprNodeGenericFuncDesc) rightExpr);
+ v2 = getVectorExpression((ExprNodeGenericFuncDesc) rightExpr);
int inputCol1 = columnMap.get(rightColDesc.getColumn());
int inputCol2 = v2.getOutputColumn();
String colType1 = rightColDesc.getTypeString();
@@ -627,8 +706,8 @@ public class VectorizationContext {
expr.setChildExpressions(new VectorExpression [] {v2});
} else if ( (leftExpr instanceof ExprNodeGenericFuncDesc) &&
(rightExpr instanceof ExprNodeConstantDesc) ) {
- VectorExpression v1 = getVectorExpression((ExprNodeGenericFuncDesc) leftExpr);
- ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr;
+ v1 = getVectorExpression((ExprNodeGenericFuncDesc) leftExpr);
+ ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr;
int inputCol1 = v1.getOutputColumn();
String colType1 = v1.getOutputType();
String scalarType = constDesc.getTypeString();
@@ -645,7 +724,7 @@ public class VectorizationContext {
} else if ( (leftExpr instanceof ExprNodeConstantDesc) &&
(rightExpr instanceof ExprNodeGenericFuncDesc) ) {
ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr;
- VectorExpression v2 = getVectorExpression((ExprNodeGenericFuncDesc) rightExpr);
+ v2 = getVectorExpression((ExprNodeGenericFuncDesc) rightExpr);
int inputCol2 = v2.getOutputColumn();
String scalarType = constDesc.getTypeString();
String colType = v2.getOutputType();
@@ -662,8 +741,8 @@ public class VectorizationContext {
} else {
//For comparison expression, the child expressions must be materializing
//columns
- VectorExpression v1 = getVectorExpression(leftExpr);
- VectorExpression v2 = getVectorExpression(rightExpr);
+ v1 = getVectorExpression(leftExpr);
+ v2 = getVectorExpression(rightExpr);
int inputCol1 = v1.getOutputColumn();
int inputCol2 = v2.getOutputColumn();
String colType1 = v1.getOutputType();
@@ -678,6 +757,12 @@ public class VectorizationContext {
}
expr.setChildExpressions(new VectorExpression [] {v1, v2});
}
+ if (v1 != null) {
+ ocm.freeOutputColumn(v1.getOutputColumn());
+ }
+ if (v2 != null) {
+ ocm.freeOutputColumn(v2.getOutputColumn());
+ }
return expr;
}
@@ -774,6 +859,29 @@ public class VectorizationContext {
return b.toString();
}
+ /**
+  * Determines the type name of the column that receives the result of a
+  * binary expression.
+  * Division always produces "double". A "double" or "float" operand also
+  * produces "double"; "float" is included so that this agrees with the
+  * single-input overload of this method. Otherwise a "string" operand
+  * produces "string", and everything else produces "long".
+  * @param inputType1 type name of the first operand, compared case-insensitively
+  * @param inputType2 type name of the second operand, compared case-insensitively
+  * @param method name of the operation; only "divide" is treated specially
+  * @return "double", "string" or "long"
+  */
+ private String getOutputColType(String inputType1, String inputType2, String method) {
+   if (method.equalsIgnoreCase("divide")
+       || inputType1.equalsIgnoreCase("double") || inputType2.equalsIgnoreCase("double")
+       || inputType1.equalsIgnoreCase("float") || inputType2.equalsIgnoreCase("float")) {
+     return "double";
+   }
+   if (inputType1.equalsIgnoreCase("string") || inputType2.equalsIgnoreCase("string")) {
+     return "string";
+   }
+   return "long";
+ }
+
+ /**
+  * Determines the type name of the column that receives the result of a
+  * single-input expression.
+  * @param inputType type name of the operand, compared case-insensitively
+  * @param method name of the operation; not consulted by this overload
+  * @return "double" for "float" or "double" input, "string" for "string"
+  *         input, and "long" for anything else
+  */
+ private String getOutputColType(String inputType, String method) {
+   boolean floatingPoint =
+       inputType.equalsIgnoreCase("float") || inputType.equalsIgnoreCase("double");
+   if (floatingPoint) {
+     return "double";
+   }
+   return inputType.equalsIgnoreCase("string") ? "string" : "long";
+ }
+
static Object[][] aggregatesDefinition = {
{"min", "Long", VectorUDAFMinLong.class},
{"min", "Double", VectorUDAFMinDouble.class},
@@ -800,8 +908,8 @@ public class VectorizationContext {
{"stddev_samp","Long", VectorUDAFStdSampLong.class},
{"stddev_samp","Double",VectorUDAFStdSampDouble.class},
};
-
- public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc)
+
+ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc)
throws HiveException {
ArrayList<ExprNodeDesc> paramDescList = desc.getParameters();
VectorExpression[] vectorParams = new VectorExpression[paramDescList.size()];
@@ -810,7 +918,7 @@ public class VectorizationContext {
ExprNodeDesc exprDesc = paramDescList.get(i);
vectorParams[i] = this.getVectorExpression(exprDesc);
}
-
+
String aggregateName = desc.getGenericUDAFName();
List<ExprNodeDesc> params = desc.getParameters();
//TODO: handle length != 1
@@ -821,43 +929,41 @@ public class VectorizationContext {
for (Object[] aggDef : aggregatesDefinition) {
if (aggDef[0].equals (aggregateName) &&
aggDef[1].equals(inputType)) {
- Class<? extends VectorAggregateExpression> aggClass =
+ Class<? extends VectorAggregateExpression> aggClass =
(Class<? extends VectorAggregateExpression>) (aggDef[2]);
try
{
- Constructor<? extends VectorAggregateExpression> ctor =
+ Constructor<? extends VectorAggregateExpression> ctor =
aggClass.getConstructor(VectorExpression.class);
VectorAggregateExpression aggExpr = ctor.newInstance(vectorParams[0]);
return aggExpr;
}
// TODO: change to 1.7 syntax when possible
- //catch (InvocationTargetException | IllegalAccessException
+ //catch (InvocationTargetException | IllegalAccessException
// | NoSuchMethodException | InstantiationException)
catch (Exception e)
{
- throw new HiveException("Internal exception for vector aggregate : \"" +
+ throw new HiveException("Internal exception for vector aggregate : \"" +
aggregateName + "\" for type: \"" + inputType + "", e);
}
}
}
- throw new HiveException("Vector aggregate not implemented: \"" + aggregateName +
+ throw new HiveException("Vector aggregate not implemented: \"" + aggregateName +
"\" for type: \"" + inputType + "");
}
-
+
static Object[][] columnTypes = {
{"Double", DoubleColumnVector.class},
{"Long", LongColumnVector.class},
{"String", BytesColumnVector.class},
};
- public VectorizedRowBatch allocateRowBatch(int rowCount) throws HiveException {
- VectorizedRowBatch ret = new VectorizedRowBatch(nextOutputColumn, rowCount);
- for (int i=0; i < nextOutputColumn; ++i) {
- if (false == outputColumnTypes.containsKey(i)) {
- continue;
- }
- String columnTypeName = outputColumnTypes.get(i);
+ private VectorizedRowBatch allocateRowBatch(int rowCount) throws HiveException {
+ int columnCount = firstOutputColumnIndex + ocm.getNumOfOutputColumn();
+ VectorizedRowBatch ret = new VectorizedRowBatch(columnCount, rowCount);
+ for (int i=0; i < columnCount; ++i) {
+ String columnTypeName = ocm.getOutputColumnType(i);
for (Object[] columnType: columnTypes) {
if (columnTypeName.equalsIgnoreCase((String)columnType[0])) {
Class<? extends ColumnVector> columnTypeClass = (Class<? extends ColumnVector>)columnType[1];
@@ -883,26 +989,23 @@ public class VectorizationContext {
{"long", PrimitiveObjectInspectorFactory.writableLongObjectInspector},
};
- public ObjectInspector getVectorRowObjectInspector(List<String> columnNames) throws HiveException {
- List<ObjectInspector> oids = new ArrayList<ObjectInspector>();
- for(String columnName: columnNames) {
- int columnIndex = columnMap.get(columnName);
- String outputType = outputColumnTypes.get(columnIndex);
- ObjectInspector oi = null;
- for(Object[] moi: mapObjectInspectors) {
- if (outputType.equalsIgnoreCase((String) moi[0])) {
- oi = (ObjectInspector) moi[1];
- break;
- }
- }
- if (oi == null) {
- throw new HiveException(String.format("Unsuported type: %s for column %d:%s",
- outputType, columnIndex, columnName));
- }
- oids.add(oi);
+ /**
+  * Builds a fresh map describing every output column allocated so far.
+  * @return map from row batch column index (slot plus
+  *         firstOutputColumnIndex) to the type name of that column
+  */
+ public Map<Integer, String> getOutputColumnTypeMap() {
+   Map<Integer, String> typesByColumn = new HashMap<Integer, String>();
+   int batchIndex = this.firstOutputColumnIndex;
+   for (int slot = 0; slot < ocm.outputColCount; slot++, batchIndex++) {
+     typesByColumn.put(batchIndex, ocm.outputColumnsTypes[slot]);
+   }
+   return typesByColumn;
+ }
+
+ /**
+ * Creates a column vector for the given type name.
+ * The name is compared case-insensitively: "double" gives a
+ * DoubleColumnVector, "string" gives a BytesColumnVector and any other
+ * name gives a LongColumnVector.
+ * @param type type name of the column; must not be null, since it is dereferenced
+ * @param defaultSize value passed to the column vector constructor
+ * @return the newly created column vector
+ */
+ public ColumnVector allocateColumnVector(String type, int defaultSize) {
+ if (type.equalsIgnoreCase("double")) {
+ return new DoubleColumnVector(defaultSize);
+ } else if (type.equalsIgnoreCase("string")) {
+ return new BytesColumnVector(defaultSize);
+ } else {
+ // Every remaining type name falls back to a long vector
+ return new LongColumnVector(defaultSize);
}
-
- return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, oids);
}
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java?rev=1478816&r1=1478815&r2=1478816&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java Fri May 3 14:39:29 2013
@@ -260,14 +260,6 @@ public class CodeGen {
generateColumnArithmeticColumn(tdesc);
} else if (tdesc[0].equals("ColumnUnaryMinus")) {
generateColumnUnaryMinus(tdesc);
- } else if (tdesc[0].equals("VectorUDAFCount")) {
- generateVectorUDAFCount(tdesc);
- } else if (tdesc[0].equals("VectorUDAFSum")) {
- generateVectorUDAFSum(tdesc);
- } else if (tdesc[0].equals("VectorUDAFAvg")) {
- generateVectorUDAFAvg(tdesc);
- } else if (tdesc[0].equals("VectorUDAFVar")) {
- generateVectorUDAFVar(tdesc);
} else if (tdesc[0].equals("FilterStringColumnCompareScalar")) {
generateFilterStringColumnCompareScalar(tdesc);
} else {
Added: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java?rev=1478816&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java (added)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java Fri May 3 14:39:29 2013
@@ -0,0 +1,145 @@
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColGreaterStringScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColAddLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColModuloLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColMultiplyLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.LongColSubtractLongColumn;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.udf.UDFOPMinus;
+import org.apache.hadoop.hive.ql.udf.UDFOPMod;
+import org.apache.hadoop.hive.ql.udf.UDFOPMultiply;
+import org.apache.hadoop.hive.ql.udf.UDFOPPlus;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
+import org.junit.Test;
+
+/**
+ * Tests that VectorizationContext builds the expected vector expression
+ * trees and assigns intermediate output columns as expected.
+ */
+public class TestVectorizationContext {
+
+  /**
+   * Vectorizes a nested arithmetic expression over six long columns and
+   * checks the generated expression classes, the child wiring and the
+   * output column chosen for every node, including re-use of freed columns.
+   */
+  @Test
+  public void testArithmeticExpressionVectorization() {
+    // Create original expression tree for following
+    // (plus (minus (plus col1 col2) col3) (multiply col4 (mod col5 col6)) )
+    GenericUDFBridge udf1 = new GenericUDFBridge("+", true, UDFOPPlus.class);
+    GenericUDFBridge udf2 = new GenericUDFBridge("-", true, UDFOPMinus.class);
+    GenericUDFBridge udf3 = new GenericUDFBridge("*", true, UDFOPMultiply.class);
+    GenericUDFBridge udf4 = new GenericUDFBridge("+", true, UDFOPPlus.class);
+    GenericUDFBridge udf5 = new GenericUDFBridge("%", true, UDFOPMod.class);
+
+    ExprNodeGenericFuncDesc sumExpr = new ExprNodeGenericFuncDesc();
+    sumExpr.setGenericUDF(udf1);
+    ExprNodeGenericFuncDesc minusExpr = new ExprNodeGenericFuncDesc();
+    minusExpr.setGenericUDF(udf2);
+    ExprNodeGenericFuncDesc multiplyExpr = new ExprNodeGenericFuncDesc();
+    multiplyExpr.setGenericUDF(udf3);
+    ExprNodeGenericFuncDesc sum2Expr = new ExprNodeGenericFuncDesc();
+    sum2Expr.setGenericUDF(udf4);
+    ExprNodeGenericFuncDesc modExpr = new ExprNodeGenericFuncDesc();
+    modExpr.setGenericUDF(udf5);
+
+    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false);
+    ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(Long.class, "col2", "table", false);
+    ExprNodeColumnDesc col3Expr = new ExprNodeColumnDesc(Long.class, "col3", "table", false);
+    ExprNodeColumnDesc col4Expr = new ExprNodeColumnDesc(Long.class, "col4", "table", false);
+    ExprNodeColumnDesc col5Expr = new ExprNodeColumnDesc(Long.class, "col5", "table", false);
+    ExprNodeColumnDesc col6Expr = new ExprNodeColumnDesc(Long.class, "col6", "table", false);
+
+    List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
+    List<ExprNodeDesc> children2 = new ArrayList<ExprNodeDesc>(2);
+    List<ExprNodeDesc> children3 = new ArrayList<ExprNodeDesc>(2);
+    List<ExprNodeDesc> children4 = new ArrayList<ExprNodeDesc>(2);
+    List<ExprNodeDesc> children5 = new ArrayList<ExprNodeDesc>(2);
+
+    children1.add(minusExpr);
+    children1.add(multiplyExpr);
+    sumExpr.setChildExprs(children1);
+
+    children2.add(sum2Expr);
+    children2.add(col3Expr);
+    minusExpr.setChildExprs(children2);
+
+    children3.add(col1Expr);
+    children3.add(col2Expr);
+    sum2Expr.setChildExprs(children3);
+
+    children4.add(col4Expr);
+    children4.add(modExpr);
+    multiplyExpr.setChildExprs(children4);
+
+    children5.add(col5Expr);
+    children5.add(col6Expr);
+    modExpr.setChildExprs(children5);
+
+    //Input columns occupy indexes 0..5 so that none of them coincides
+    //with the first output column (6) handed to the context below.
+    Map<String, Integer> columnMap = new HashMap<String, Integer>();
+    columnMap.put("col1", 0);
+    columnMap.put("col2", 1);
+    columnMap.put("col3", 2);
+    columnMap.put("col4", 3);
+    columnMap.put("col5", 4);
+    columnMap.put("col6", 5);
+
+    //Generate vectorized expression
+    VectorizationContext vc = new VectorizationContext(columnMap, 6);
+
+    VectorExpression ve = vc.getVectorExpression(sumExpr);
+
+    //Verify vectorized expression
+    assertTrue(ve instanceof LongColAddLongColumn);
+    assertEquals(2, ve.getChildExpressions().length);
+    VectorExpression childExpr1 = ve.getChildExpressions()[0];
+    VectorExpression childExpr2 = ve.getChildExpressions()[1];
+    assertEquals(6, ve.getOutputColumn());
+
+    assertTrue(childExpr1 instanceof LongColSubtractLongColumn);
+    assertEquals(1, childExpr1.getChildExpressions().length);
+    assertTrue(childExpr1.getChildExpressions()[0] instanceof LongColAddLongColumn);
+    assertEquals(7, childExpr1.getOutputColumn());
+    assertEquals(6, childExpr1.getChildExpressions()[0].getOutputColumn());
+
+    assertTrue(childExpr2 instanceof LongColMultiplyLongColumn);
+    assertEquals(1, childExpr2.getChildExpressions().length);
+    assertTrue(childExpr2.getChildExpressions()[0] instanceof LongColModuloLongColumn);
+    assertEquals(8, childExpr2.getOutputColumn());
+    assertEquals(6, childExpr2.getChildExpressions()[0].getOutputColumn());
+  }
+
+  /**
+   * Vectorizes "col1 > 'Alpha'" in filter mode and checks that the string
+   * column versus string scalar filter expression is chosen.
+   */
+  @Test
+  public void testStringFilterExpressions() {
+    ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(String.class, "col1", "table", false);
+    ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc("Alpha");
+
+    GenericUDFOPGreaterThan udf = new GenericUDFOPGreaterThan();
+    ExprNodeGenericFuncDesc exprDesc = new ExprNodeGenericFuncDesc();
+    exprDesc.setGenericUDF(udf);
+    List<ExprNodeDesc> children1 = new ArrayList<ExprNodeDesc>(2);
+    children1.add(col1Expr);
+    children1.add(constDesc);
+    exprDesc.setChildExprs(children1);
+
+    //Input columns occupy indexes 0..1; the first output column is 2
+    Map<String, Integer> columnMap = new HashMap<String, Integer>();
+    columnMap.put("col1", 0);
+    columnMap.put("col2", 1);
+
+    VectorizationContext vc = new VectorizationContext(columnMap, 2);
+    vc.setOperatorType(OperatorType.FILTER);
+
+    VectorExpression ve = vc.getVectorExpression(exprDesc);
+
+    assertTrue(ve instanceof FilterStringColGreaterStringScalar);
+  }
+}
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java?rev=1478816&r1=1478815&r2=1478816&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java Fri May 3 14:39:29 2013
@@ -152,7 +152,7 @@ public class TestVectorFilterExpressions
//Basic case
lcv0.vector[0] = 10;
lcv0.vector[1] = 20;
- lcv0.vector[2] = 10;
+ lcv0.vector[2] = 9;
lcv0.vector[3] = 20;
lcv0.vector[4] = 10;
@@ -162,16 +162,9 @@ public class TestVectorFilterExpressions
lcv1.vector[3] = 10;
lcv1.vector[4] = 20;
- childExpr.evaluate(vrg);
-
- assertEquals(20, lcv2.vector[0]);
- assertEquals(30, lcv2.vector[1]);
- assertEquals(20, lcv2.vector[2]);
- assertEquals(30, lcv2.vector[3]);
- assertEquals(20, lcv2.vector[4]);
-
expr.evaluate(vrg);
- assertEquals(0, vrg.size);
+ assertEquals(1, vrg.size);
+ assertEquals(2, vrg.selected[0]);
}
}