You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/05/10 10:16:09 UTC
svn commit: r1480922 - in
/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec:
ExecDriver.java vector/VectorMapOperator.java
vector/VectorizationContext.java vector/VectorizedInputFormatInterface.java
Author: hashutosh
Date: Fri May 10 08:16:09 2013
New Revision: 1480922
URL: http://svn.apache.org/r1480922
Log:
HIVE-4457 : Queries not supported by vectorized code path should fall back to non vector path. (Jitendra Nath Pandey via Ashutosh Chauhan)
Added:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedInputFormatInterface.java
Modified:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java?rev=1480922&r1=1480921&r2=1480922&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java Fri May 10 08:16:09 2013
@@ -26,6 +26,7 @@ import java.io.Serializable;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Enumeration;
@@ -56,6 +57,9 @@ import org.apache.hadoop.hive.ql.ErrorMs
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.exec.vector.VectorExecMapper;
+import org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
import org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
@@ -280,8 +284,14 @@ public class ExecDriver extends Task<Map
HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
if (vectorPath) {
- System.out.println("Going down the vectorization path");
- job.setMapperClass(VectorExecMapper.class);
+ if (validateVectorPath()) {
+ System.out.println("Going down the vectorization path");
+ job.setMapperClass(VectorExecMapper.class);
+ } else {
+ //fall back to non-vector mode
+ HiveConf.setBoolVar(job, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false);
+ job.setMapperClass(ExecMapper.class);
+ }
} else {
job.setMapperClass(ExecMapper.class);
}
@@ -522,6 +532,68 @@ public class ExecDriver extends Task<Map
return (returnVal);
}
+ private boolean validateVectorPath() {
+ System.out.println("Validating if vectorized execution is applicable");
+ LOG.info("Validating if vectorized execution is applicable");
+ MapredWork thePlan = this.getWork();
+
+ for (String path : thePlan.getPathToPartitionInfo().keySet()) {
+ PartitionDesc pd = thePlan.getPathToPartitionInfo().get(path);
+ List<Class<?>> interfaceList =
+ Arrays.asList(pd.getInputFileFormatClass().getInterfaces());
+ if (!interfaceList.contains(VectorizedInputFormatInterface.class)) {
+ System.out.println("Input format: " + pd.getInputFileFormatClassName()
+ + ", doesn't provide vectorized input");
+ LOG.info("Input format: " + pd.getInputFileFormatClassName()
+ + ", doesn't provide vectorized input");
+ return false;
+ }
+ }
+ VectorizationContext vc = new VectorizationContext(null, 0);
+ for (String onefile : thePlan.getPathToAliases().keySet()) {
+ List<String> aliases = thePlan.getPathToAliases().get(onefile);
+ for (String onealias : aliases) {
+ Operator<? extends OperatorDesc> op = thePlan.getAliasToWork().get(
+ onealias);
+ Operator<? extends OperatorDesc> vectorOp = null;
+ try {
+ vectorOp = VectorMapOperator.vectorizeOperator(op, vc);
+ } catch (Exception e) {
+ LOG.info("Cannot vectorize the plan", e);
+ System.out.println("Cannot vectorize the plan: "+ e);
+ return false;
+ }
+ if (vectorOp == null) {
+ LOG.info("Cannot vectorize the plan");
+ System.out.println("Cannot vectorize the plan");
+ return false;
+ }
+ //verify the expressions contained in the operators
+ try {
+ validateVectorOperator(vectorOp);
+ } catch (HiveException e) {
+ LOG.info("Cannot vectorize the plan", e);
+ System.out.println("Cannot vectorize the plan");
+ return false;
+ }
+ }
+ }
+ System.out.println("Query can be vectorized");
+ return true;
+ }
+
+ private void validateVectorOperator(Operator<? extends OperatorDesc> vectorOp)
+ throws HiveException {
+ if (!vectorOp.getName().equals("TS")) {
+ vectorOp.initialize(job, null);
+ }
+ if (vectorOp.getChildOperators() != null) {
+ for (Operator<? extends OperatorDesc> vop : vectorOp.getChildOperators()) {
+ validateVectorOperator(vop);
+ }
+ }
+ }
+
/**
* Set hive input format, and input format file if necessary.
*/
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java?rev=1480922&r1=1480921&r2=1480922&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java Fri May 10 08:16:09 2013
@@ -35,14 +35,12 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ExecMapperContext;
import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
@@ -54,7 +52,6 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
@@ -149,10 +146,10 @@ public class VectorMapOperator extends O
private final boolean isPartitioned;
private final StructObjectInspector tblRawRowObjectInspector; // without partition
private final StructObjectInspector partObjectInspector; // partition
- private StructObjectInspector rowObjectInspector;
+ private final StructObjectInspector rowObjectInspector;
private final Converter partTblObjectInspectorConverter;
private final Object[] rowWithPart;
- private Object[] rowWithPartAndVC;
+ private final Object[] rowWithPartAndVC;
private final Deserializer deserializer;
private String tableName;
private String partName;
@@ -325,73 +322,6 @@ public class VectorMapOperator extends O
return opCtx;
}
- /**
- * Set the inspectors given a input. Since a mapper can span multiple partitions, the inspectors
- * need to be changed if the input changes
- **/
- private void setInspectorInput(MapInputPath inp) {
- Operator<? extends OperatorDesc> op = inp.getOp();
-
- deserializer = opCtxMap.get(inp).getDeserializer();
- isPartitioned = opCtxMap.get(inp).isPartitioned();
- rowWithPart = opCtxMap.get(inp).getRowWithPart();
- rowWithPartAndVC = opCtxMap.get(inp).getRowWithPartAndVC();
- tblRowObjectInspector = opCtxMap.get(inp).getRowObjectInspector();
- partTblObjectInspectorConverter = opCtxMap.get(inp).getPartTblObjectInspectorConverter();
- if (listInputPaths.contains(inp)) {
- return;
- }
-
- listInputPaths.add(inp);
-
- // The op may not be a TableScan for mapjoins
- // Consider the query: select /*+MAPJOIN(a)*/ count(*) FROM T1 a JOIN T2 b ON a.key = b.key;
- // In that case, it will be a Select, but the rowOI need not be ammended
- if (op instanceof TableScanOperator) {
- StructObjectInspector tblRawRowObjectInspector =
- opCtxMap.get(inp).getTblRawRowObjectInspector();
- StructObjectInspector partObjectInspector = opCtxMap.get(inp).partObjectInspector;
- TableScanOperator tsOp = (TableScanOperator) op;
- TableScanDesc tsDesc = tsOp.getConf();
- if (tsDesc != null) {
- this.vcs = tsDesc.getVirtualCols();
- if (vcs != null && vcs.size() > 0) {
- List<String> vcNames = new ArrayList<String>(vcs.size());
- this.vcValues = new Writable[vcs.size()];
- List<ObjectInspector> vcsObjectInspectors = new ArrayList<ObjectInspector>(vcs.size());
- for (int i = 0; i < vcs.size(); i++) {
- VirtualColumn vc = vcs.get(i);
- vcsObjectInspectors.add(
- PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
- ((PrimitiveTypeInfo) vc.getTypeInfo()).getPrimitiveCategory()));
- vcNames.add(vc.getName());
- }
- StructObjectInspector vcStructObjectInspector = ObjectInspectorFactory
- .getStandardStructObjectInspector(vcNames,
- vcsObjectInspectors);
- if (isPartitioned) {
- this.rowWithPartAndVC = new Object[3];
- this.rowWithPartAndVC[1] = this.rowWithPart[1];
- } else {
- this.rowWithPartAndVC = new Object[2];
- }
- if (partObjectInspector == null) {
- this.tblRowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays
- .asList(new StructObjectInspector[] {
- tblRowObjectInspector, vcStructObjectInspector}));
- } else {
- this.tblRowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays
- .asList(new StructObjectInspector[] {
- tblRawRowObjectInspector, partObjectInspector,
- vcStructObjectInspector}));
- }
- opCtxMap.get(inp).rowObjectInspector = this.tblRowObjectInspector;
- opCtxMap.get(inp).rowWithPartAndVC = this.rowWithPartAndVC;
- }
- }
- }
- }
-
// Return the mapping for table descriptor to the expected table OI
/**
* Traverse all the partitions for a table, and get the OI for the table.
@@ -532,7 +462,6 @@ public class VectorMapOperator extends O
LOG.info("dump " + op.getName() + " "
+ opCtxMap.get(inp).getRowObjectInspector().getTypeName());
}
- setInspectorInput(inp);
}
}
@@ -551,9 +480,9 @@ public class VectorMapOperator extends O
}
}
- private Operator<? extends OperatorDesc> vectorizeOperator
+ public static Operator<? extends OperatorDesc> vectorizeOperator
(Operator<? extends OperatorDesc> op, VectorizationContext
- vectorizationContext) throws HiveException {
+ vectorizationContext) throws HiveException, CloneNotSupportedException {
Operator<? extends OperatorDesc> vectorOp;
boolean recursive = true;
@@ -574,7 +503,7 @@ public class VectorMapOperator extends O
op.getConf());
break;
case TABLESCAN:
- vectorOp = op;
+ vectorOp = op.clone();
break;
default:
throw new HiveException("Operator: " + op.getName() + ", " +
@@ -587,8 +516,8 @@ public class VectorMapOperator extends O
List<Operator<? extends OperatorDesc>> vectorizedChildren = new
ArrayList<Operator<? extends OperatorDesc>>(children.size());
for (Operator<? extends OperatorDesc> childOp : children) {
- Operator<? extends OperatorDesc> vectorizedChild = this
- .vectorizeOperator(childOp, vectorizationContext);
+ Operator<? extends OperatorDesc> vectorizedChild =
+ vectorizeOperator(childOp, vectorizationContext);
List<Operator<? extends OperatorDesc>> parentList =
new ArrayList<Operator<? extends OperatorDesc>>();
parentList.add(vectorOp);
@@ -695,25 +624,7 @@ public class VectorMapOperator extends O
// multiple files/partitions.
@Override
public void cleanUpInputFileChangedOp() throws HiveException {
- Path fpath = new Path((new Path(this.getExecContext().getCurrentInputFile()))
- .toUri().getPath());
-
- for (String onefile : conf.getPathToAliases().keySet()) {
- Path onepath = new Path(new Path(onefile).toUri().getPath());
- // check for the operators who will process rows coming to this Map
- // Operator
- if (!onepath.toUri().relativize(fpath.toUri()).equals(fpath.toUri())) {
- String onealias = conf.getPathToAliases().get(onefile).get(0);
- Operator<? extends OperatorDesc> op =
- conf.getAliasToWork().get(onealias);
-
- LOG.info("Processing alias " + onealias + " for file " + onefile);
- MapInputPath inp = new MapInputPath(onefile, onealias, op);
- setInspectorInput(inp);
- break;
- }
- }
}
public static Writable[] populateVirtualColumnValues(ExecMapperContext ctx,
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1480922&r1=1480921&r2=1480922&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Fri May 10 08:16:09 2013
@@ -112,6 +112,14 @@ public class VectorizationContext {
this.firstOutputColumnIndex = initialOutputCol;
}
+ private int getInputColumnIndex(String name) {
+ if (columnMap == null) {
+ //Null is treated as test call, is used for validation test.
+ return 0;
+ } else {
+ return columnMap.get(name);
+ }
+ }
private class OutputColumnManager {
private final int initialOutputCol;
@@ -182,7 +190,7 @@ public class VectorizationContext {
private VectorExpression getVectorExpression(ExprNodeColumnDesc
exprDesc) {
- int columnNum = columnMap.get(exprDesc.getColumn());
+ int columnNum = getInputColumnIndex(exprDesc.getColumn());
VectorExpression expr = null;
switch (opType) {
case FILTER:
@@ -198,7 +206,7 @@ public class VectorizationContext {
return expr;
}
- public VectorExpression[] getVectorExpressions(List<ExprNodeDesc> exprNodes) {
+ public VectorExpression[] getVectorExpressions(List<ExprNodeDesc> exprNodes) throws HiveException {
int i = 0;
VectorExpression[] ret = new VectorExpression[exprNodes.size()];
for (ExprNodeDesc e : exprNodes) {
@@ -212,8 +220,9 @@ public class VectorizationContext {
* description.
* @param exprDesc, Expression description
* @return {@link VectorExpression}
+ * @throws HiveException
*/
- public VectorExpression getVectorExpression(ExprNodeDesc exprDesc) {
+ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc) throws HiveException {
VectorExpression ve = null;
if (exprDesc instanceof ExprNodeColumnDesc) {
ve = getVectorExpression((ExprNodeColumnDesc) exprDesc);
@@ -221,12 +230,22 @@ public class VectorizationContext {
ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc;
ve = getVectorExpression(expr.getGenericUDF(),
expr.getChildExprs());
+ } else if (exprDesc instanceof ExprNodeConstantDesc) {
+ ve = getConstantVectorExpression((ExprNodeConstantDesc) exprDesc);
+ }
+ if (ve == null) {
+ throw new HiveException("Could not vectorize expression: "+exprDesc.getName());
}
- System.out.println("VectorExpression = "+ve.toString());
return ve;
}
- private VectorExpression getUnaryMinusExpression(List<ExprNodeDesc> childExprList) {
+ private VectorExpression getConstantVectorExpression(ExprNodeConstantDesc exprDesc)
+ throws HiveException {
+ return null;
+ }
+
+ private VectorExpression getUnaryMinusExpression(List<ExprNodeDesc> childExprList)
+ throws HiveException {
ExprNodeDesc childExpr = childExprList.get(0);
int inputCol;
String colType;
@@ -240,7 +259,7 @@ public class VectorizationContext {
inputCol = columnMap.get(colDesc.getColumn());
colType = colDesc.getTypeString();
} else {
- throw new RuntimeException("Expression not supported: "+childExpr);
+ throw new HiveException("Expression not supported: "+childExpr);
}
int outputCol = ocm.allocateOutputColumn(colType);
String className = getNormalizedTypeName(colType) + "colUnaryMinus";
@@ -249,7 +268,7 @@ public class VectorizationContext {
expr = (VectorExpression) Class.forName(className).
getDeclaredConstructors()[0].newInstance(inputCol, outputCol);
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
if (v1 != null) {
expr.setChildExpressions(new VectorExpression [] {v1});
@@ -258,7 +277,8 @@ public class VectorizationContext {
return expr;
}
- private VectorExpression getUnaryPlusExpression(List<ExprNodeDesc> childExprList) {
+ private VectorExpression getUnaryPlusExpression(List<ExprNodeDesc> childExprList)
+ throws HiveException {
ExprNodeDesc childExpr = childExprList.get(0);
int inputCol;
String colType;
@@ -269,10 +289,10 @@ public class VectorizationContext {
colType = v1.getOutputType();
} else if (childExpr instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) childExpr;
- inputCol = columnMap.get(colDesc.getColumn());
+ inputCol = getInputColumnIndex(colDesc.getColumn());
colType = colDesc.getTypeString();
} else {
- throw new RuntimeException("Expression not supported: "+childExpr);
+ throw new HiveException("Expression not supported: "+childExpr);
}
VectorExpression expr = new IdentityExpression(inputCol, colType);
if (v1 != null) {
@@ -282,7 +302,7 @@ public class VectorizationContext {
}
private VectorExpression getVectorExpression(GenericUDF udf,
- List<ExprNodeDesc> childExpr) {
+ List<ExprNodeDesc> childExpr) throws HiveException {
if (udf instanceof GenericUDFOPLessThan) {
return getVectorBinaryComparisonFilterExpression("Less", childExpr);
} else if (udf instanceof GenericUDFOPEqualOrLessThan) {
@@ -308,11 +328,11 @@ public class VectorizationContext {
} else if (udf instanceof GenericUDFBridge) {
return getVectorExpression((GenericUDFBridge) udf, childExpr);
}
- return null;
+ throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
}
private VectorExpression getVectorExpression(GenericUDFBridge udf,
- List<ExprNodeDesc> childExpr) {
+ List<ExprNodeDesc> childExpr) throws HiveException {
Class<? extends UDF> cl = udf.getUdfClass();
// (UDFBaseNumericOp.class.isAssignableFrom(cl)) == true
if (cl.equals(UDFOPPlus.class)) {
@@ -330,11 +350,11 @@ public class VectorizationContext {
} else if (cl.equals(UDFOPPositive.class)) {
return getUnaryPlusExpression(childExpr);
}
- return null;
+ throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
}
private VectorExpression getBinaryArithmeticExpression(String method,
- List<ExprNodeDesc> childExpr) {
+ List<ExprNodeDesc> childExpr) throws HiveException {
ExprNodeDesc leftExpr = childExpr.get(0);
ExprNodeDesc rightExpr = childExpr.get(1);
@@ -346,7 +366,7 @@ public class VectorizationContext {
(rightExpr instanceof ExprNodeConstantDesc) ) {
ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr;
- int inputCol = columnMap.get(leftColDesc.getColumn());
+ int inputCol = getInputColumnIndex(leftColDesc.getColumn());
String colType = leftColDesc.getTypeString();
String scalarType = constDesc.getTypeString();
String className = getBinaryColumnScalarExpressionClassName(colType,
@@ -358,13 +378,13 @@ public class VectorizationContext {
getDeclaredConstructors()[0].newInstance(inputCol,
getScalarValue(constDesc), outputCol);
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
} else if ( (rightExpr instanceof ExprNodeColumnDesc) &&
(leftExpr instanceof ExprNodeConstantDesc) ) {
ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr;
ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr;
- int inputCol = columnMap.get(rightColDesc.getColumn());
+ int inputCol = getInputColumnIndex(rightColDesc.getColumn());
String colType = rightColDesc.getTypeString();
String scalarType = constDesc.getTypeString();
String className = getBinaryColumnScalarExpressionClassName(colType,
@@ -376,14 +396,14 @@ public class VectorizationContext {
getDeclaredConstructors()[0].newInstance(inputCol,
getScalarValue(constDesc), outputCol);
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
} else if ( (rightExpr instanceof ExprNodeColumnDesc) &&
(leftExpr instanceof ExprNodeColumnDesc) ) {
ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr;
ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
- int inputCol1 = columnMap.get(rightColDesc.getColumn());
- int inputCol2 = columnMap.get(leftColDesc.getColumn());
+ int inputCol1 = getInputColumnIndex(rightColDesc.getColumn());
+ int inputCol2 = getInputColumnIndex(leftColDesc.getColumn());
String colType1 = rightColDesc.getTypeString();
String colType2 = leftColDesc.getTypeString();
String outputColType = getOutputColType(colType1, colType2, method);
@@ -395,14 +415,14 @@ public class VectorizationContext {
getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2,
outputCol);
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
} else if ((leftExpr instanceof ExprNodeGenericFuncDesc)
&& (rightExpr instanceof ExprNodeColumnDesc)) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) rightExpr;
v1 = getVectorExpression(leftExpr);
int inputCol1 = v1.getOutputColumn();
- int inputCol2 = columnMap.get(colDesc.getColumn());
+ int inputCol2 = getInputColumnIndex(colDesc.getColumn());
String colType1 = v1.getOutputType();
String colType2 = colDesc.getTypeString();
String outputColType = getOutputColType(colType1, colType2, method);
@@ -414,7 +434,7 @@ public class VectorizationContext {
getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2,
outputCol);
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException((ex));
}
expr.setChildExpressions(new VectorExpression [] {v1});
} else if ((leftExpr instanceof ExprNodeGenericFuncDesc)
@@ -433,14 +453,14 @@ public class VectorizationContext {
getDeclaredConstructors()[0].newInstance(inputCol1,
getScalarValue(constDesc), outputCol);
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException((ex));
}
expr.setChildExpressions(new VectorExpression [] {v1});
} else if ((leftExpr instanceof ExprNodeColumnDesc)
&& (rightExpr instanceof ExprNodeGenericFuncDesc)) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leftExpr;
v2 = getVectorExpression(rightExpr);
- int inputCol1 = columnMap.get(colDesc.getColumn());
+ int inputCol1 = getInputColumnIndex(colDesc.getColumn());
int inputCol2 = v2.getOutputColumn();
String colType1 = colDesc.getTypeString();
String colType2 = v2.getOutputType();
@@ -453,7 +473,7 @@ public class VectorizationContext {
getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2,
outputCol);
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
expr.setChildExpressions(new VectorExpression [] {v2});
} else if ((leftExpr instanceof ExprNodeConstantDesc)
@@ -472,7 +492,7 @@ public class VectorizationContext {
getDeclaredConstructors()[0].newInstance(inputCol2,
getScalarValue(constDesc), outputCol);
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
expr.setChildExpressions(new VectorExpression [] {v2});
} else if ((leftExpr instanceof ExprNodeGenericFuncDesc)
@@ -494,7 +514,7 @@ public class VectorizationContext {
getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2,
outputCol);
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
expr.setChildExpressions(new VectorExpression [] {v1, v2});
}
@@ -509,7 +529,7 @@ public class VectorizationContext {
}
private VectorExpression getVectorExpression(GenericUDFOPOr udf,
- List<ExprNodeDesc> childExpr) {
+ List<ExprNodeDesc> childExpr) throws HiveException {
ExprNodeDesc leftExpr = childExpr.get(0);
ExprNodeDesc rightExpr = childExpr.get(1);
@@ -517,7 +537,7 @@ public class VectorizationContext {
VectorExpression ve2;
if (leftExpr instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leftExpr;
- int inputCol = columnMap.get(colDesc.getColumn());
+ int inputCol = getInputColumnIndex(colDesc.getColumn());
ve1 = new SelectColumnIsTrue(inputCol);
} else {
ve1 = getVectorExpression(leftExpr);
@@ -525,7 +545,7 @@ public class VectorizationContext {
if (rightExpr instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) rightExpr;
- int inputCol = columnMap.get(colDesc.getColumn());
+ int inputCol = getInputColumnIndex(colDesc.getColumn());
ve2 = new SelectColumnIsTrue(inputCol);
} else {
ve2 = getVectorExpression(leftExpr);
@@ -535,22 +555,21 @@ public class VectorizationContext {
}
private VectorExpression getVectorExpression(GenericUDFOPNot udf,
- List<ExprNodeDesc> childExpr) {
+ List<ExprNodeDesc> childExpr) throws HiveException {
ExprNodeDesc expr = childExpr.get(0);
if (expr instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) expr;
- int inputCol = columnMap.get(colDesc.getColumn());
+ int inputCol = getInputColumnIndex(colDesc.getColumn());
VectorExpression ve = new SelectColumnIsFalse(inputCol);
return ve;
} else {
VectorExpression ve = getVectorExpression(expr);
- new FilterNotExpr(ve);
+ return new FilterNotExpr(ve);
}
- return null;
}
private VectorExpression getVectorExpression(GenericUDFOPAnd udf,
- List<ExprNodeDesc> childExpr) {
+ List<ExprNodeDesc> childExpr) throws HiveException {
ExprNodeDesc leftExpr = childExpr.get(0);
ExprNodeDesc rightExpr = childExpr.get(1);
@@ -558,7 +577,7 @@ public class VectorizationContext {
VectorExpression ve2;
if (leftExpr instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leftExpr;
- int inputCol = columnMap.get(colDesc.getColumn());
+ int inputCol = getInputColumnIndex(colDesc.getColumn());
ve1 = new SelectColumnIsTrue(inputCol);
} else {
ve1 = getVectorExpression(leftExpr);
@@ -566,7 +585,7 @@ public class VectorizationContext {
if (rightExpr instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) rightExpr;
- int inputCol = columnMap.get(colDesc.getColumn());
+ int inputCol = getInputColumnIndex(colDesc.getColumn());
ve2 = new SelectColumnIsTrue(inputCol);
} else {
ve2 = getVectorExpression(leftExpr);
@@ -576,31 +595,30 @@ public class VectorizationContext {
}
private VectorExpression getVectorExpression(GenericUDFOPNull udf,
- List<ExprNodeDesc> childExpr) {
+ List<ExprNodeDesc> childExpr) throws HiveException {
ExprNodeDesc expr = childExpr.get(0);
VectorExpression ve = null;
if (expr instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) expr;
- int inputCol = columnMap.get(colDesc.getColumn());
+ int inputCol = getInputColumnIndex(colDesc.getColumn());
ve = new SelectColumnIsNull(inputCol);
} else {
- //TODO
+ throw new HiveException("Not supported");
}
return ve;
}
private VectorExpression getVectorExpression(GenericUDFOPNotNull udf,
- List<ExprNodeDesc> childExpr) {
+ List<ExprNodeDesc> childExpr) throws HiveException {
ExprNodeDesc expr = childExpr.get(0);
if (expr instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) expr;
- int inputCol = columnMap.get(colDesc.getColumn());
+ int inputCol = getInputColumnIndex(colDesc.getColumn());
VectorExpression ve = new SelectColumnIsNotNull(inputCol);
return ve;
} else {
- //TODO
+ throw new HiveException("Not supported");
}
- return null;
}
private Object getScalarValue(ExprNodeConstantDesc constDesc) {
@@ -612,7 +630,7 @@ public class VectorizationContext {
}
private VectorExpression getVectorBinaryComparisonFilterExpression(String
- opName, List<ExprNodeDesc> childExpr) {
+ opName, List<ExprNodeDesc> childExpr) throws HiveException {
ExprNodeDesc leftExpr = childExpr.get(0);
ExprNodeDesc rightExpr = childExpr.get(1);
@@ -624,7 +642,7 @@ public class VectorizationContext {
(rightExpr instanceof ExprNodeConstantDesc) ) {
ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) rightExpr;
- int inputCol = columnMap.get(leftColDesc.getColumn());
+ int inputCol = getInputColumnIndex(leftColDesc.getColumn());
String colType = leftColDesc.getTypeString();
String scalarType = constDesc.getTypeString();
String className = getFilterColumnScalarExpressionClassName(colType,
@@ -634,13 +652,13 @@ public class VectorizationContext {
getDeclaredConstructors()[0].newInstance(inputCol,
getScalarValue(constDesc));
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
} else if ( (rightExpr instanceof ExprNodeColumnDesc) &&
(leftExpr instanceof ExprNodeConstantDesc) ) {
ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr;
ExprNodeConstantDesc constDesc = (ExprNodeConstantDesc) leftExpr;
- int inputCol = columnMap.get(rightColDesc.getColumn());
+ int inputCol = getInputColumnIndex(rightColDesc.getColumn());
String colType = rightColDesc.getTypeString();
String scalarType = constDesc.getTypeString();
String className = getFilterColumnScalarExpressionClassName(colType,
@@ -650,14 +668,14 @@ public class VectorizationContext {
getDeclaredConstructors()[0].newInstance(inputCol,
getScalarValue(constDesc));
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
} else if ( (rightExpr instanceof ExprNodeColumnDesc) &&
(leftExpr instanceof ExprNodeColumnDesc) ) {
ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) rightExpr;
ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) leftExpr;
- int inputCol1 = columnMap.get(rightColDesc.getColumn());
- int inputCol2 = columnMap.get(leftColDesc.getColumn());
+ int inputCol1 = getInputColumnIndex(rightColDesc.getColumn());
+ int inputCol2 = getInputColumnIndex(leftColDesc.getColumn());
String colType1 = rightColDesc.getTypeString();
String colType2 = leftColDesc.getTypeString();
String className = getFilterColumnColumnExpressionClassName(colType1,
@@ -666,14 +684,14 @@ public class VectorizationContext {
expr = (VectorExpression) Class.forName(className).
getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2);
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
} else if ( (leftExpr instanceof ExprNodeGenericFuncDesc) &&
(rightExpr instanceof ExprNodeColumnDesc) ) {
v1 = getVectorExpression((ExprNodeGenericFuncDesc) leftExpr);
ExprNodeColumnDesc leftColDesc = (ExprNodeColumnDesc) rightExpr;
int inputCol1 = v1.getOutputColumn();
- int inputCol2 = columnMap.get(leftColDesc.getColumn());
+ int inputCol2 = getInputColumnIndex(leftColDesc.getColumn());
String colType1 = v1.getOutputType();
String colType2 = leftColDesc.getTypeString();
String className = getFilterColumnColumnExpressionClassName(colType1,
@@ -684,14 +702,14 @@ public class VectorizationContext {
expr = (VectorExpression) Class.forName(className).
getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2);
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
expr.setChildExpressions(new VectorExpression [] {v1});
} else if ( (leftExpr instanceof ExprNodeColumnDesc) &&
(rightExpr instanceof ExprNodeGenericFuncDesc) ) {
ExprNodeColumnDesc rightColDesc = (ExprNodeColumnDesc) leftExpr;
v2 = getVectorExpression((ExprNodeGenericFuncDesc) rightExpr);
- int inputCol1 = columnMap.get(rightColDesc.getColumn());
+ int inputCol1 = getInputColumnIndex(rightColDesc.getColumn());
int inputCol2 = v2.getOutputColumn();
String colType1 = rightColDesc.getTypeString();
String colType2 = v2.getOutputType();
@@ -701,7 +719,7 @@ public class VectorizationContext {
expr = (VectorExpression) Class.forName(className).
getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2);
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
expr.setChildExpressions(new VectorExpression [] {v2});
} else if ( (leftExpr instanceof ExprNodeGenericFuncDesc) &&
@@ -718,7 +736,7 @@ public class VectorizationContext {
getDeclaredConstructors()[0].newInstance(inputCol1,
getScalarValue(constDesc));
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
expr.setChildExpressions(new VectorExpression [] {v1});
} else if ( (leftExpr instanceof ExprNodeConstantDesc) &&
@@ -735,7 +753,7 @@ public class VectorizationContext {
getDeclaredConstructors()[0].newInstance(inputCol2,
getScalarValue(constDesc));
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
expr.setChildExpressions(new VectorExpression [] {v2});
} else {
@@ -753,7 +771,7 @@ public class VectorizationContext {
expr = (VectorExpression) Class.forName(className).
getDeclaredConstructors()[0].newInstance(inputCol1, inputCol2);
} catch (Exception ex) {
- throw new RuntimeException((ex));
+ throw new HiveException(ex);
}
expr.setChildExpressions(new VectorExpression [] {v1, v2});
}
Added: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedInputFormatInterface.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedInputFormatInterface.java?rev=1480922&view=auto
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedInputFormatInterface.java (added)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedInputFormatInterface.java Fri May 10 08:16:09 2013
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+/**
+ * Marker interface to indicate a given input format supports
+ * vectorization input.
+ */
+public interface VectorizedInputFormatInterface {
+
+}