Posted to commits@hive.apache.org by br...@apache.org on 2014/08/13 02:25:34 UTC
svn commit: r1617633 [2/6] - in /hive/branches/spark: ./ ant/
ant/src/org/apache/hadoop/hive/ant/ beeline/
beeline/src/java/org/apache/hive/beeline/ beeline/src/main/resources/
cli/src/java/org/apache/hadoop/hive/cli/
common/src/java/org/apache/hadoop/...
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Wed Aug 13 00:25:32 2014
@@ -80,6 +80,7 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
import org.apache.hadoop.hive.ql.udf.SettableUDF;
import org.apache.hadoop.hive.ql.udf.UDFConv;
import org.apache.hadoop.hive.ql.udf.UDFHex;
@@ -323,10 +324,12 @@ public class VectorizationContext {
ve = getGenericUdfVectorExpression(expr.getGenericUDF(),
childExpressions, mode, exprDesc.getTypeInfo());
}
+ } else if (exprDesc instanceof ExprNodeNullDesc) {
+ ve = getConstantVectorExpression(null, exprDesc.getTypeInfo(), mode);
} else if (exprDesc instanceof ExprNodeConstantDesc) {
ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(),
mode);
- }
+ }
if (ve == null) {
throw new HiveException("Could not vectorize expression: "+exprDesc.getName());
}
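
The new branch above routes NULL literals (ExprNodeNullDesc) through the same constant path as ordinary literals, so they no longer fall through to the "Could not vectorize expression" error below. A minimal toy version of the dispatch order, with NULL_LIT and Number standing in for ExprNodeNullDesc and ExprNodeConstantDesc (none of this is Hive API):

    // Toy dispatch mirroring the new branch order: null literal first, then constant.
    final class NullDispatchSketch {
        static final Object NULL_LIT = new Object();    // stand-in for ExprNodeNullDesc

        static String vectorize(Object expr) {
            if (expr == NULL_LIT) {
                return "ConstantVectorExpression(value=null)";
            } else if (expr instanceof Number) {        // stand-in for ExprNodeConstantDesc
                return "ConstantVectorExpression(value=" + expr + ")";
            }
            throw new IllegalStateException("Could not vectorize expression");
        }

        public static void main(String[] args) {
            System.out.println(vectorize(NULL_LIT));    // ConstantVectorExpression(value=null)
            System.out.println(vectorize(42));          // ConstantVectorExpression(value=42)
        }
    }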
@@ -410,8 +413,8 @@ public class VectorizationContext {
}
}
} else {
- for (ExprNodeDesc child : children) {
- ExprNodeDesc castExpression = getImplicitCastExpression(genericUDF, child, commonType);
+ for (ExprNodeDesc child : children) {
+ ExprNodeDesc castExpression = getImplicitCastExpression(genericUDF, child, commonType);
if (castExpression != null) {
atleastOneCastNeeded = true;
childrenWithCasts.add(castExpression);
@@ -652,69 +655,70 @@ public class VectorizationContext {
}
/**
- * Handles only the special case of unary operators on a constant.
+ * Handles only the special cases of the cast and unary +/- operators on a constant.
* @param exprDesc
- * @return The same expression if no folding done, else return the constant
+ * @return The same expression if no evaluation was done, else the constant
* expression.
* @throws HiveException
*/
- ExprNodeDesc foldConstantsForUnaryExpression(ExprNodeDesc exprDesc) throws HiveException {
- if (!(exprDesc instanceof ExprNodeGenericFuncDesc)) {
- return exprDesc;
- }
-
- if (exprDesc.getChildren() == null || (exprDesc.getChildren().size() != 1) ) {
- return exprDesc;
- }
-
- ExprNodeConstantDesc foldedChild = null;
- if (!( exprDesc.getChildren().get(0) instanceof ExprNodeConstantDesc)) {
-
- // try recursive folding
- ExprNodeDesc expr = foldConstantsForUnaryExpression(exprDesc.getChildren().get(0));
- if (expr instanceof ExprNodeConstantDesc) {
- foldedChild = (ExprNodeConstantDesc) expr;
- }
- } else {
- foldedChild = (ExprNodeConstantDesc) exprDesc.getChildren().get(0);
- }
-
- if (foldedChild == null) {
- return exprDesc;
- }
-
- ObjectInspector childoi = foldedChild.getWritableObjectInspector();
- GenericUDF gudf = ((ExprNodeGenericFuncDesc) exprDesc).getGenericUDF();
-
- if (gudf instanceof GenericUDFOPNegative || gudf instanceof GenericUDFOPPositive
- || castExpressionUdfs.contains(gudf.getClass())
- || ((gudf instanceof GenericUDFBridge)
- && castExpressionUdfs.contains(((GenericUDFBridge) gudf).getUdfClass()))) {
- ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprDesc);
- ObjectInspector output = evaluator.initialize(childoi);
- Object constant = evaluator.evaluate(null);
- Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output);
- return new ExprNodeConstantDesc(exprDesc.getTypeInfo(), java);
- }
-
- return exprDesc;
+ ExprNodeDesc evaluateCastOnConstants(ExprNodeDesc exprDesc) throws HiveException {
+ if (!(exprDesc instanceof ExprNodeGenericFuncDesc)) {
+ return exprDesc;
+ }
+
+ if (exprDesc.getChildren() == null || (exprDesc.getChildren().size() != 1) ) {
+ return exprDesc;
+ }
+
+ ExprNodeConstantDesc foldedChild = null;
+ if (!( exprDesc.getChildren().get(0) instanceof ExprNodeConstantDesc)) {
+
+ // try recursive folding
+ ExprNodeDesc expr = evaluateCastOnConstants(exprDesc.getChildren().get(0));
+ if (expr instanceof ExprNodeConstantDesc) {
+ foldedChild = (ExprNodeConstantDesc) expr;
+ }
+ } else {
+ foldedChild = (ExprNodeConstantDesc) exprDesc.getChildren().get(0);
+ }
+
+ if (foldedChild == null) {
+ return exprDesc;
+ }
+
+ ObjectInspector childoi = foldedChild.getWritableObjectInspector();
+ GenericUDF gudf = ((ExprNodeGenericFuncDesc) exprDesc).getGenericUDF();
+
+ // Only evaluate unary +/-, a cast on a constant, or a recursive chain of such casts.
+ if (gudf instanceof GenericUDFOPNegative || gudf instanceof GenericUDFOPPositive ||
+ castExpressionUdfs.contains(gudf.getClass())
+ || ((gudf instanceof GenericUDFBridge)
+ && castExpressionUdfs.contains(((GenericUDFBridge) gudf).getUdfClass()))) {
+ ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprDesc);
+ ObjectInspector output = evaluator.initialize(childoi);
+ Object constant = evaluator.evaluate(null);
+ Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output);
+ return new ExprNodeConstantDesc(exprDesc.getTypeInfo(), java);
+ }
+
+ return exprDesc;
}
-
- /* Fold simple unary expressions in all members of the input list and return new list
+
+ /* Evaluate cast-on-constant operators in all members of the input list and return a new list
* containing results.
*/
- private List<ExprNodeDesc> foldConstantsForUnaryExprs(List<ExprNodeDesc> childExpr)
- throws HiveException {
- List<ExprNodeDesc> constantFoldedChildren = new ArrayList<ExprNodeDesc>();
- if (childExpr != null) {
- for (ExprNodeDesc expr : childExpr) {
- expr = this.foldConstantsForUnaryExpression(expr);
- constantFoldedChildren.add(expr);
- }
- }
- return constantFoldedChildren;
+ private List<ExprNodeDesc> evaluateCastOnConstants(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ List<ExprNodeDesc> evaluatedChildren = new ArrayList<ExprNodeDesc>();
+ if (childExpr != null) {
+ for (ExprNodeDesc expr : childExpr) {
+ expr = this.evaluateCastOnConstants(expr);
+ evaluatedChildren.add(expr);
+ }
+ }
+ return evaluatedChildren;
}
-
+
private VectorExpression getConstantVectorExpression(Object constantValue, TypeInfo typeInfo,
Mode mode) throws HiveException {
String type = typeInfo.getTypeName();
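
The renamed evaluateCastOnConstants performs a bottom-up fold: recurse into the single child, and if it reduces to a constant, evaluate the cast/+ve/-ve operator over it. A self-contained sketch of that pattern on a toy expression tree (Expr, Const, and Cast are stand-ins for the ExprNodeDesc hierarchy, not Hive classes):

    // Fold casts over constants bottom-up, returning the input node untouched
    // when nothing can be evaluated -- the same contract as evaluateCastOnConstants.
    interface Expr {}

    final class Const implements Expr {
        final Object value;
        Const(Object value) { this.value = value; }
    }

    final class Cast implements Expr {              // stand-in for a cast GenericUDF node
        final Expr child;
        Cast(Expr child) { this.child = child; }
        Object apply(Object v) { return v == null ? null : ((Number) v).longValue(); }
    }

    final class CastFoldSketch {
        static Expr fold(Expr e) {
            if (!(e instanceof Cast)) {
                return e;                           // only cast nodes are evaluated
            }
            Cast cast = (Cast) e;
            Expr child = fold(cast.child);          // try recursive folding first
            if (child instanceof Const) {
                return new Const(cast.apply(((Const) child).value));
            }
            return e;                               // child stayed non-constant: no fold
        }

        public static void main(String[] args) {
            Expr folded = fold(new Cast(new Cast(new Const(3.7d))));
            System.out.println(((Const) folded).value);    // 3
        }
    }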
@@ -903,8 +907,9 @@ public class VectorizationContext {
private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType) throws HiveException {
- List<ExprNodeDesc> constantFoldedChildren = foldConstantsForUnaryExprs(childExpr);
- childExpr = constantFoldedChildren;
+ List<ExprNodeDesc> castedChildren = evaluateCastOnConstants(childExpr);
+ childExpr = castedChildren;
+
//First handle special cases
if (udf instanceof GenericUDFBetween) {
return getBetweenFilterExpression(childExpr, mode, returnType);
@@ -928,15 +933,15 @@ public class VectorizationContext {
}
} else if (udf instanceof GenericUDFToDecimal) {
return getCastToDecimal(childExpr, returnType);
- }
-
+ }
+
// Now do a general lookup
Class<?> udfClass = udf.getClass();
if (udf instanceof GenericUDFBridge) {
udfClass = ((GenericUDFBridge) udf).getUdfClass();
}
- VectorExpression ve = getVectorExpressionForUdf(udfClass, constantFoldedChildren, mode, returnType);
+ VectorExpression ve = getVectorExpressionForUdf(udfClass, castedChildren, mode, returnType);
if (ve == null) {
throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
@@ -998,7 +1003,7 @@ public class VectorizationContext {
}
}
}
-
+
/**
* Create a filter or boolean-valued expression for column IN ( <list-of-constants> )
*/
@@ -1006,13 +1011,11 @@ public class VectorizationContext {
throws HiveException {
ExprNodeDesc colExpr = childExpr.get(0);
- TypeInfo colTypeInfo = colExpr.getTypeInfo();
String colType = colExpr.getTypeString();
// prepare arguments for createVectorExpression
- List<ExprNodeDesc> childrenForInList =
- foldConstantsForUnaryExprs(childExpr.subList(1, childExpr.size()));
-
+ List<ExprNodeDesc> childrenForInList = evaluateCastOnConstants(childExpr.subList(1, childExpr.size()));
+
/* This method assumes that the IN list has no NULL entries. That is enforced elsewhere,
* in the Vectorizer class. If NULL is passed in as a list entry, behavior is not defined.
* If in the future, NULL values are allowed in the IN list, be sure to handle 3-valued
@@ -1107,16 +1110,116 @@ public class VectorizationContext {
return getCastToString(childExpr, returnType);
}
return null;
- }
-
+ }
+
+ private Decimal128 castConstantToDecimal(Object scalar, TypeInfo type) throws HiveException {
+ PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
+ String typename = type.getTypeName();
+ Decimal128 d = new Decimal128();
+ int scale = HiveDecimalUtils.getScaleForType(ptinfo);
+ switch (ptinfo.getPrimitiveCategory()) {
+ case FLOAT:
+ float floatVal = ((Float) scalar).floatValue();
+ d.update(floatVal, (short) scale);
+ break;
+ case DOUBLE:
+ double doubleVal = ((Double) scalar).doubleValue();
+ d.update(doubleVal, (short) scale);
+ break;
+ case BYTE:
+ byte byteVal = ((Byte) scalar).byteValue();
+ d.update(byteVal, (short) scale);
+ break;
+ case SHORT:
+ short shortVal = ((Short) scalar).shortValue();
+ d.update(shortVal, (short) scale);
+ break;
+ case INT:
+ int intVal = ((Integer) scalar).intValue();
+ d.update(intVal, (short) scale);
+ break;
+ case LONG:
+ long longVal = ((Long) scalar).longValue();
+ d.update(longVal, (short) scale);
+ break;
+ case DECIMAL:
+ HiveDecimal decimalVal = (HiveDecimal) scalar;
+ d.update(decimalVal.unscaledValue(), (short) scale);
+ break;
+ default:
+ throw new HiveException("Unsupported type "+typename+" for cast to Decimal128");
+ }
+ return d;
+ }
+
+ private String castConstantToString(Object scalar, TypeInfo type) throws HiveException {
+ PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
+ String typename = type.getTypeName();
+ switch (ptinfo.getPrimitiveCategory()) {
+ case FLOAT:
+ case DOUBLE:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return ((Number) scalar).toString();
+ case DECIMAL:
+ HiveDecimal decimalVal = (HiveDecimal) scalar;
+ return decimalVal.toString();
+ default:
+ throw new HiveException("Unsupported type "+typename+" for cast to String");
+ }
+ }
+
+ private Double castConstantToDouble(Object scalar, TypeInfo type) throws HiveException {
+ PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
+ String typename = type.getTypeName();
+ switch (ptinfo.getPrimitiveCategory()) {
+ case FLOAT:
+ case DOUBLE:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return ((Number) scalar).doubleValue();
+ case DECIMAL:
+ HiveDecimal decimalVal = (HiveDecimal) scalar;
+ return decimalVal.doubleValue();
+ default:
+ throw new HiveException("Unsupported type "+typename+" for cast to Double");
+ }
+ }
+
+ private Long castConstantToLong(Object scalar, TypeInfo type) throws HiveException {
+ PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
+ String typename = type.getTypeName();
+ switch (ptinfo.getPrimitiveCategory()) {
+ case FLOAT:
+ case DOUBLE:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return ((Number) scalar).longValue();
+ case DECIMAL:
+ HiveDecimal decimalVal = (HiveDecimal) scalar;
+ return decimalVal.longValue();
+ default:
+ throw new HiveException("Unsupported type "+typename+" for cast to Long");
+ }
+ }
+
private VectorExpression getCastToDecimal(List<ExprNodeDesc> childExpr, TypeInfo returnType)
throws HiveException {
ExprNodeDesc child = childExpr.get(0);
String inputType = childExpr.get(0).getTypeString();
if (child instanceof ExprNodeConstantDesc) {
- // Don't do constant folding here. Wait until the optimizer is changed to do it.
- // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
- return null;
+ // Return a constant vector expression
+ Object constantValue = ((ExprNodeConstantDesc) child).getValue();
+ Decimal128 decimalValue = castConstantToDecimal(constantValue, child.getTypeInfo());
+ return getConstantVectorExpression(decimalValue, returnType, Mode.PROJECTION);
+ } else if (child instanceof ExprNodeNullDesc) {
+ return getConstantVectorExpression(null, returnType, Mode.PROJECTION);
}
if (isIntFamily(inputType)) {
return createVectorExpression(CastLongToDecimal.class, childExpr, Mode.PROJECTION, returnType);
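
The castConstantTo* helpers added above all follow the same shape: switch on the primitive category, funnel the numeric boxes through the shared java.lang.Number API, and special-case HiveDecimal. A standalone analogue of castConstantToLong (PrimCat stands in for PrimitiveCategory; the decimal branch is elided):

    // Stand-in for the PrimitiveCategory-driven constant casts above.
    enum PrimCat { FLOAT, DOUBLE, BYTE, SHORT, INT, LONG }

    final class ConstantCastSketch {
        static Long castConstantToLong(Object scalar, PrimCat cat) {
            switch (cat) {
                case FLOAT: case DOUBLE: case BYTE: case SHORT: case INT: case LONG:
                    // All numeric wrappers extend Number, so one branch covers them.
                    return ((Number) scalar).longValue();
                default:
                    throw new IllegalArgumentException(
                        "Unsupported type " + cat + " for cast to Long");
            }
        }

        public static void main(String[] args) {
            // Note the truncation semantics: longValue() drops the fraction.
            System.out.println(castConstantToLong(3.9f, PrimCat.FLOAT));    // 3
        }
    }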
@@ -1131,16 +1234,19 @@ public class VectorizationContext {
return createVectorExpression(CastTimestampToDecimal.class, childExpr, Mode.PROJECTION, returnType);
}
throw new HiveException("Unhandled cast input type: " + inputType);
- }
-
+ }
+
private VectorExpression getCastToString(List<ExprNodeDesc> childExpr, TypeInfo returnType)
throws HiveException {
ExprNodeDesc child = childExpr.get(0);
String inputType = childExpr.get(0).getTypeString();
if (child instanceof ExprNodeConstantDesc) {
- // Don't do constant folding here. Wait until the optimizer is changed to do it.
- // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
- return null;
+ // Return a constant vector expression
+ Object constantValue = ((ExprNodeConstantDesc) child).getValue();
+ String strValue = castConstantToString(constantValue, child.getTypeInfo());
+ return getConstantVectorExpression(strValue, returnType, Mode.PROJECTION);
+ } else if (child instanceof ExprNodeNullDesc) {
+ return getConstantVectorExpression(null, returnType, Mode.PROJECTION);
}
if (inputType.equals("boolean")) {
// Boolean must come before the integer family. It's a special case.
@@ -1164,9 +1270,12 @@ public class VectorizationContext {
ExprNodeDesc child = childExpr.get(0);
String inputType = childExpr.get(0).getTypeString();
if (child instanceof ExprNodeConstantDesc) {
- // Don't do constant folding here. Wait until the optimizer is changed to do it.
- // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
- return null;
+ // Return a constant vector expression
+ Object constantValue = ((ExprNodeConstantDesc) child).getValue();
+ Double doubleValue = castConstantToDouble(constantValue, child.getTypeInfo());
+ return getConstantVectorExpression(doubleValue, returnType, Mode.PROJECTION);
+ } else if (child instanceof ExprNodeNullDesc) {
+ return getConstantVectorExpression(null, returnType, Mode.PROJECTION);
}
if (isIntFamily(inputType)) {
return createVectorExpression(CastLongToDouble.class, childExpr, Mode.PROJECTION, returnType);
@@ -1191,6 +1300,8 @@ public class VectorizationContext {
// Don't do constant folding here. Wait until the optimizer is changed to do it.
// Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
return null;
+ } else if (child instanceof ExprNodeNullDesc) {
+ return getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, Mode.PROJECTION);
}
// Long and double are handled using descriptors, string needs to be specially handled.
if (inputType.equals("string")) {
@@ -1215,9 +1326,12 @@ public class VectorizationContext {
ExprNodeDesc child = childExpr.get(0);
String inputType = childExpr.get(0).getTypeString();
if (child instanceof ExprNodeConstantDesc) {
- // Don't do constant folding here. Wait until the optimizer is changed to do it.
- // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
- return null;
+ // Return a constant vector expression
+ Object constantValue = ((ExprNodeConstantDesc) child).getValue();
+ Long longValue = castConstantToLong(constantValue, child.getTypeInfo());
+ return getConstantVectorExpression(longValue, TypeInfoFactory.longTypeInfo, Mode.PROJECTION);
+ } else if (child instanceof ExprNodeNullDesc) {
+ return getConstantVectorExpression(null, TypeInfoFactory.longTypeInfo, Mode.PROJECTION);
}
// Float family, timestamp are handled via descriptor based lookup, int family needs
// special handling.
@@ -1281,7 +1395,7 @@ public class VectorizationContext {
String colType = commonType.getTypeName();
// prepare arguments for createVectorExpression
- List<ExprNodeDesc> childrenAfterNot = foldConstantsForUnaryExprs(castChildren);
+ List<ExprNodeDesc> childrenAfterNot = evaluateCastOnConstants(castChildren);
// determine class
Class<?> cl = null;
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java Wed Aug 13 00:25:32 2014
@@ -187,13 +187,14 @@ public class ConstantVectorExpression ex
public void setTypeString(String typeString) {
this.outputType = typeString;
- if ("string".equalsIgnoreCase(typeString)) {
+ if (VectorizationContext.isStringFamily(typeString)) {
this.type = Type.BYTES;
- } else if ("double".equalsIgnoreCase(typeString)) {
+ } else if (VectorizationContext.isFloatFamily(typeString)) {
this.type = Type.DOUBLE;
- } else if (VectorizationContext.decimalTypePattern.matcher(typeString).matches()){
+ } else if (VectorizationContext.isDecimalFamily(typeString)){
this.type = Type.DECIMAL;
} else {
+ // Everything that is not in the string, double, or decimal families is treated as long.
this.type = Type.LONG;
}
}
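
The widened setTypeString amounts to a four-way classification of type names onto vector column families. A rough standalone equivalent, assuming simplified family predicates (the regex checks below are illustrative, not Hive's actual isStringFamily/isFloatFamily/isDecimalFamily implementations):

    // Simplified stand-ins for VectorizationContext's type-family predicates.
    final class TypeFamilySketch {
        static boolean isStringFamily(String t)  { return t.matches("string|char.*|varchar.*"); }
        static boolean isFloatFamily(String t)   { return t.equals("float") || t.equals("double"); }
        static boolean isDecimalFamily(String t) { return t.startsWith("decimal"); }

        static String vectorColumnType(String typeString) {
            if (isStringFamily(typeString))  return "BYTES";
            if (isFloatFamily(typeString))   return "DOUBLE";
            if (isDecimalFamily(typeString)) return "DECIMAL";
            return "LONG";    // all remaining primitives ride in long vectors
        }

        public static void main(String[] args) {
            System.out.println(vectorColumnType("varchar(10)"));      // BYTES
            System.out.println(vectorColumnType("decimal(10,2)"));    // DECIMAL
            System.out.println(vectorColumnType("boolean"));          // LONG
        }
    }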
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java Wed Aug 13 00:25:32 2014
@@ -50,25 +50,12 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableIntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableLongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
-import org.apache.hadoop.io.BooleanWritable;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
/**
* VectorExpressionWritableFactory helper class for generating VectorExpressionWritable objects.
@@ -364,7 +351,6 @@ public final class VectorExpressionWrite
*/
public static VectorExpressionWriter genVectorExpressionWritable(ExprNodeDesc nodeDesc)
throws HiveException {
- String nodeType = nodeDesc.getTypeString();
ObjectInspector objectInspector = nodeDesc.getWritableObjectInspector();
if (null == objectInspector) {
objectInspector = TypeInfoUtils
@@ -408,6 +394,9 @@ public final class VectorExpressionWrite
case LONG:
return genVectorExpressionWritableLong(
(SettableLongObjectInspector) fieldObjInspector);
+ case VOID:
+ return genVectorExpressionWritableVoid(
+ (VoidObjectInspector) fieldObjInspector);
case BINARY:
return genVectorExpressionWritableBinary(
(SettableBinaryObjectInspector) fieldObjInspector);
@@ -722,6 +711,39 @@ public final class VectorExpressionWrite
}.init(fieldObjInspector);
}
+ private static VectorExpressionWriter genVectorExpressionWritableVoid(
+ VoidObjectInspector fieldObjInspector) throws HiveException {
+ return new VectorExpressionWriterLong() {
+ private Object obj;
+
+ public VectorExpressionWriter init(VoidObjectInspector objInspector)
+ throws HiveException {
+ super.init(objInspector);
+ this.obj = initValue(null);
+ return this;
+ }
+
+ @Override
+ public Object writeValue(long value) throws HiveException {
+ return this.obj;
+ }
+
+ @Override
+ public Object setValue(Object field, long value) throws HiveException {
+ if (null == field) {
+ field = initValue(null);
+ }
+ return field;
+ }
+
+ @Override
+ public Object initValue(Object ignored) {
+ return ((VoidObjectInspector) this.objectInspector).copyObject(null);
+ }
+ }.init(fieldObjInspector);
+ }
+
+
private static VectorExpressionWriter genVectorExpressionWritableInt(
SettableIntObjectInspector fieldObjInspector) throws HiveException {
return new VectorExpressionWriterLong() {
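
The new VOID writer is essentially a null object: whatever long value the vector hands it, it returns a (copied) null. A compact standalone sketch of that behavior (LongWriter is a stand-in interface, and returning plain null stands in for VoidObjectInspector.copyObject(null)):

    interface LongWriter {
        Object writeValue(long value);
    }

    final class VoidWriterSketch {
        static LongWriter voidWriter() {
            return new LongWriter() {
                @Override
                public Object writeValue(long value) {
                    // The long payload is ignored by design: a VOID column carries
                    // no values, so the writer always yields null.
                    return null;
                }
            };
        }

        public static void main(String[] args) {
            System.out.println(voidWriter().writeValue(42L));    // null
        }
    }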
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java Wed Aug 13 00:25:32 2014
@@ -23,7 +23,6 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
-import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Index;
@@ -46,12 +45,9 @@ import org.apache.hadoop.hive.ql.plan.Pa
*/
public class AggregateIndexHandler extends CompactIndexHandler {
- private static Index index = null;
-
@Override
- public void analyzeIndexDefinition(Table baseTable, Index idx,
+ public void analyzeIndexDefinition(Table baseTable, Index index,
Table indexTable) throws HiveException {
- index = idx;
StorageDescriptor storageDesc = index.getSd();
if (this.usesIndexTable() && indexTable != null) {
StorageDescriptor indexTableSd = storageDesc.deepCopy();
@@ -92,10 +88,11 @@ public class AggregateIndexHandler exten
@Override
protected Task<?> getIndexBuilderMapRedTask(Set<ReadEntity> inputs,
Set<WriteEntity> outputs,
- List<FieldSchema> indexField, boolean partitioned,
+ Index index, boolean partitioned,
PartitionDesc indexTblPartDesc, String indexTableName,
PartitionDesc baseTablePartDesc, String baseTableName, String dbName) {
+ List<FieldSchema> indexField = index.getSd().getCols();
String indexCols = HiveUtils.getUnparsedColumnNamesFromFieldSchema(indexField);
//form a new insert overwrite query.
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java Wed Aug 13 00:25:32 2014
@@ -29,6 +29,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Index;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
@@ -64,7 +65,7 @@ public abstract class TableBasedIndexHan
if (!baseTbl.isPartitioned()) {
// the table does not have any partition, then create index for the
// whole table
- Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index.getSd().getCols(), false,
+ Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index, false,
new PartitionDesc(desc, null), indexTbl.getTableName(),
new PartitionDesc(Utilities.getTableDesc(baseTbl), null),
baseTbl.getTableName(), indexTbl.getDbName());
@@ -88,7 +89,7 @@ public abstract class TableBasedIndexHan
"Partitions of base table and index table are inconsistent.");
}
// for each partition, spawn a map reduce task.
- Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index.getSd().getCols(), true,
+ Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index, true,
new PartitionDesc(indexPart), indexTbl.getTableName(),
new PartitionDesc(basePart), baseTbl.getTableName(), indexTbl.getDbName());
indexBuilderTasks.add(indexBuilder);
@@ -100,10 +101,20 @@ public abstract class TableBasedIndexHan
}
}
- abstract protected Task<?> getIndexBuilderMapRedTask(Set<ReadEntity> inputs, Set<WriteEntity> outputs,
+ protected Task<?> getIndexBuilderMapRedTask(Set<ReadEntity> inputs, Set<WriteEntity> outputs,
+ Index index, boolean partitioned,
+ PartitionDesc indexTblPartDesc, String indexTableName,
+ PartitionDesc baseTablePartDesc, String baseTableName, String dbName) throws HiveException {
+ return getIndexBuilderMapRedTask(inputs, outputs, index.getSd().getCols(),
+ partitioned, indexTblPartDesc, indexTableName, baseTablePartDesc, baseTableName, dbName);
+ }
+
+ protected Task<?> getIndexBuilderMapRedTask(Set<ReadEntity> inputs, Set<WriteEntity> outputs,
List<FieldSchema> indexField, boolean partitioned,
PartitionDesc indexTblPartDesc, String indexTableName,
- PartitionDesc baseTablePartDesc, String baseTableName, String dbName) throws HiveException;
+ PartitionDesc baseTablePartDesc, String baseTableName, String dbName) throws HiveException {
+ return null;
+ }
protected void setStatsDir(HiveConf builderConf) {
String statsDir;
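
The refactoring above replaces an abstract method with a pair of concrete overloads: the new Index-taking entry point adapts its argument and delegates to the legacy List<FieldSchema> hook, so existing handlers keep working while new ones can override the richer variant. Schematically (IndexSketch and the String return type are stand-ins, not Hive API):

    import java.util.Arrays;
    import java.util.List;

    final class IndexSketch {                       // stand-in for the metastore Index
        private final List<String> cols;
        IndexSketch(List<String> cols) { this.cols = cols; }
        List<String> getCols() { return cols; }
    }

    class HandlerBase {
        // New entry point: the default implementation adapts and forwards once.
        protected String getIndexBuilderTask(IndexSketch index, boolean partitioned) {
            return getIndexBuilderTask(index.getCols(), partitioned);
        }
        // Legacy hook: formerly abstract, now a no-op default.
        protected String getIndexBuilderTask(List<String> indexCols, boolean partitioned) {
            return null;
        }
    }

    final class NewStyleHandler extends HandlerBase {
        @Override
        protected String getIndexBuilderTask(IndexSketch index, boolean partitioned) {
            return "rebuild" + index.getCols();     // overrides the richer variant directly
        }
    }

    final class DelegationDemo {
        public static void main(String[] args) {
            HandlerBase h = new NewStyleHandler();
            System.out.println(h.getIndexBuilderTask(
                new IndexSketch(Arrays.asList("a", "b")), false));    // rebuild[a, b]
        }
    }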
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Wed Aug 13 00:25:32 2014
@@ -409,6 +409,12 @@ public class Hive {
}
}
+ public void alterIndex(String baseTableName, String indexName, Index newIdx)
+ throws InvalidOperationException, HiveException {
+ String[] names = Utilities.getDbTableName(baseTableName);
+ alterIndex(names[0], names[1], indexName, newIdx);
+ }
+
/**
* Updates the existing index metadata with the new metadata.
*
@@ -667,17 +673,16 @@ public class Hive {
throws HiveException {
try {
- String dbName = SessionState.get().getCurrentDatabase();
Index old_index = null;
try {
- old_index = getIndex(dbName, tableName, indexName);
+ old_index = getIndex(tableName, indexName);
} catch (Exception e) {
}
if (old_index != null) {
- throw new HiveException("Index " + indexName + " already exists on table " + tableName + ", db=" + dbName);
+ throw new HiveException("Index " + indexName + " already exists on table " + tableName);
}
- org.apache.hadoop.hive.metastore.api.Table baseTbl = getMSC().getTable(dbName, tableName);
+ org.apache.hadoop.hive.metastore.api.Table baseTbl = getTable(tableName).getTTable();
if (baseTbl.getTableType() == TableType.VIRTUAL_VIEW.toString()) {
throw new HiveException("tableName="+ tableName +" is a VIRTUAL VIEW. Index on VIRTUAL VIEW is not supported.");
}
@@ -686,17 +691,13 @@ public class Hive {
+ " is a TEMPORARY TABLE. Index on TEMPORARY TABLE is not supported.");
}
- if (indexTblName == null) {
- indexTblName = MetaStoreUtils.getIndexTableName(dbName, tableName, indexName);
- } else {
- org.apache.hadoop.hive.metastore.api.Table temp = null;
- try {
- temp = getMSC().getTable(dbName, indexTblName);
- } catch (Exception e) {
- }
- if (temp != null) {
- throw new HiveException("Table name " + indexTblName + " already exists. Choose another name.");
- }
+ org.apache.hadoop.hive.metastore.api.Table temp = null;
+ try {
+ temp = getTable(indexTblName).getTTable();
+ } catch (Exception e) {
+ }
+ if (temp != null) {
+ throw new HiveException("Table name " + indexTblName + " already exists. Choose another name.");
}
org.apache.hadoop.hive.metastore.api.StorageDescriptor storageDescriptor = baseTbl.getSd().deepCopy();
@@ -774,7 +775,9 @@ public class Hive {
HiveIndexHandler indexHandler = HiveUtils.getIndexHandler(this.getConf(), indexHandlerClass);
if (indexHandler.usesIndexTable()) {
- tt = new org.apache.hadoop.hive.ql.metadata.Table(dbName, indexTblName).getTTable();
+ String idname = Utilities.getDatabaseName(indexTblName);
+ String itname = Utilities.getTableName(indexTblName);
+ tt = new org.apache.hadoop.hive.ql.metadata.Table(idname, itname).getTTable();
List<FieldSchema> partKeys = baseTbl.getPartitionKeys();
tt.setPartitionKeys(partKeys);
tt.setTableType(TableType.INDEX_TABLE.toString());
@@ -798,7 +801,9 @@ public class Hive {
throw new RuntimeException("Please specify deferred rebuild using \" WITH DEFERRED REBUILD \".");
}
- Index indexDesc = new Index(indexName, indexHandlerClass, dbName, tableName, time, time, indexTblName,
+ String tdname = Utilities.getDatabaseName(tableName);
+ String ttname = Utilities.getTableName(tableName);
+ Index indexDesc = new Index(indexName, indexHandlerClass, tdname, ttname, time, time, indexTblName,
storageDescriptor, params, deferredRebuild);
if (indexComment != null) {
indexDesc.getParameters().put("comment", indexComment);
@@ -818,19 +823,6 @@ public class Hive {
}
}
- public Index getIndex(String qualifiedIndexName) throws HiveException {
- String[] names = getQualifiedNames(qualifiedIndexName);
- switch (names.length) {
- case 3:
- return getIndex(names[0], names[1], names[2]);
- case 2:
- return getIndex(SessionState.get().getCurrentDatabase(),
- names[0], names[1]);
- default:
- throw new HiveException("Invalid index name:" + qualifiedIndexName);
- }
- }
-
public Index getIndex(String baseTableName, String indexName) throws HiveException {
String[] names = Utilities.getDbTableName(baseTableName);
return this.getIndex(names[0], names[1], indexName);
@@ -845,6 +837,11 @@ public class Hive {
}
}
+ public boolean dropIndex(String baseTableName, String index_name, boolean deleteData) throws HiveException {
+ String[] names = Utilities.getDbTableName(baseTableName);
+ return dropIndex(names[0], names[1], index_name, deleteData);
+ }
+
public boolean dropIndex(String db_name, String tbl_name, String index_name, boolean deleteData) throws HiveException {
try {
return getMSC().dropIndex(db_name, tbl_name, index_name, deleteData);
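
The new alterIndex/dropIndex overloads accept either a bare table name or a "db.table" qualified one and split it via Utilities.getDbTableName. A standalone sketch of that resolution (defaultDb stands in for the session's current database; this is not the Utilities implementation):

    // Splits "db.table" or bare "table" into {db, table}, defaulting the database.
    final class DbTableNameSketch {
        static String[] getDbTableName(String defaultDb, String name) {
            int dot = name.indexOf('.');
            if (dot < 0) {
                return new String[] { defaultDb, name };    // unqualified: use default db
            }
            return new String[] { name.substring(0, dot), name.substring(dot + 1) };
        }

        public static void main(String[] args) {
            String[] n = getDbTableName("default", "sales.t1");
            System.out.println(n[0] + " / " + n[1]);    // sales / t1
            n = getDbTableName("default", "t2");
            System.out.println(n[0] + " / " + n[1]);    // default / t2
        }
    }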
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java Wed Aug 13 00:25:32 2014
@@ -41,7 +41,6 @@ import org.apache.hadoop.hive.ql.exec.Ta
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ParseContext;
@@ -267,15 +266,7 @@ abstract public class AbstractBucketJoin
Table tbl = topToTable.get(tso);
if (tbl.isPartitioned()) {
- PrunedPartitionList prunedParts;
- try {
- prunedParts = pGraphContext.getPrunedPartitions(alias, tso);
- } catch (HiveException e) {
- // Has to use full name to make sure it does not conflict with
- // org.apache.commons.lang.StringUtils
- LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
- throw new SemanticException(e.getMessage(), e);
- }
+ PrunedPartitionList prunedParts = pGraphContext.getPrunedPartitions(alias, tso);
List<Partition> partitions = prunedParts.getNotDeniedPartns();
// construct a mapping of (Partition->bucket file names) and (Partition -> bucket number)
if (partitions.isEmpty()) {
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java Wed Aug 13 00:25:32 2014
@@ -41,7 +41,6 @@ import org.apache.hadoop.hive.ql.exec.Ta
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ParseContext;
@@ -316,13 +315,7 @@ abstract public class AbstractSMBJoinPro
Table tbl = topToTable.get(tso);
if (tbl.isPartitioned()) {
- PrunedPartitionList prunedParts;
- try {
- prunedParts = pGraphContext.getPrunedPartitions(alias, tso);
- } catch (HiveException e) {
- LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
- throw new SemanticException(e.getMessage(), e);
- }
+ PrunedPartitionList prunedParts = pGraphContext.getPrunedPartitions(alias, tso);
List<Partition> partitions = prunedParts.getNotDeniedPartns();
// Populate the names and order of columns for the first partition of the
// first table
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java Wed Aug 13 00:25:32 2014
@@ -497,7 +497,8 @@ public class BucketingSortingReduceSinkO
}
if (srcTable.isPartitioned()) {
- PrunedPartitionList prunedParts = pGraphContext.getOpToPartList().get(ts);
+ PrunedPartitionList prunedParts =
+ pGraphContext.getPrunedPartitions(srcTable.getTableName(), ts);
List<Partition> partitions = prunedParts.getNotDeniedPartns();
// Support for dynamic partitions can be added later
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java Wed Aug 13 00:25:32 2014
@@ -372,52 +372,57 @@ public final class ColumnPrunerProcFacto
cppCtx.getPrunedColLists().put((Operator<? extends OperatorDesc>) nd,
cols);
- List<Integer> neededColumnIds = new ArrayList<Integer>();
- List<String> neededColumnNames = new ArrayList<String>();
- List<String> referencedColumnNames = new ArrayList<String>();
RowResolver inputRR = cppCtx.getOpToParseCtxMap().get(scanOp).getRowResolver();
- TableScanDesc desc = scanOp.getConf();
- List<VirtualColumn> virtualCols = desc.getVirtualCols();
- List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();
-
- // add virtual columns for ANALYZE TABLE
- if(scanOp.getConf().isGatherStats()) {
- cols.add(VirtualColumn.RAWDATASIZE.getName());
- }
+ setupNeededColumns(scanOp, inputRR, cols);
+ return null;
+ }
+ }
- for (String column : cols) {
- String[] tabCol = inputRR.reverseLookup(column);
- if (tabCol == null) {
- continue;
- }
- referencedColumnNames.add(column);
- ColumnInfo colInfo = inputRR.get(tabCol[0], tabCol[1]);
- if (colInfo.getIsVirtualCol()) {
- // part is also a virtual column, but part col should not in this
- // list.
- for (int j = 0; j < virtualCols.size(); j++) {
- VirtualColumn vc = virtualCols.get(j);
- if (vc.getName().equals(colInfo.getInternalName())) {
- newVirtualCols.add(vc);
- }
+ public static void setupNeededColumns(TableScanOperator scanOp, RowResolver inputRR,
+ List<String> cols) throws SemanticException {
+ List<Integer> neededColumnIds = new ArrayList<Integer>();
+ List<String> neededColumnNames = new ArrayList<String>();
+ List<String> referencedColumnNames = new ArrayList<String>();
+ TableScanDesc desc = scanOp.getConf();
+ List<VirtualColumn> virtualCols = desc.getVirtualCols();
+ List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();
+
+ // add virtual columns for ANALYZE TABLE
+ if(scanOp.getConf().isGatherStats()) {
+ cols.add(VirtualColumn.RAWDATASIZE.getName());
+ }
+
+ for (String column : cols) {
+ String[] tabCol = inputRR.reverseLookup(column);
+ if (tabCol == null) {
+ continue;
+ }
+ referencedColumnNames.add(column);
+ ColumnInfo colInfo = inputRR.get(tabCol[0], tabCol[1]);
+ if (colInfo.getIsVirtualCol()) {
+ // part is also a virtual column, but the part col should not be in this
+ // list.
+ for (int j = 0; j < virtualCols.size(); j++) {
+ VirtualColumn vc = virtualCols.get(j);
+ if (vc.getName().equals(colInfo.getInternalName())) {
+ newVirtualCols.add(vc);
}
- //no need to pass virtual columns to reader.
- continue;
- }
- int position = inputRR.getPosition(column);
- if (position >= 0) {
- // get the needed columns by id and name
- neededColumnIds.add(position);
- neededColumnNames.add(column);
}
+ //no need to pass virtual columns to reader.
+ continue;
+ }
+ int position = inputRR.getPosition(column);
+ if (position >= 0) {
+ // get the needed columns by id and name
+ neededColumnIds.add(position);
+ neededColumnNames.add(column);
}
-
- desc.setVirtualCols(newVirtualCols);
- scanOp.setNeededColumnIDs(neededColumnIds);
- scanOp.setNeededColumns(neededColumnNames);
- scanOp.setReferencedColumns(referencedColumnNames);
- return null;
}
+
+ desc.setVirtualCols(newVirtualCols);
+ scanOp.setNeededColumnIDs(neededColumnIds);
+ scanOp.setNeededColumns(neededColumnNames);
+ scanOp.setReferencedColumns(referencedColumnNames);
}
/**
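
The extracted setupNeededColumns splits the referenced columns into virtual columns (retained on the TableScanDesc) and physical columns (handed to the reader by position and name). A toy analogue of that partitioning (the column lists are illustrative; INPUT__FILE__NAME is one of Hive's real virtual columns):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    final class NeededColumnsSketch {
        public static void main(String[] args) {
            List<String> physical = Arrays.asList("id", "name", "ds");
            List<String> virtual  = Arrays.asList("INPUT__FILE__NAME");
            List<Integer> neededIds = new ArrayList<Integer>();
            List<String> neededNames = new ArrayList<String>();
            for (String col : Arrays.asList("name", "INPUT__FILE__NAME", "id")) {
                if (virtual.contains(col)) {
                    continue;                    // virtual columns are not pushed to the reader
                }
                int pos = physical.indexOf(col); // mirrors inputRR.getPosition(column)
                if (pos >= 0) {
                    neededIds.add(pos);
                    neededNames.add(col);
                }
            }
            System.out.println(neededIds + " " + neededNames);    // [1, 0] [name, id]
        }
    }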
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java Wed Aug 13 00:25:32 2014
@@ -77,12 +77,6 @@ public class ConstantPropagate implement
*/
@Override
public ParseContext transform(ParseContext pactx) throws SemanticException {
- if (pactx.getConf().getBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED)) {
- // Constant propagate is currently conflict with vectorizer, disabling constant propagate
- // if the later is enabled.
- return pactx;
- }
-
pGraphContext = pactx;
opToParseCtxMap = pGraphContext.getOpParseCtx();
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java Wed Aug 13 00:25:32 2014
@@ -46,7 +46,6 @@ import org.apache.hadoop.hive.ql.lib.Nod
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ParseContext;
@@ -61,7 +60,6 @@ import org.apache.hadoop.hive.ql.plan.Gr
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
-import org.apache.hadoop.util.StringUtils;
/**
* This transformation does group by optimization. If the grouping key is a superset
@@ -388,13 +386,8 @@ public class GroupByOptimizer implements
List<String> bucketCols = table.getBucketCols();
return matchBucketSortCols(groupByCols, bucketCols, sortCols);
} else {
- PrunedPartitionList partsList;
- try {
- partsList = pGraphContext.getPrunedPartitions(table.getTableName(), tableScanOp);
- } catch (HiveException e) {
- LOG.error(StringUtils.stringifyException(e));
- throw new SemanticException(e.getMessage(), e);
- }
+ PrunedPartitionList partsList =
+ pGraphContext.getPrunedPartitions(table.getTableName(), tableScanOp);
List<Partition> notDeniedPartns = partsList.getNotDeniedPartns();
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java Wed Aug 13 00:25:32 2014
@@ -23,7 +23,6 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
-import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
@@ -36,6 +35,7 @@ import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.index.IndexMetadataChangeTask;
@@ -57,7 +57,6 @@ import org.apache.hadoop.hive.ql.parse.S
public final class IndexUtils {
private static final Log LOG = LogFactory.getLog(IndexWhereProcessor.class.getName());
- private static final Map<Index, Table> indexToIndexTable = new HashMap<Index, Table>();
private IndexUtils(){
}
@@ -71,9 +70,7 @@ public final class IndexUtils {
* @throws HiveException
*/
public static Set<Partition> checkPartitionsCoveredByIndex(TableScanOperator tableScan,
- ParseContext pctx,
- Map<Table, List<Index>> indexes)
- throws HiveException {
+ ParseContext pctx, List<Index> indexes) throws HiveException {
Hive hive = Hive.get(pctx.getConf());
// make sure each partition exists on the index table
PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan);
@@ -83,7 +80,6 @@ public final class IndexUtils {
}
for (Partition part : queryPartitions) {
- List<Table> sourceIndexTables = getIndexTables(hive, part, indexes);
if (!containsPartition(hive, part, indexes)) {
return null; // problem if it doesn't contain the partition
}
@@ -93,63 +89,24 @@ public final class IndexUtils {
}
/**
- * return index tables associated with a given base table
- */
- private List<Table> getIndexTables(Hive hive, Table table,
- Map<Table, List<Index>> indexes) throws
- HiveException {
- List<Table> indexTables = new ArrayList<Table>();
- if (indexes == null || indexes.get(table) == null) {
- return indexTables;
- }
- for (Index index : indexes.get(table)) {
- Table indexTable = hive.getTable(index.getIndexTableName());
- indexToIndexTable.put(index, indexTable);
- indexTables.add(indexTable);
- }
- return indexTables;
- }
-
- /**
- * return index tables associated with the base table of the partition
- */
- private static List<Table> getIndexTables(Hive hive, Partition part,
- Map<Table, List<Index>> indexes) throws HiveException {
- List<Table> indexTables = new ArrayList<Table>();
- Table partitionedTable = part.getTable();
- if (indexes == null || indexes.get(partitionedTable) == null) {
- return indexTables;
- }
- for (Index index : indexes.get(partitionedTable)) {
- Table indexTable = hive.getTable(index.getIndexTableName());
- indexToIndexTable.put(index, indexTable);
- indexTables.add(indexTable);
- }
- return indexTables;
- }
-
- /**
* check that every index table contains the given partition and is fresh
*/
- private static boolean containsPartition(Hive hive, Partition part,
- Map<Table, List<Index>> indexes)
- throws HiveException {
+ private static boolean containsPartition(Hive hive, Partition part, List<Index> indexes)
+ throws HiveException {
HashMap<String, String> partSpec = part.getSpec();
-
- if (indexes == null || indexes.get(part.getTable()) == null) {
- return false;
- }
-
if (partSpec.isEmpty()) {
// empty specs come from non-partitioned tables
- return isIndexTableFresh(hive, indexes.get(part.getTable()), part.getTable());
+ return isIndexTableFresh(hive, indexes, part.getTable());
}
- for (Index index : indexes.get(part.getTable())) {
- Table indexTable = indexToIndexTable.get(index);
+ for (Index index : indexes) {
+ // index.getDbName() is used as the default database (it is the database of the
+ // target table) when index.getIndexTableName() does not contain a database name.
+ String[] qualified = Utilities.getDbTableName(index.getDbName(), index.getIndexTableName());
+ Table indexTable = hive.getTable(qualified[0], qualified[1]);
// get partitions that match the spec
- List<Partition> matchingPartitions = hive.getPartitions(indexTable, partSpec);
- if (matchingPartitions == null || matchingPartitions.size() == 0) {
+ Partition matchingPartition = hive.getPartition(indexTable, partSpec, false);
+ if (matchingPartition == null) {
LOG.info("Index table " + indexTable + "did not contain built partition that matched " + partSpec);
return false;
} else if (!isIndexPartitionFresh(hive, index, part)) {
@@ -160,7 +117,7 @@ public final class IndexUtils {
}
/**
- * Check the index partitions on a parttioned table exist and are fresh
+ * Check the index partitions on a partitioned table exist and are fresh
*/
private static boolean isIndexPartitionFresh(Hive hive, Index index,
Partition part) throws HiveException {
@@ -187,7 +144,7 @@ public final class IndexUtils {
}
/**
- * Check that the indexes on the unpartioned table exist and are fresh
+ * Check that the indexes on the un-partitioned table exist and are fresh
*/
private static boolean isIndexTableFresh(Hive hive, List<Index> indexes, Table src)
throws HiveException {
@@ -227,8 +184,8 @@ public final class IndexUtils {
public static List<Index> getIndexes(Table baseTableMetaData, List<String> matchIndexTypes)
throws SemanticException {
List<Index> matchingIndexes = new ArrayList<Index>();
- List<Index> indexesOnTable = null;
+ List<Index> indexesOnTable;
try {
indexesOnTable = baseTableMetaData.getAllIndexes((short) -1); // get all indexes
} catch (HiveException e) {
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java Wed Aug 13 00:25:32 2014
@@ -26,6 +26,7 @@ import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
@@ -33,21 +34,28 @@ import org.apache.hadoop.hive.ql.exec.Op
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.GenTezProcContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.HashTableDummyDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.OpTraits;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TezEdgeProperty;
import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType;
import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.stats.StatsUtils;
public class ReduceSinkMapJoinProc implements NodeProcessor {
@@ -111,18 +119,59 @@ public class ReduceSinkMapJoinProc imple
if (pos == -1) {
throw new SemanticException("Cannot find position of parent in mapjoin");
}
- LOG.debug("Mapjoin "+mapJoinOp+", pos: "+pos+" --> "+parentWork.getName());
- mapJoinOp.getConf().getParentToInput().put(pos, parentWork.getName());
+ MapJoinDesc joinConf = mapJoinOp.getConf();
+ long keyCount = Long.MAX_VALUE, rowCount = Long.MAX_VALUE, bucketCount = 1;
+ Statistics stats = parentRS.getStatistics();
+ if (stats != null) {
+ keyCount = rowCount = stats.getNumRows();
+ if (keyCount <= 0) {
+ keyCount = rowCount = Long.MAX_VALUE;
+ }
+ ArrayList<String> keyCols = parentRS.getConf().getOutputKeyColumnNames();
+ if (keyCols != null && !keyCols.isEmpty()) {
+ // See if we can arrive at a smaller number using distinct stats from key columns.
+ long maxKeyCount = 1;
+ String prefix = Utilities.ReduceField.KEY.toString();
+ for (String keyCol : keyCols) {
+ ExprNodeDesc realCol = parentRS.getColumnExprMap().get(prefix + "." + keyCol);
+ ColStatistics cs = StatsUtils.getColStatisticsFromExpression(null, stats, realCol);
+ if (cs == null || cs.getCountDistint() <= 0) {
+ maxKeyCount = Long.MAX_VALUE;
+ break;
+ }
+ maxKeyCount *= cs.getCountDistint();
+ if (maxKeyCount >= keyCount) {
+ break;
+ }
+ }
+ keyCount = Math.min(maxKeyCount, keyCount);
+ }
+ if (joinConf.isBucketMapJoin()) {
+ OpTraits opTraits = mapJoinOp.getOpTraits();
+ bucketCount = (opTraits == null) ? -1 : opTraits.getNumBuckets();
+ if (bucketCount > 0) {
+ // We cannot obtain a better estimate without CustomPartitionVertex providing it
+ // to us somehow; in which case using statistics would be completely unnecessary.
+ keyCount /= bucketCount;
+ }
+ }
+ }
+ LOG.info("Mapjoin " + mapJoinOp + ", pos: " + pos + " --> " + parentWork.getName() + " ("
+ + keyCount + " keys estimated from " + rowCount + " rows, " + bucketCount + " buckets)");
+ joinConf.getParentToInput().put(pos, parentWork.getName());
+ if (keyCount != Long.MAX_VALUE) {
+ joinConf.getParentKeyCounts().put(pos, keyCount);
+ }
int numBuckets = -1;
EdgeType edgeType = EdgeType.BROADCAST_EDGE;
- if (mapJoinOp.getConf().isBucketMapJoin()) {
+ if (joinConf.isBucketMapJoin()) {
// disable auto parallelism for bucket map joins
parentRS.getConf().setAutoParallel(false);
- numBuckets = (Integer) mapJoinOp.getConf().getBigTableBucketNumMapping().values().toArray()[0];
- if (mapJoinOp.getConf().getCustomBucketMapJoin()) {
+ numBuckets = (Integer) joinConf.getBigTableBucketNumMapping().values().toArray()[0];
+ if (joinConf.getCustomBucketMapJoin()) {
edgeType = EdgeType.CUSTOM_EDGE;
} else {
edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
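
The estimation added above reads as: start from the reduce sink's row count, tighten it by the product of the key columns' distinct-value counts, then divide by the bucket count for bucket map joins. A standalone sketch of that arithmetic (ndv <= 0 models a missing column statistic, as with getCountDistint() <= 0 above):

    // Estimate the distinct key count a map join's hash table will see.
    final class KeyCountSketch {
        static long estimate(long rowCount, long[] ndvs, long bucketCount) {
            long keyCount = rowCount <= 0 ? Long.MAX_VALUE : rowCount;
            long maxKeyCount = 1;
            for (long ndv : ndvs) {
                if (ndv <= 0) {                  // a column without stats poisons the product
                    maxKeyCount = Long.MAX_VALUE;
                    break;
                }
                maxKeyCount *= ndv;
                if (maxKeyCount >= keyCount) {   // cannot beat the row count; stop early
                    break;
                }
            }
            keyCount = Math.min(maxKeyCount, keyCount);
            if (bucketCount > 0) {
                keyCount /= bucketCount;         // each bucket task sees ~1/bucketCount of the keys
            }
            return keyCount;
        }

        public static void main(String[] args) {
            // 1M rows, key columns with 100 and 50 distinct values, 4 buckets:
            System.out.println(estimate(1000000L, new long[] { 100, 50 }, 4));    // 1250
        }
    }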
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java Wed Aug 13 00:25:32 2014
@@ -30,10 +30,9 @@ import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.metastore.api.Index;
-import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
@@ -45,7 +44,6 @@ import org.apache.hadoop.hive.ql.lib.Rul
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
/**
* RewriteCanApplyCtx class stores the context for the {@link RewriteCanApplyProcFactory}
@@ -84,7 +82,9 @@ public final class RewriteCanApplyCtx im
private Set<String> aggFuncColList = new LinkedHashSet<String>();
private final ParseContext parseContext;
+ private String alias;
private String baseTableName;
+ private String indexTableName;
private String aggFunction;
void resetCanApplyCtx(){
@@ -230,6 +230,14 @@ public final class RewriteCanApplyCtx im
this.aggFuncCnt = aggFuncCnt;
}
+ public String getAlias() {
+ return alias;
+ }
+
+ public void setAlias(String alias) {
+ this.alias = alias;
+ }
+
public String getBaseTableName() {
return baseTableName;
}
@@ -238,10 +246,26 @@ public final class RewriteCanApplyCtx im
this.baseTableName = baseTableName;
}
+ public String getIndexTableName() {
+ return indexTableName;
+ }
+
+ public void setIndexTableName(String indexTableName) {
+ this.indexTableName = indexTableName;
+ }
+
public ParseContext getParseContext() {
return parseContext;
}
+ public Set<String> getAllColumns() {
+ Set<String> allColumns = new LinkedHashSet<String>(selectColumnsList);
+ allColumns.addAll(predicateColumnsList);
+ allColumns.addAll(gbKeyNameList);
+ allColumns.addAll(aggFuncColList);
+ return allColumns;
+ }
+
/**
* This method walks all the nodes starting from topOp TableScanOperator node
@@ -255,15 +279,13 @@ public final class RewriteCanApplyCtx im
* @param topOp
* @throws SemanticException
*/
- void populateRewriteVars(Operator<? extends OperatorDesc> topOp)
+ void populateRewriteVars(TableScanOperator topOp)
throws SemanticException{
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"),
- RewriteCanApplyProcFactory.canApplyOnFilterOperator());
+ RewriteCanApplyProcFactory.canApplyOnFilterOperator(topOp));
opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + "%"),
- RewriteCanApplyProcFactory.canApplyOnGroupByOperator());
- opRules.put(new RuleRegExp("R3", SelectOperator.getOperatorName() + "%"),
- RewriteCanApplyProcFactory.canApplyOnSelectOperator());
+ RewriteCanApplyProcFactory.canApplyOnGroupByOperator(topOp));
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
@@ -364,5 +386,4 @@ public final class RewriteCanApplyCtx im
}
return true;
}
-
}
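
The new getAllColumns() accessor above simply unions the four column sets the walker
collects (select, predicate, group-by key, and aggregate-function columns) so a caller can
check in one place whether an index covers every column the query touches. A toy
restatement follows; the class and method names are illustrative, not Hive API.

  import java.util.LinkedHashSet;
  import java.util.Set;

  // Toy restatement of getAllColumns() above; names are illustrative, not Hive API.
  final class ColumnUnionSketch {
    static Set<String> allColumns(Set<String> selectCols, Set<String> predicateCols,
                                  Set<String> gbKeyCols, Set<String> aggFuncCols) {
      // LinkedHashSet keeps insertion order and silently drops duplicates,
      // matching the LinkedHashSet fields RewriteCanApplyCtx already uses.
      Set<String> all = new LinkedHashSet<String>(selectCols);
      all.addAll(predicateCols);
      all.addAll(gbKeyCols);
      all.addAll(aggFuncCols);
      return all;
    }
  }

A caller can then test indexKeyColumns.containsAll(allColumns(...)) in a single step
instead of checking the four sets separately (the actual covering check lives elsewhere in
the rewrite code).
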
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java Wed Aug 13 00:25:32 2014
@@ -18,19 +18,9 @@
package org.apache.hadoop.hive.ql.optimizer.index;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Stack;
-
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
@@ -39,10 +29,13 @@ import org.apache.hadoop.hive.ql.plan.Ag
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+import java.util.List;
+import java.util.Stack;
/**
* Factory of methods used by {@link RewriteGBUsingIndex}
@@ -50,43 +43,46 @@ import org.apache.hadoop.hive.ql.plan.Op
*
*/
public final class RewriteCanApplyProcFactory {
- private static RewriteCanApplyCtx canApplyCtx = null;
-
- private RewriteCanApplyProcFactory(){
- //this prevents the class from getting instantiated
- }
/**
* Check for conditions in FilterOperator that do not meet rewrite criteria.
*/
private static class CheckFilterProc implements NodeProcessor {
+
+ private TableScanOperator topOp;
+
+ public CheckFilterProc(TableScanOperator topOp) {
+ this.topOp = topOp;
+ }
+
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
FilterOperator operator = (FilterOperator)nd;
- canApplyCtx = (RewriteCanApplyCtx)ctx;
- FilterDesc conf = (FilterDesc)operator.getConf();
+ RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx)ctx;
+ FilterDesc conf = operator.getConf();
//The filter operator should have a predicate of ExprNodeGenericFuncDesc type.
//This represents the comparison operator
- ExprNodeGenericFuncDesc oldengfd = (ExprNodeGenericFuncDesc) conf.getPredicate();
+ ExprNodeDesc oldengfd = conf.getPredicate();
if(oldengfd == null){
canApplyCtx.setWhrClauseColsFetchException(true);
+ return null;
}
- //The predicate should have valid left and right columns
- List<String> colList = oldengfd.getCols();
- if(colList == null || colList.size() == 0){
+ ExprNodeDesc backtrack = ExprNodeDescUtils.backtrack(oldengfd, operator, topOp);
+ if (backtrack == null) {
canApplyCtx.setWhrClauseColsFetchException(true);
+ return null;
}
//Add the predicate columns to RewriteCanApplyCtx's predColRefs list to check later
//if index keys contain all filter predicate columns and vice-a-versa
- for (String col : colList) {
+ for (String col : backtrack.getCols()) {
canApplyCtx.getPredicateColumnsList().add(col);
}
return null;
}
}
- public static CheckFilterProc canApplyOnFilterOperator() {
- return new CheckFilterProc();
+ public static CheckFilterProc canApplyOnFilterOperator(TableScanOperator topOp) {
+ return new CheckFilterProc(topOp);
}
/**
@@ -95,10 +91,16 @@ public final class RewriteCanApplyProcFa
*/
private static class CheckGroupByProc implements NodeProcessor {
+ private TableScanOperator topOp;
+
+ public CheckGroupByProc(TableScanOperator topOp) {
+ this.topOp = topOp;
+ }
+
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
GroupByOperator operator = (GroupByOperator)nd;
- canApplyCtx = (RewriteCanApplyCtx)ctx;
+ RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx)ctx;
//for each group-by clause in query, only one GroupByOperator of the
//GBY-RS-GBY sequence is stored in getGroupOpToInputTables
//we need to process only this operator
@@ -107,7 +109,7 @@ public final class RewriteCanApplyProcFa
!canApplyCtx.isQueryHasGroupBy()){
canApplyCtx.setQueryHasGroupBy(true);
- GroupByDesc conf = (GroupByDesc) operator.getConf();
+ GroupByDesc conf = operator.getConf();
List<AggregationDesc> aggrList = conf.getAggregators();
if(aggrList != null && aggrList.size() > 0){
for (AggregationDesc aggregationDesc : aggrList) {
@@ -119,40 +121,39 @@ public final class RewriteCanApplyProcFa
String aggFunc = aggregationDesc.getGenericUDAFName();
if(!("count".equals(aggFunc))){
canApplyCtx.setAggFuncIsNotCount(true);
- }else{
- List<ExprNodeDesc> para = aggregationDesc.getParameters();
- //for a valid aggregation, it needs to have non-null parameter list
- if(para == null){
- canApplyCtx.setAggFuncColsFetchException(true);
- }else if(para.size() == 0){
- //count(*) case
- canApplyCtx.setCountOnAllCols(true);
- canApplyCtx.setAggFunction("_count_of_all");
- }else{
- assert para.size()==1;
- for(int i=0; i< para.size(); i++){
- ExprNodeDesc expr = para.get(i);
- if(expr instanceof ExprNodeColumnDesc){
- //Add the columns to RewriteCanApplyCtx's selectColumnsList list
- //to check later if index keys contain all select clause columns
- //and vice-a-versa. We get the select column 'actual' names only here
- //if we have a agg func along with group-by
- //SelectOperator has internal names in its colList data structure
- canApplyCtx.getSelectColumnsList().add(
- ((ExprNodeColumnDesc) expr).getColumn());
- //Add the columns to RewriteCanApplyCtx's aggFuncColList list to check later
- //if columns contained in agg func are index key columns
- canApplyCtx.getAggFuncColList().add(
- ((ExprNodeColumnDesc) expr).getColumn());
- canApplyCtx.setAggFunction("_count_of_" +
- ((ExprNodeColumnDesc) expr).getColumn() + "");
- }else if(expr instanceof ExprNodeConstantDesc){
- //count(1) case
- canApplyCtx.setCountOfOne(true);
- canApplyCtx.setAggFunction("_count_of_1");
- }
- }
+ return false;
+ }
+ List<ExprNodeDesc> para = aggregationDesc.getParameters();
+ //for a valid aggregation, it needs to have a non-null parameter list
+ if (para == null) {
+ canApplyCtx.setAggFuncColsFetchException(true);
+ } else if (para.size() == 0) {
+ //count(*) case
+ canApplyCtx.setCountOnAllCols(true);
+ canApplyCtx.setAggFunction("_count_of_all");
+ } else if (para.size() == 1) {
+ ExprNodeDesc expr = ExprNodeDescUtils.backtrack(para.get(0), operator, topOp);
+ if (expr instanceof ExprNodeColumnDesc){
+ //Add the columns to RewriteCanApplyCtx's selectColumnsList
+ //to check later if index keys contain all select clause columns
+ //and vice versa. We get the select columns' 'actual' names only here
+ //if we have an agg func along with group-by;
+ //SelectOperator has internal names in its colList data structure
+ canApplyCtx.getSelectColumnsList().add(
+ ((ExprNodeColumnDesc) expr).getColumn());
+ //Add the columns to RewriteCanApplyCtx's aggFuncColList to check later
+ //if columns contained in agg func are index key columns
+ canApplyCtx.getAggFuncColList().add(
+ ((ExprNodeColumnDesc) expr).getColumn());
+ canApplyCtx.setAggFunction("_count_of_" +
+ ((ExprNodeColumnDesc) expr).getColumn() + "");
+ } else if(expr instanceof ExprNodeConstantDesc) {
+ //count(1) case
+ canApplyCtx.setCountOfOne(true);
+ canApplyCtx.setAggFunction("_count_of_1");
}
+ } else {
+ throw new SemanticException("Invalid number of arguments for count");
}
}
}
@@ -163,13 +164,13 @@ public final class RewriteCanApplyProcFa
canApplyCtx.setGbyKeysFetchException(true);
}
for (ExprNodeDesc expr : keyList) {
- checkExpression(expr);
+ checkExpression(canApplyCtx, expr);
}
}
return null;
}
- private void checkExpression(ExprNodeDesc expr){
+ private void checkExpression(RewriteCanApplyCtx canApplyCtx, ExprNodeDesc expr){
if(expr instanceof ExprNodeColumnDesc){
//Add the group-by keys to RewriteCanApplyCtx's gbKeyNameList list to check later
//if all keys are from index columns
@@ -182,59 +183,14 @@ public final class RewriteCanApplyProcFa
canApplyCtx.getGbKeyNameList().addAll(expr.getCols());
canApplyCtx.getSelectColumnsList().add(((ExprNodeColumnDesc) childExpr).getColumn());
}else if(childExpr instanceof ExprNodeGenericFuncDesc){
- checkExpression(childExpr);
+ checkExpression(canApplyCtx, childExpr);
}
}
}
}
}
-
- public static CheckGroupByProc canApplyOnGroupByOperator() {
- return new CheckGroupByProc();
+ public static CheckGroupByProc canApplyOnGroupByOperator(TableScanOperator topOp) {
+ return new CheckGroupByProc(topOp);
}
-
-
- /**
- * Check for conditions in SelectOperator that do not meet rewrite criteria.
- */
- private static class CheckSelectProc implements NodeProcessor {
- public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
- Object... nodeOutputs) throws SemanticException {
- SelectOperator operator = (SelectOperator)nd;
- canApplyCtx = (RewriteCanApplyCtx)ctx;
-
- List<Operator<? extends OperatorDesc>> childrenList = operator.getChildOperators();
- Operator<? extends OperatorDesc> child = childrenList.get(0);
- if(child instanceof FileSinkOperator){
- Map<String, String> internalToAlias = new LinkedHashMap<String, String>();
- RowSchema rs = operator.getSchema();
- //to get the internal to alias mapping
- List<ColumnInfo> sign = rs.getSignature();
- for (ColumnInfo columnInfo : sign) {
- internalToAlias.put(columnInfo.getInternalName(), columnInfo.getAlias());
- }
-
- //if FilterOperator predicate has internal column names,
- //we need to retrieve the 'actual' column names to
- //check if index keys contain all filter predicate columns and vice-a-versa
- Iterator<String> predItr = canApplyCtx.getPredicateColumnsList().iterator();
- while(predItr.hasNext()){
- String predCol = predItr.next();
- String newPredCol = "";
- if(internalToAlias.get(predCol) != null){
- newPredCol = internalToAlias.get(predCol);
- canApplyCtx.getPredicateColumnsList().remove(predCol);
- canApplyCtx.getPredicateColumnsList().add(newPredCol);
- }
- }
- }
- return null;
- }
- }
-
- public static CheckSelectProc canApplyOnSelectOperator() {
- return new CheckSelectProc();
- }
-
}
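
Pulling the count(...) classification out of the refactored CheckGroupByProc above:
anything other than count disqualifies the rewrite; count(*) is tagged "_count_of_all",
count(1) (an argument that backtracks to a constant) "_count_of_1", count(col)
"_count_of_<col>", and more than one argument is a semantic error. A hypothetical
standalone helper capturing that decision table is sketched below, under the assumption
that the single parameter has already been backtracked to either a column name or a
constant; it is not Hive API.

  // Hypothetical helper (not Hive API) restating the count() classification in
  // CheckGroupByProc. columnOrNull is the backtracked single argument: a column
  // name, or null when it resolved to a constant (the count(1) case).
  final class CountClassifierSketch {
    static String classifyCount(String udafName, int paramCount, String columnOrNull) {
      if (!"count".equals(udafName)) {
        return null;                       // only count(...) is rewritable
      }
      if (paramCount == 0) {
        return "_count_of_all";            // count(*)
      }
      if (paramCount > 1) {
        // mirrors: throw new SemanticException("Invalid number of arguments for count")
        throw new IllegalArgumentException("Invalid number of arguments for count");
      }
      return (columnOrNull != null)
          ? "_count_of_" + columnOrNull    // count(col)
          : "_count_of_1";                 // count(1)
    }
  }
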