Posted to commits@hive.apache.org by br...@apache.org on 2014/08/13 02:25:34 UTC
svn commit: r1617633 [2/6] - in /hive/branches/spark: ./ ant/
ant/src/org/apache/hadoop/hive/ant/ beeline/
beeline/src/java/org/apache/hive/beeline/ beeline/src/main/resources/
cli/src/java/org/apache/hadoop/hive/cli/
common/src/java/org/apache/hadoop/...
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Wed Aug 13 00:25:32 2014
@@ -80,6 +80,7 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
import org.apache.hadoop.hive.ql.udf.SettableUDF;
import org.apache.hadoop.hive.ql.udf.UDFConv;
import org.apache.hadoop.hive.ql.udf.UDFHex;
@@ -323,10 +324,12 @@ public class VectorizationContext {
ve = getGenericUdfVectorExpression(expr.getGenericUDF(),
childExpressions, mode, exprDesc.getTypeInfo());
}
+ } else if (exprDesc instanceof ExprNodeNullDesc) {
+ ve = getConstantVectorExpression(null, exprDesc.getTypeInfo(), mode);
} else if (exprDesc instanceof ExprNodeConstantDesc) {
ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(),
mode);
- }
+ }
if (ve == null) {
throw new HiveException("Could not vectorize expression: "+exprDesc.getName());
}
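
The new branch above routes NULL literals (ExprNodeNullDesc) through the same constant path as ordinary literals, so they no longer fall through to the "Could not vectorize expression" error below. A minimal toy version of the dispatch order, with NULL_LIT and Number standing in for ExprNodeNullDesc and ExprNodeConstantDesc (none of this is Hive API):

    // Toy dispatch mirroring the new branch order: null literal first, then constant.
    final class NullDispatchSketch {
        static final Object NULL_LIT = new Object();    // stand-in for ExprNodeNullDesc

        static String vectorize(Object expr) {
            if (expr == NULL_LIT) {
                return "ConstantVectorExpression(value=null)";
            } else if (expr instanceof Number) {        // stand-in for ExprNodeConstantDesc
                return "ConstantVectorExpression(value=" + expr + ")";
            }
            throw new IllegalStateException("Could not vectorize expression");
        }

        public static void main(String[] args) {
            System.out.println(vectorize(NULL_LIT));    // ConstantVectorExpression(value=null)
            System.out.println(vectorize(42));          // ConstantVectorExpression(value=42)
        }
    }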
@@ -410,8 +413,8 @@ public class VectorizationContext {
}
}
} else {
- for (ExprNodeDesc child : children) {
- ExprNodeDesc castExpression = getImplicitCastExpression(genericUDF, child, commonType);
+ for (ExprNodeDesc child : children) {
+ ExprNodeDesc castExpression = getImplicitCastExpression(genericUDF, child, commonType);
if (castExpression != null) {
atleastOneCastNeeded = true;
childrenWithCasts.add(castExpression);
@@ -652,69 +655,70 @@ public class VectorizationContext {
}
/**
- * Handles only the special case of unary operators on a constant.
+ * Handles only the special cases of the cast and unary +/- operators on a constant.
* @param exprDesc
- * @return The same expression if no folding done, else return the constant
+ * @return The same expression if no evaluation was done, else the constant
* expression.
* @throws HiveException
*/
- ExprNodeDesc foldConstantsForUnaryExpression(ExprNodeDesc exprDesc) throws HiveException {
- if (!(exprDesc instanceof ExprNodeGenericFuncDesc)) {
- return exprDesc;
- }
-
- if (exprDesc.getChildren() == null || (exprDesc.getChildren().size() != 1) ) {
- return exprDesc;
- }
-
- ExprNodeConstantDesc foldedChild = null;
- if (!( exprDesc.getChildren().get(0) instanceof ExprNodeConstantDesc)) {
-
- // try recursive folding
- ExprNodeDesc expr = foldConstantsForUnaryExpression(exprDesc.getChildren().get(0));
- if (expr instanceof ExprNodeConstantDesc) {
- foldedChild = (ExprNodeConstantDesc) expr;
- }
- } else {
- foldedChild = (ExprNodeConstantDesc) exprDesc.getChildren().get(0);
- }
-
- if (foldedChild == null) {
- return exprDesc;
- }
-
- ObjectInspector childoi = foldedChild.getWritableObjectInspector();
- GenericUDF gudf = ((ExprNodeGenericFuncDesc) exprDesc).getGenericUDF();
-
- if (gudf instanceof GenericUDFOPNegative || gudf instanceof GenericUDFOPPositive
- || castExpressionUdfs.contains(gudf.getClass())
- || ((gudf instanceof GenericUDFBridge)
- && castExpressionUdfs.contains(((GenericUDFBridge) gudf).getUdfClass()))) {
- ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprDesc);
- ObjectInspector output = evaluator.initialize(childoi);
- Object constant = evaluator.evaluate(null);
- Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output);
- return new ExprNodeConstantDesc(exprDesc.getTypeInfo(), java);
- }
-
- return exprDesc;
+ ExprNodeDesc evaluateCastOnConstants(ExprNodeDesc exprDesc) throws HiveException {
+ if (!(exprDesc instanceof ExprNodeGenericFuncDesc)) {
+ return exprDesc;
+ }
+
+ if (exprDesc.getChildren() == null || (exprDesc.getChildren().size() != 1) ) {
+ return exprDesc;
+ }
+
+ ExprNodeConstantDesc foldedChild = null;
+ if (!( exprDesc.getChildren().get(0) instanceof ExprNodeConstantDesc)) {
+
+ // try recursive folding
+ ExprNodeDesc expr = evaluateCastOnConstants(exprDesc.getChildren().get(0));
+ if (expr instanceof ExprNodeConstantDesc) {
+ foldedChild = (ExprNodeConstantDesc) expr;
+ }
+ } else {
+ foldedChild = (ExprNodeConstantDesc) exprDesc.getChildren().get(0);
+ }
+
+ if (foldedChild == null) {
+ return exprDesc;
+ }
+
+ ObjectInspector childoi = foldedChild.getWritableObjectInspector();
+ GenericUDF gudf = ((ExprNodeGenericFuncDesc) exprDesc).getGenericUDF();
+
+ // Only evaluate unary +/-, a cast on a constant, or a recursive chain of such casts.
+ if (gudf instanceof GenericUDFOPNegative || gudf instanceof GenericUDFOPPositive ||
+ castExpressionUdfs.contains(gudf.getClass())
+ || ((gudf instanceof GenericUDFBridge)
+ && castExpressionUdfs.contains(((GenericUDFBridge) gudf).getUdfClass()))) {
+ ExprNodeEvaluator<?> evaluator = ExprNodeEvaluatorFactory.get(exprDesc);
+ ObjectInspector output = evaluator.initialize(childoi);
+ Object constant = evaluator.evaluate(null);
+ Object java = ObjectInspectorUtils.copyToStandardJavaObject(constant, output);
+ return new ExprNodeConstantDesc(exprDesc.getTypeInfo(), java);
+ }
+
+ return exprDesc;
}
-
- /* Fold simple unary expressions in all members of the input list and return new list
+
+ /* Evaluate cast-on-constant operators in all members of the input list and return a new list
* containing results.
*/
- private List<ExprNodeDesc> foldConstantsForUnaryExprs(List<ExprNodeDesc> childExpr)
- throws HiveException {
- List<ExprNodeDesc> constantFoldedChildren = new ArrayList<ExprNodeDesc>();
- if (childExpr != null) {
- for (ExprNodeDesc expr : childExpr) {
- expr = this.foldConstantsForUnaryExpression(expr);
- constantFoldedChildren.add(expr);
- }
- }
- return constantFoldedChildren;
+ private List<ExprNodeDesc> evaluateCastOnConstants(List<ExprNodeDesc> childExpr)
+ throws HiveException {
+ List<ExprNodeDesc> evaluatedChildren = new ArrayList<ExprNodeDesc>();
+ if (childExpr != null) {
+ for (ExprNodeDesc expr : childExpr) {
+ expr = this.evaluateCastOnConstants(expr);
+ evaluatedChildren.add(expr);
+ }
+ }
+ return evaluatedChildren;
}
-
+
private VectorExpression getConstantVectorExpression(Object constantValue, TypeInfo typeInfo,
Mode mode) throws HiveException {
String type = typeInfo.getTypeName();
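
The renamed evaluateCastOnConstants performs a bottom-up fold: recurse into the single child, and if it reduces to a constant, evaluate the cast/+ve/-ve operator over it. A self-contained sketch of that pattern on a toy expression tree (Expr, Const, and Cast are stand-ins for the ExprNodeDesc hierarchy, not Hive classes):

    // Fold casts over constants bottom-up, returning the input node untouched
    // when nothing can be evaluated -- the same contract as evaluateCastOnConstants.
    interface Expr {}

    final class Const implements Expr {
        final Object value;
        Const(Object value) { this.value = value; }
    }

    final class Cast implements Expr {              // stand-in for a cast GenericUDF node
        final Expr child;
        Cast(Expr child) { this.child = child; }
        Object apply(Object v) { return v == null ? null : ((Number) v).longValue(); }
    }

    final class CastFoldSketch {
        static Expr fold(Expr e) {
            if (!(e instanceof Cast)) {
                return e;                           // only cast nodes are evaluated
            }
            Cast cast = (Cast) e;
            Expr child = fold(cast.child);          // try recursive folding first
            if (child instanceof Const) {
                return new Const(cast.apply(((Const) child).value));
            }
            return e;                               // child stayed non-constant: no fold
        }

        public static void main(String[] args) {
            Expr folded = fold(new Cast(new Cast(new Const(3.7d))));
            System.out.println(((Const) folded).value);    // 3
        }
    }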
@@ -903,8 +907,9 @@ public class VectorizationContext {
private VectorExpression getGenericUdfVectorExpression(GenericUDF udf,
List<ExprNodeDesc> childExpr, Mode mode, TypeInfo returnType) throws HiveException {
- List<ExprNodeDesc> constantFoldedChildren = foldConstantsForUnaryExprs(childExpr);
- childExpr = constantFoldedChildren;
+ List<ExprNodeDesc> castedChildren = evaluateCastOnConstants(childExpr);
+ childExpr = castedChildren;
+
//First handle special cases
if (udf instanceof GenericUDFBetween) {
return getBetweenFilterExpression(childExpr, mode, returnType);
@@ -928,15 +933,15 @@ public class VectorizationContext {
}
} else if (udf instanceof GenericUDFToDecimal) {
return getCastToDecimal(childExpr, returnType);
- }
-
+ }
+
// Now do a general lookup
Class<?> udfClass = udf.getClass();
if (udf instanceof GenericUDFBridge) {
udfClass = ((GenericUDFBridge) udf).getUdfClass();
}
- VectorExpression ve = getVectorExpressionForUdf(udfClass, constantFoldedChildren, mode, returnType);
+ VectorExpression ve = getVectorExpressionForUdf(udfClass, castedChildren, mode, returnType);
if (ve == null) {
throw new HiveException("Udf: "+udf.getClass().getSimpleName()+", is not supported");
@@ -998,7 +1003,7 @@ public class VectorizationContext {
}
}
}
-
+
/**
* Create a filter or boolean-valued expression for column IN ( <list-of-constants> )
*/
@@ -1006,13 +1011,11 @@ public class VectorizationContext {
throws HiveException {
ExprNodeDesc colExpr = childExpr.get(0);
- TypeInfo colTypeInfo = colExpr.getTypeInfo();
String colType = colExpr.getTypeString();
// prepare arguments for createVectorExpression
- List<ExprNodeDesc> childrenForInList =
- foldConstantsForUnaryExprs(childExpr.subList(1, childExpr.size()));
-
+ List<ExprNodeDesc> childrenForInList = evaluateCastOnConstants(childExpr.subList(1, childExpr.size()));
+
/* This method assumes that the IN list has no NULL entries. That is enforced elsewhere,
* in the Vectorizer class. If NULL is passed in as a list entry, behavior is not defined.
* If in the future, NULL values are allowed in the IN list, be sure to handle 3-valued
@@ -1107,16 +1110,116 @@ public class VectorizationContext {
return getCastToString(childExpr, returnType);
}
return null;
- }
-
+ }
+
+ private Decimal128 castConstantToDecimal(Object scalar, TypeInfo type) throws HiveException {
+ PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
+ String typename = type.getTypeName();
+ Decimal128 d = new Decimal128();
+ int scale = HiveDecimalUtils.getScaleForType(ptinfo);
+ switch (ptinfo.getPrimitiveCategory()) {
+ case FLOAT:
+ float floatVal = ((Float) scalar).floatValue();
+ d.update(floatVal, (short) scale);
+ break;
+ case DOUBLE:
+ double doubleVal = ((Double) scalar).doubleValue();
+ d.update(doubleVal, (short) scale);
+ break;
+ case BYTE:
+ byte byteVal = ((Byte) scalar).byteValue();
+ d.update(byteVal, (short) scale);
+ break;
+ case SHORT:
+ short shortVal = ((Short) scalar).shortValue();
+ d.update(shortVal, (short) scale);
+ break;
+ case INT:
+ int intVal = ((Integer) scalar).intValue();
+ d.update(intVal, (short) scale);
+ break;
+ case LONG:
+ long longVal = ((Long) scalar).longValue();
+ d.update(longVal, (short) scale);
+ break;
+ case DECIMAL:
+ HiveDecimal decimalVal = (HiveDecimal) scalar;
+ d.update(decimalVal.unscaledValue(), (short) scale);
+ break;
+ default:
+ throw new HiveException("Unsupported type "+typename+" for cast to Decimal128");
+ }
+ return d;
+ }
+
+ private String castConstantToString(Object scalar, TypeInfo type) throws HiveException {
+ PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
+ String typename = type.getTypeName();
+ switch (ptinfo.getPrimitiveCategory()) {
+ case FLOAT:
+ case DOUBLE:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return ((Number) scalar).toString();
+ case DECIMAL:
+ HiveDecimal decimalVal = (HiveDecimal) scalar;
+ return decimalVal.toString();
+ default:
+ throw new HiveException("Unsupported type "+typename+" for cast to String");
+ }
+ }
+
+ private Double castConstantToDouble(Object scalar, TypeInfo type) throws HiveException {
+ PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
+ String typename = type.getTypeName();
+ switch (ptinfo.getPrimitiveCategory()) {
+ case FLOAT:
+ case DOUBLE:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return ((Number) scalar).doubleValue();
+ case DECIMAL:
+ HiveDecimal decimalVal = (HiveDecimal) scalar;
+ return decimalVal.doubleValue();
+ default:
+ throw new HiveException("Unsupported type "+typename+" for cast to Double");
+ }
+ }
+
+ private Long castConstantToLong(Object scalar, TypeInfo type) throws HiveException {
+ PrimitiveTypeInfo ptinfo = (PrimitiveTypeInfo) type;
+ String typename = type.getTypeName();
+ switch (ptinfo.getPrimitiveCategory()) {
+ case FLOAT:
+ case DOUBLE:
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return ((Number) scalar).longValue();
+ case DECIMAL:
+ HiveDecimal decimalVal = (HiveDecimal) scalar;
+ return decimalVal.longValue();
+ default:
+ throw new HiveException("Unsupported type "+typename+" for cast to Long");
+ }
+ }
+
private VectorExpression getCastToDecimal(List<ExprNodeDesc> childExpr, TypeInfo returnType)
throws HiveException {
ExprNodeDesc child = childExpr.get(0);
String inputType = childExpr.get(0).getTypeString();
if (child instanceof ExprNodeConstantDesc) {
- // Don't do constant folding here. Wait until the optimizer is changed to do it.
- // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
- return null;
+ // Return a constant vector expression
+ Object constantValue = ((ExprNodeConstantDesc) child).getValue();
+ Decimal128 decimalValue = castConstantToDecimal(constantValue, child.getTypeInfo());
+ return getConstantVectorExpression(decimalValue, returnType, Mode.PROJECTION);
+ } else if (child instanceof ExprNodeNullDesc) {
+ return getConstantVectorExpression(null, returnType, Mode.PROJECTION);
}
if (isIntFamily(inputType)) {
return createVectorExpression(CastLongToDecimal.class, childExpr, Mode.PROJECTION, returnType);
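
The castConstantTo* helpers added above all follow the same shape: switch on the primitive category, funnel the numeric boxes through the shared java.lang.Number API, and special-case HiveDecimal. A standalone analogue of castConstantToLong (PrimCat stands in for PrimitiveCategory; the decimal branch is elided):

    // Stand-in for the PrimitiveCategory-driven constant casts above.
    enum PrimCat { FLOAT, DOUBLE, BYTE, SHORT, INT, LONG }

    final class ConstantCastSketch {
        static Long castConstantToLong(Object scalar, PrimCat cat) {
            switch (cat) {
                case FLOAT: case DOUBLE: case BYTE: case SHORT: case INT: case LONG:
                    // All numeric wrappers extend Number, so one branch covers them.
                    return ((Number) scalar).longValue();
                default:
                    throw new IllegalArgumentException(
                        "Unsupported type " + cat + " for cast to Long");
            }
        }

        public static void main(String[] args) {
            // Note the truncation semantics: longValue() drops the fraction.
            System.out.println(castConstantToLong(3.9f, PrimCat.FLOAT));    // 3
        }
    }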
@@ -1131,16 +1234,19 @@ public class VectorizationContext {
return createVectorExpression(CastTimestampToDecimal.class, childExpr, Mode.PROJECTION, returnType);
}
throw new HiveException("Unhandled cast input type: " + inputType);
- }
-
+ }
+
private VectorExpression getCastToString(List<ExprNodeDesc> childExpr, TypeInfo returnType)
throws HiveException {
ExprNodeDesc child = childExpr.get(0);
String inputType = childExpr.get(0).getTypeString();
if (child instanceof ExprNodeConstantDesc) {
- // Don't do constant folding here. Wait until the optimizer is changed to do it.
- // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
- return null;
+ // Return a constant vector expression
+ Object constantValue = ((ExprNodeConstantDesc) child).getValue();
+ String strValue = castConstantToString(constantValue, child.getTypeInfo());
+ return getConstantVectorExpression(strValue, returnType, Mode.PROJECTION);
+ } else if (child instanceof ExprNodeNullDesc) {
+ return getConstantVectorExpression(null, returnType, Mode.PROJECTION);
}
if (inputType.equals("boolean")) {
// Boolean must come before the integer family. It's a special case.
@@ -1164,9 +1270,12 @@ public class VectorizationContext {
ExprNodeDesc child = childExpr.get(0);
String inputType = childExpr.get(0).getTypeString();
if (child instanceof ExprNodeConstantDesc) {
- // Don't do constant folding here. Wait until the optimizer is changed to do it.
- // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
- return null;
+ // Return a constant vector expression
+ Object constantValue = ((ExprNodeConstantDesc) child).getValue();
+ Double doubleValue = castConstantToDouble(constantValue, child.getTypeInfo());
+ return getConstantVectorExpression(doubleValue, returnType, Mode.PROJECTION);
+ } else if (child instanceof ExprNodeNullDesc) {
+ return getConstantVectorExpression(null, returnType, Mode.PROJECTION);
}
if (isIntFamily(inputType)) {
return createVectorExpression(CastLongToDouble.class, childExpr, Mode.PROJECTION, returnType);
@@ -1191,6 +1300,8 @@ public class VectorizationContext {
// Don't do constant folding here. Wait until the optimizer is changed to do it.
// Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
return null;
+ } else if (child instanceof ExprNodeNullDesc) {
+ return getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, Mode.PROJECTION);
}
// Long and double are handled using descriptors, string needs to be specially handled.
if (inputType.equals("string")) {
@@ -1215,9 +1326,12 @@ public class VectorizationContext {
ExprNodeDesc child = childExpr.get(0);
String inputType = childExpr.get(0).getTypeString();
if (child instanceof ExprNodeConstantDesc) {
- // Don't do constant folding here. Wait until the optimizer is changed to do it.
- // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424.
- return null;
+ // Return a constant vector expression
+ Object constantValue = ((ExprNodeConstantDesc) child).getValue();
+ Long longValue = castConstantToLong(constantValue, child.getTypeInfo());
+ return getConstantVectorExpression(longValue, TypeInfoFactory.longTypeInfo, Mode.PROJECTION);
+ } else if (child instanceof ExprNodeNullDesc) {
+ return getConstantVectorExpression(null, TypeInfoFactory.longTypeInfo, Mode.PROJECTION);
}
// Float family, timestamp are handled via descriptor based lookup, int family needs
// special handling.
@@ -1281,7 +1395,7 @@ public class VectorizationContext {
String colType = commonType.getTypeName();
// prepare arguments for createVectorExpression
- List<ExprNodeDesc> childrenAfterNot = foldConstantsForUnaryExprs(castChildren);
+ List<ExprNodeDesc> childrenAfterNot = evaluateCastOnConstants(castChildren);
// determine class
Class<?> cl = null;
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ConstantVectorExpression.java Wed Aug 13 00:25:32 2014
@@ -187,13 +187,14 @@ public class ConstantVectorExpression ex
public void setTypeString(String typeString) {
this.outputType = typeString;
- if ("string".equalsIgnoreCase(typeString)) {
+ if (VectorizationContext.isStringFamily(typeString)) {
this.type = Type.BYTES;
- } else if ("double".equalsIgnoreCase(typeString)) {
+ } else if (VectorizationContext.isFloatFamily(typeString)) {
this.type = Type.DOUBLE;
- } else if (VectorizationContext.decimalTypePattern.matcher(typeString).matches()){
+ } else if (VectorizationContext.isDecimalFamily(typeString)){
this.type = Type.DECIMAL;
} else {
+ // Everything that is not in the string, double, or decimal families is treated as long.
this.type = Type.LONG;
}
}
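
The widened setTypeString amounts to a four-way classification of type names onto vector column families. A rough standalone equivalent, assuming simplified family predicates (the regex checks below are illustrative, not Hive's actual isStringFamily/isFloatFamily/isDecimalFamily implementations):

    // Simplified stand-ins for VectorizationContext's type-family predicates.
    final class TypeFamilySketch {
        static boolean isStringFamily(String t)  { return t.matches("string|char.*|varchar.*"); }
        static boolean isFloatFamily(String t)   { return t.equals("float") || t.equals("double"); }
        static boolean isDecimalFamily(String t) { return t.startsWith("decimal"); }

        static String vectorColumnType(String typeString) {
            if (isStringFamily(typeString))  return "BYTES";
            if (isFloatFamily(typeString))   return "DOUBLE";
            if (isDecimalFamily(typeString)) return "DECIMAL";
            return "LONG";    // all remaining primitives ride in long vectors
        }

        public static void main(String[] args) {
            System.out.println(vectorColumnType("varchar(10)"));      // BYTES
            System.out.println(vectorColumnType("decimal(10,2)"));    // DECIMAL
            System.out.println(vectorColumnType("boolean"));          // LONG
        }
    }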
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java Wed Aug 13 00:25:32 2014
@@ -50,25 +50,12 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableIntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableLongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
-import org.apache.hadoop.io.BooleanWritable;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
/**
* VectorExpressionWritableFactory helper class for generating VectorExpressionWritable objects.
@@ -364,7 +351,6 @@ public final class VectorExpressionWrite
*/
public static VectorExpressionWriter genVectorExpressionWritable(ExprNodeDesc nodeDesc)
throws HiveException {
- String nodeType = nodeDesc.getTypeString();
ObjectInspector objectInspector = nodeDesc.getWritableObjectInspector();
if (null == objectInspector) {
objectInspector = TypeInfoUtils
@@ -408,6 +394,9 @@ public final class VectorExpressionWrite
case LONG:
return genVectorExpressionWritableLong(
(SettableLongObjectInspector) fieldObjInspector);
+ case VOID:
+ return genVectorExpressionWritableVoid(
+ (VoidObjectInspector) fieldObjInspector);
case BINARY:
return genVectorExpressionWritableBinary(
(SettableBinaryObjectInspector) fieldObjInspector);
@@ -722,6 +711,39 @@ public final class VectorExpressionWrite
}.init(fieldObjInspector);
}
+ private static VectorExpressionWriter genVectorExpressionWritableVoid(
+ VoidObjectInspector fieldObjInspector) throws HiveException {
+ return new VectorExpressionWriterLong() {
+ private Object obj;
+
+ public VectorExpressionWriter init(VoidObjectInspector objInspector)
+ throws HiveException {
+ super.init(objInspector);
+ this.obj = initValue(null);
+ return this;
+ }
+
+ @Override
+ public Object writeValue(long value) throws HiveException {
+ return this.obj;
+ }
+
+ @Override
+ public Object setValue(Object field, long value) throws HiveException {
+ if (null == field) {
+ field = initValue(null);
+ }
+ return field;
+ }
+
+ @Override
+ public Object initValue(Object ignored) {
+ return ((VoidObjectInspector) this.objectInspector).copyObject(null);
+ }
+ }.init(fieldObjInspector);
+ }
+
+
private static VectorExpressionWriter genVectorExpressionWritableInt(
SettableIntObjectInspector fieldObjInspector) throws HiveException {
return new VectorExpressionWriterLong() {
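
The new VOID writer is essentially a null object: whatever long value the vector hands it, it returns a (copied) null. A compact standalone sketch of that behavior (LongWriter is a stand-in interface, and returning plain null stands in for VoidObjectInspector.copyObject(null)):

    interface LongWriter {
        Object writeValue(long value);
    }

    final class VoidWriterSketch {
        static LongWriter voidWriter() {
            return new LongWriter() {
                @Override
                public Object writeValue(long value) {
                    // The long payload is ignored by design: a VOID column carries
                    // no values, so the writer always yields null.
                    return null;
                }
            };
        }

        public static void main(String[] args) {
            System.out.println(voidWriter().writeValue(42L));    // null
        }
    }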
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java Wed Aug 13 00:25:32 2014
@@ -23,7 +23,6 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
-import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Index;
@@ -46,12 +45,9 @@ import org.apache.hadoop.hive.ql.plan.Pa
*/
public class AggregateIndexHandler extends CompactIndexHandler {
- private static Index index = null;
-
@Override
- public void analyzeIndexDefinition(Table baseTable, Index idx,
+ public void analyzeIndexDefinition(Table baseTable, Index index,
Table indexTable) throws HiveException {
- index = idx;
StorageDescriptor storageDesc = index.getSd();
if (this.usesIndexTable() && indexTable != null) {
StorageDescriptor indexTableSd = storageDesc.deepCopy();
@@ -92,10 +88,11 @@ public class AggregateIndexHandler exten
@Override
protected Task<?> getIndexBuilderMapRedTask(Set<ReadEntity> inputs,
Set<WriteEntity> outputs,
- List<FieldSchema> indexField, boolean partitioned,
+ Index index, boolean partitioned,
PartitionDesc indexTblPartDesc, String indexTableName,
PartitionDesc baseTablePartDesc, String baseTableName, String dbName) {
+ List<FieldSchema> indexField = index.getSd().getCols();
String indexCols = HiveUtils.getUnparsedColumnNamesFromFieldSchema(indexField);
//form a new insert overwrite query.
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java Wed Aug 13 00:25:32 2014
@@ -29,6 +29,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Index;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
@@ -64,7 +65,7 @@ public abstract class TableBasedIndexHan
if (!baseTbl.isPartitioned()) {
// the table does not have any partition, then create index for the
// whole table
- Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index.getSd().getCols(), false,
+ Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index, false,
new PartitionDesc(desc, null), indexTbl.getTableName(),
new PartitionDesc(Utilities.getTableDesc(baseTbl), null),
baseTbl.getTableName(), indexTbl.getDbName());
@@ -88,7 +89,7 @@ public abstract class TableBasedIndexHan
"Partitions of base table and index table are inconsistent.");
}
// for each partition, spawn a map reduce task.
- Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index.getSd().getCols(), true,
+ Task<?> indexBuilder = getIndexBuilderMapRedTask(inputs, outputs, index, true,
new PartitionDesc(indexPart), indexTbl.getTableName(),
new PartitionDesc(basePart), baseTbl.getTableName(), indexTbl.getDbName());
indexBuilderTasks.add(indexBuilder);
@@ -100,10 +101,20 @@ public abstract class TableBasedIndexHan
}
}
- abstract protected Task<?> getIndexBuilderMapRedTask(Set<ReadEntity> inputs, Set<WriteEntity> outputs,
+ protected Task<?> getIndexBuilderMapRedTask(Set<ReadEntity> inputs, Set<WriteEntity> outputs,
+ Index index, boolean partitioned,
+ PartitionDesc indexTblPartDesc, String indexTableName,
+ PartitionDesc baseTablePartDesc, String baseTableName, String dbName) throws HiveException {
+ return getIndexBuilderMapRedTask(inputs, outputs, index.getSd().getCols(),
+ partitioned, indexTblPartDesc, indexTableName, baseTablePartDesc, baseTableName, dbName);
+ }
+
+ protected Task<?> getIndexBuilderMapRedTask(Set<ReadEntity> inputs, Set<WriteEntity> outputs,
List<FieldSchema> indexField, boolean partitioned,
PartitionDesc indexTblPartDesc, String indexTableName,
- PartitionDesc baseTablePartDesc, String baseTableName, String dbName) throws HiveException;
+ PartitionDesc baseTablePartDesc, String baseTableName, String dbName) throws HiveException {
+ return null;
+ }
protected void setStatsDir(HiveConf builderConf) {
String statsDir;
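
The refactoring above replaces an abstract method with a pair of concrete overloads: the new Index-taking entry point adapts its argument and delegates to the legacy List<FieldSchema> hook, so existing handlers keep working while new ones can override the richer variant. Schematically (IndexSketch and the String return type are stand-ins, not Hive API):

    import java.util.Arrays;
    import java.util.List;

    final class IndexSketch {                       // stand-in for the metastore Index
        private final List<String> cols;
        IndexSketch(List<String> cols) { this.cols = cols; }
        List<String> getCols() { return cols; }
    }

    class HandlerBase {
        // New entry point: the default implementation adapts and forwards once.
        protected String getIndexBuilderTask(IndexSketch index, boolean partitioned) {
            return getIndexBuilderTask(index.getCols(), partitioned);
        }
        // Legacy hook: formerly abstract, now a no-op default.
        protected String getIndexBuilderTask(List<String> indexCols, boolean partitioned) {
            return null;
        }
    }

    final class NewStyleHandler extends HandlerBase {
        @Override
        protected String getIndexBuilderTask(IndexSketch index, boolean partitioned) {
            return "rebuild" + index.getCols();     // overrides the richer variant directly
        }
    }

    final class DelegationDemo {
        public static void main(String[] args) {
            HandlerBase h = new NewStyleHandler();
            System.out.println(h.getIndexBuilderTask(
                new IndexSketch(Arrays.asList("a", "b")), false));    // rebuild[a, b]
        }
    }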
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Wed Aug 13 00:25:32 2014
@@ -409,6 +409,12 @@ public class Hive {
}
}
+ public void alterIndex(String baseTableName, String indexName, Index newIdx)
+ throws InvalidOperationException, HiveException {
+ String[] names = Utilities.getDbTableName(baseTableName);
+ alterIndex(names[0], names[1], indexName, newIdx);
+ }
+
/**
* Updates the existing index metadata with the new metadata.
*
@@ -667,17 +673,16 @@ public class Hive {
throws HiveException {
try {
- String dbName = SessionState.get().getCurrentDatabase();
Index old_index = null;
try {
- old_index = getIndex(dbName, tableName, indexName);
+ old_index = getIndex(tableName, indexName);
} catch (Exception e) {
}
if (old_index != null) {
- throw new HiveException("Index " + indexName + " already exists on table " + tableName + ", db=" + dbName);
+ throw new HiveException("Index " + indexName + " already exists on table " + tableName);
}
- org.apache.hadoop.hive.metastore.api.Table baseTbl = getMSC().getTable(dbName, tableName);
+ org.apache.hadoop.hive.metastore.api.Table baseTbl = getTable(tableName).getTTable();
if (baseTbl.getTableType() == TableType.VIRTUAL_VIEW.toString()) {
throw new HiveException("tableName="+ tableName +" is a VIRTUAL VIEW. Index on VIRTUAL VIEW is not supported.");
}
@@ -686,17 +691,13 @@ public class Hive {
+ " is a TEMPORARY TABLE. Index on TEMPORARY TABLE is not supported.");
}
- if (indexTblName == null) {
- indexTblName = MetaStoreUtils.getIndexTableName(dbName, tableName, indexName);
- } else {
- org.apache.hadoop.hive.metastore.api.Table temp = null;
- try {
- temp = getMSC().getTable(dbName, indexTblName);
- } catch (Exception e) {
- }
- if (temp != null) {
- throw new HiveException("Table name " + indexTblName + " already exists. Choose another name.");
- }
+ org.apache.hadoop.hive.metastore.api.Table temp = null;
+ try {
+ temp = getTable(indexTblName).getTTable();
+ } catch (Exception e) {
+ }
+ if (temp != null) {
+ throw new HiveException("Table name " + indexTblName + " already exists. Choose another name.");
}
org.apache.hadoop.hive.metastore.api.StorageDescriptor storageDescriptor = baseTbl.getSd().deepCopy();
@@ -774,7 +775,9 @@ public class Hive {
HiveIndexHandler indexHandler = HiveUtils.getIndexHandler(this.getConf(), indexHandlerClass);
if (indexHandler.usesIndexTable()) {
- tt = new org.apache.hadoop.hive.ql.metadata.Table(dbName, indexTblName).getTTable();
+ String idname = Utilities.getDatabaseName(indexTblName);
+ String itname = Utilities.getTableName(indexTblName);
+ tt = new org.apache.hadoop.hive.ql.metadata.Table(idname, itname).getTTable();
List<FieldSchema> partKeys = baseTbl.getPartitionKeys();
tt.setPartitionKeys(partKeys);
tt.setTableType(TableType.INDEX_TABLE.toString());
@@ -798,7 +801,9 @@ public class Hive {
throw new RuntimeException("Please specify deferred rebuild using \" WITH DEFERRED REBUILD \".");
}
- Index indexDesc = new Index(indexName, indexHandlerClass, dbName, tableName, time, time, indexTblName,
+ String tdname = Utilities.getDatabaseName(tableName);
+ String ttname = Utilities.getTableName(tableName);
+ Index indexDesc = new Index(indexName, indexHandlerClass, tdname, ttname, time, time, indexTblName,
storageDescriptor, params, deferredRebuild);
if (indexComment != null) {
indexDesc.getParameters().put("comment", indexComment);
@@ -818,19 +823,6 @@ public class Hive {
}
}
- public Index getIndex(String qualifiedIndexName) throws HiveException {
- String[] names = getQualifiedNames(qualifiedIndexName);
- switch (names.length) {
- case 3:
- return getIndex(names[0], names[1], names[2]);
- case 2:
- return getIndex(SessionState.get().getCurrentDatabase(),
- names[0], names[1]);
- default:
- throw new HiveException("Invalid index name:" + qualifiedIndexName);
- }
- }
-
public Index getIndex(String baseTableName, String indexName) throws HiveException {
String[] names = Utilities.getDbTableName(baseTableName);
return this.getIndex(names[0], names[1], indexName);
@@ -845,6 +837,11 @@ public class Hive {
}
}
+ public boolean dropIndex(String baseTableName, String index_name, boolean deleteData) throws HiveException {
+ String[] names = Utilities.getDbTableName(baseTableName);
+ return dropIndex(names[0], names[1], index_name, deleteData);
+ }
+
public boolean dropIndex(String db_name, String tbl_name, String index_name, boolean deleteData) throws HiveException {
try {
return getMSC().dropIndex(db_name, tbl_name, index_name, deleteData);
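
The new alterIndex/dropIndex overloads accept either a bare table name or a "db.table" qualified one and split it via Utilities.getDbTableName. A standalone sketch of that resolution (defaultDb stands in for the session's current database; this is not the Utilities implementation):

    // Splits "db.table" or bare "table" into {db, table}, defaulting the database.
    final class DbTableNameSketch {
        static String[] getDbTableName(String defaultDb, String name) {
            int dot = name.indexOf('.');
            if (dot < 0) {
                return new String[] { defaultDb, name };    // unqualified: use default db
            }
            return new String[] { name.substring(0, dot), name.substring(dot + 1) };
        }

        public static void main(String[] args) {
            String[] n = getDbTableName("default", "sales.t1");
            System.out.println(n[0] + " / " + n[1]);    // sales / t1
            n = getDbTableName("default", "t2");
            System.out.println(n[0] + " / " + n[1]);    // default / t2
        }
    }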
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java Wed Aug 13 00:25:32 2014
@@ -41,7 +41,6 @@ import org.apache.hadoop.hive.ql.exec.Ta
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ParseContext;
@@ -267,15 +266,7 @@ abstract public class AbstractBucketJoin
Table tbl = topToTable.get(tso);
if (tbl.isPartitioned()) {
- PrunedPartitionList prunedParts;
- try {
- prunedParts = pGraphContext.getPrunedPartitions(alias, tso);
- } catch (HiveException e) {
- // Has to use full name to make sure it does not conflict with
- // org.apache.commons.lang.StringUtils
- LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
- throw new SemanticException(e.getMessage(), e);
- }
+ PrunedPartitionList prunedParts = pGraphContext.getPrunedPartitions(alias, tso);
List<Partition> partitions = prunedParts.getNotDeniedPartns();
// construct a mapping of (Partition->bucket file names) and (Partition -> bucket number)
if (partitions.isEmpty()) {
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java Wed Aug 13 00:25:32 2014
@@ -41,7 +41,6 @@ import org.apache.hadoop.hive.ql.exec.Ta
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ParseContext;
@@ -316,13 +315,7 @@ abstract public class AbstractSMBJoinPro
Table tbl = topToTable.get(tso);
if (tbl.isPartitioned()) {
- PrunedPartitionList prunedParts;
- try {
- prunedParts = pGraphContext.getPrunedPartitions(alias, tso);
- } catch (HiveException e) {
- LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
- throw new SemanticException(e.getMessage(), e);
- }
+ PrunedPartitionList prunedParts = pGraphContext.getPrunedPartitions(alias, tso);
List<Partition> partitions = prunedParts.getNotDeniedPartns();
// Populate the names and order of columns for the first partition of the
// first table
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java Wed Aug 13 00:25:32 2014
@@ -497,7 +497,8 @@ public class BucketingSortingReduceSinkO
}
if (srcTable.isPartitioned()) {
- PrunedPartitionList prunedParts = pGraphContext.getOpToPartList().get(ts);
+ PrunedPartitionList prunedParts =
+ pGraphContext.getPrunedPartitions(srcTable.getTableName(), ts);
List<Partition> partitions = prunedParts.getNotDeniedPartns();
// Support for dynamic partitions can be added later
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java Wed Aug 13 00:25:32 2014
@@ -372,52 +372,57 @@ public final class ColumnPrunerProcFacto
cppCtx.getPrunedColLists().put((Operator<? extends OperatorDesc>) nd,
cols);
- List<Integer> neededColumnIds = new ArrayList<Integer>();
- List<String> neededColumnNames = new ArrayList<String>();
- List<String> referencedColumnNames = new ArrayList<String>();
RowResolver inputRR = cppCtx.getOpToParseCtxMap().get(scanOp).getRowResolver();
- TableScanDesc desc = scanOp.getConf();
- List<VirtualColumn> virtualCols = desc.getVirtualCols();
- List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();
-
- // add virtual columns for ANALYZE TABLE
- if(scanOp.getConf().isGatherStats()) {
- cols.add(VirtualColumn.RAWDATASIZE.getName());
- }
+ setupNeededColumns(scanOp, inputRR, cols);
+ return null;
+ }
+ }
- for (String column : cols) {
- String[] tabCol = inputRR.reverseLookup(column);
- if (tabCol == null) {
- continue;
- }
- referencedColumnNames.add(column);
- ColumnInfo colInfo = inputRR.get(tabCol[0], tabCol[1]);
- if (colInfo.getIsVirtualCol()) {
- // part is also a virtual column, but part col should not in this
- // list.
- for (int j = 0; j < virtualCols.size(); j++) {
- VirtualColumn vc = virtualCols.get(j);
- if (vc.getName().equals(colInfo.getInternalName())) {
- newVirtualCols.add(vc);
- }
+ public static void setupNeededColumns(TableScanOperator scanOp, RowResolver inputRR,
+ List<String> cols) throws SemanticException {
+ List<Integer> neededColumnIds = new ArrayList<Integer>();
+ List<String> neededColumnNames = new ArrayList<String>();
+ List<String> referencedColumnNames = new ArrayList<String>();
+ TableScanDesc desc = scanOp.getConf();
+ List<VirtualColumn> virtualCols = desc.getVirtualCols();
+ List<VirtualColumn> newVirtualCols = new ArrayList<VirtualColumn>();
+
+ // add virtual columns for ANALYZE TABLE
+ if(scanOp.getConf().isGatherStats()) {
+ cols.add(VirtualColumn.RAWDATASIZE.getName());
+ }
+
+ for (String column : cols) {
+ String[] tabCol = inputRR.reverseLookup(column);
+ if (tabCol == null) {
+ continue;
+ }
+ referencedColumnNames.add(column);
+ ColumnInfo colInfo = inputRR.get(tabCol[0], tabCol[1]);
+ if (colInfo.getIsVirtualCol()) {
+ // part is also a virtual column, but the part col should not be in this
+ // list.
+ for (int j = 0; j < virtualCols.size(); j++) {
+ VirtualColumn vc = virtualCols.get(j);
+ if (vc.getName().equals(colInfo.getInternalName())) {
+ newVirtualCols.add(vc);
}
- //no need to pass virtual columns to reader.
- continue;
- }
- int position = inputRR.getPosition(column);
- if (position >= 0) {
- // get the needed columns by id and name
- neededColumnIds.add(position);
- neededColumnNames.add(column);
}
+ //no need to pass virtual columns to reader.
+ continue;
+ }
+ int position = inputRR.getPosition(column);
+ if (position >= 0) {
+ // get the needed columns by id and name
+ neededColumnIds.add(position);
+ neededColumnNames.add(column);
}
-
- desc.setVirtualCols(newVirtualCols);
- scanOp.setNeededColumnIDs(neededColumnIds);
- scanOp.setNeededColumns(neededColumnNames);
- scanOp.setReferencedColumns(referencedColumnNames);
- return null;
}
+
+ desc.setVirtualCols(newVirtualCols);
+ scanOp.setNeededColumnIDs(neededColumnIds);
+ scanOp.setNeededColumns(neededColumnNames);
+ scanOp.setReferencedColumns(referencedColumnNames);
}
/**
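
The extracted setupNeededColumns splits the referenced columns into virtual columns (retained on the TableScanDesc) and physical columns (handed to the reader by position and name). A toy analogue of that partitioning (the column lists are illustrative; INPUT__FILE__NAME is one of Hive's real virtual columns):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    final class NeededColumnsSketch {
        public static void main(String[] args) {
            List<String> physical = Arrays.asList("id", "name", "ds");
            List<String> virtual  = Arrays.asList("INPUT__FILE__NAME");
            List<Integer> neededIds = new ArrayList<Integer>();
            List<String> neededNames = new ArrayList<String>();
            for (String col : Arrays.asList("name", "INPUT__FILE__NAME", "id")) {
                if (virtual.contains(col)) {
                    continue;                    // virtual columns are not pushed to the reader
                }
                int pos = physical.indexOf(col); // mirrors inputRR.getPosition(column)
                if (pos >= 0) {
                    neededIds.add(pos);
                    neededNames.add(col);
                }
            }
            System.out.println(neededIds + " " + neededNames);    // [1, 0] [name, id]
        }
    }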
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java Wed Aug 13 00:25:32 2014
@@ -77,12 +77,6 @@ public class ConstantPropagate implement
*/
@Override
public ParseContext transform(ParseContext pactx) throws SemanticException {
- if (pactx.getConf().getBoolVar(ConfVars.HIVE_VECTORIZATION_ENABLED)) {
- // Constant propagate is currently conflict with vectorizer, disabling constant propagate
- // if the later is enabled.
- return pactx;
- }
-
pGraphContext = pactx;
opToParseCtxMap = pGraphContext.getOpParseCtx();
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java Wed Aug 13 00:25:32 2014
@@ -46,7 +46,6 @@ import org.apache.hadoop.hive.ql.lib.Nod
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ParseContext;
@@ -61,7 +60,6 @@ import org.apache.hadoop.hive.ql.plan.Gr
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
-import org.apache.hadoop.util.StringUtils;
/**
* This transformation does group by optimization. If the grouping key is a superset
@@ -388,13 +386,8 @@ public class GroupByOptimizer implements
List<String> bucketCols = table.getBucketCols();
return matchBucketSortCols(groupByCols, bucketCols, sortCols);
} else {
- PrunedPartitionList partsList;
- try {
- partsList = pGraphContext.getPrunedPartitions(table.getTableName(), tableScanOp);
- } catch (HiveException e) {
- LOG.error(StringUtils.stringifyException(e));
- throw new SemanticException(e.getMessage(), e);
- }
+ PrunedPartitionList partsList =
+ pGraphContext.getPrunedPartitions(table.getTableName(), tableScanOp);
List<Partition> notDeniedPartns = partsList.getNotDeniedPartns();
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java Wed Aug 13 00:25:32 2014
@@ -23,7 +23,6 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
-import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
@@ -36,6 +35,7 @@ import org.apache.hadoop.hive.ql.Driver;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.index.IndexMetadataChangeTask;
@@ -57,7 +57,6 @@ import org.apache.hadoop.hive.ql.parse.S
public final class IndexUtils {
private static final Log LOG = LogFactory.getLog(IndexWhereProcessor.class.getName());
- private static final Map<Index, Table> indexToIndexTable = new HashMap<Index, Table>();
private IndexUtils(){
}
@@ -71,9 +70,7 @@ public final class IndexUtils {
* @throws HiveException
*/
public static Set<Partition> checkPartitionsCoveredByIndex(TableScanOperator tableScan,
- ParseContext pctx,
- Map<Table, List<Index>> indexes)
- throws HiveException {
+ ParseContext pctx, List<Index> indexes) throws HiveException {
Hive hive = Hive.get(pctx.getConf());
// make sure each partition exists on the index table
PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan);
@@ -83,7 +80,6 @@ public final class IndexUtils {
}
for (Partition part : queryPartitions) {
- List<Table> sourceIndexTables = getIndexTables(hive, part, indexes);
if (!containsPartition(hive, part, indexes)) {
return null; // problem if it doesn't contain the partition
}
@@ -93,63 +89,24 @@ public final class IndexUtils {
}
/**
- * return index tables associated with a given base table
- */
- private List<Table> getIndexTables(Hive hive, Table table,
- Map<Table, List<Index>> indexes) throws
- HiveException {
- List<Table> indexTables = new ArrayList<Table>();
- if (indexes == null || indexes.get(table) == null) {
- return indexTables;
- }
- for (Index index : indexes.get(table)) {
- Table indexTable = hive.getTable(index.getIndexTableName());
- indexToIndexTable.put(index, indexTable);
- indexTables.add(indexTable);
- }
- return indexTables;
- }
-
- /**
- * return index tables associated with the base table of the partition
- */
- private static List<Table> getIndexTables(Hive hive, Partition part,
- Map<Table, List<Index>> indexes) throws HiveException {
- List<Table> indexTables = new ArrayList<Table>();
- Table partitionedTable = part.getTable();
- if (indexes == null || indexes.get(partitionedTable) == null) {
- return indexTables;
- }
- for (Index index : indexes.get(partitionedTable)) {
- Table indexTable = hive.getTable(index.getIndexTableName());
- indexToIndexTable.put(index, indexTable);
- indexTables.add(indexTable);
- }
- return indexTables;
- }
-
- /**
* check that every index table contains the given partition and is fresh
*/
- private static boolean containsPartition(Hive hive, Partition part,
- Map<Table, List<Index>> indexes)
- throws HiveException {
+ private static boolean containsPartition(Hive hive, Partition part, List<Index> indexes)
+ throws HiveException {
HashMap<String, String> partSpec = part.getSpec();
-
- if (indexes == null || indexes.get(part.getTable()) == null) {
- return false;
- }
-
if (partSpec.isEmpty()) {
// empty specs come from non-partitioned tables
- return isIndexTableFresh(hive, indexes.get(part.getTable()), part.getTable());
+ return isIndexTableFresh(hive, indexes, part.getTable());
}
- for (Index index : indexes.get(part.getTable())) {
- Table indexTable = indexToIndexTable.get(index);
+ for (Index index : indexes) {
+ // index.getDbName() is used as the default database (it is the database of the
+ // target table) when index.getIndexTableName() does not contain a database name.
+ String[] qualified = Utilities.getDbTableName(index.getDbName(), index.getIndexTableName());
+ Table indexTable = hive.getTable(qualified[0], qualified[1]);
// get partitions that match the spec
- List<Partition> matchingPartitions = hive.getPartitions(indexTable, partSpec);
- if (matchingPartitions == null || matchingPartitions.size() == 0) {
+ Partition matchingPartition = hive.getPartition(indexTable, partSpec, false);
+ if (matchingPartition == null) {
LOG.info("Index table " + indexTable + "did not contain built partition that matched " + partSpec);
return false;
} else if (!isIndexPartitionFresh(hive, index, part)) {
@@ -160,7 +117,7 @@ public final class IndexUtils {
}
/**
- * Check the index partitions on a parttioned table exist and are fresh
+ * Check the index partitions on a partitioned table exist and are fresh
*/
private static boolean isIndexPartitionFresh(Hive hive, Index index,
Partition part) throws HiveException {
@@ -187,7 +144,7 @@ public final class IndexUtils {
}
/**
- * Check that the indexes on the unpartioned table exist and are fresh
+ * Check that the indexes on the un-partitioned table exist and are fresh
*/
private static boolean isIndexTableFresh(Hive hive, List<Index> indexes, Table src)
throws HiveException {
@@ -227,8 +184,8 @@ public final class IndexUtils {
public static List<Index> getIndexes(Table baseTableMetaData, List<String> matchIndexTypes)
throws SemanticException {
List<Index> matchingIndexes = new ArrayList<Index>();
- List<Index> indexesOnTable = null;
+ List<Index> indexesOnTable;
try {
indexesOnTable = baseTableMetaData.getAllIndexes((short) -1); // get all indexes
} catch (HiveException e) {
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java Wed Aug 13 00:25:32 2014
@@ -26,6 +26,7 @@ import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
@@ -33,21 +34,28 @@ import org.apache.hadoop.hive.ql.exec.Op
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.GenTezProcContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.HashTableDummyDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.OpTraits;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TezEdgeProperty;
import org.apache.hadoop.hive.ql.plan.TezEdgeProperty.EdgeType;
import org.apache.hadoop.hive.ql.plan.TezWork;
+import org.apache.hadoop.hive.ql.stats.StatsUtils;
public class ReduceSinkMapJoinProc implements NodeProcessor {
@@ -111,18 +119,59 @@ public class ReduceSinkMapJoinProc imple
if (pos == -1) {
throw new SemanticException("Cannot find position of parent in mapjoin");
}
- LOG.debug("Mapjoin "+mapJoinOp+", pos: "+pos+" --> "+parentWork.getName());
- mapJoinOp.getConf().getParentToInput().put(pos, parentWork.getName());
+ MapJoinDesc joinConf = mapJoinOp.getConf();
+ long keyCount = Long.MAX_VALUE, rowCount = Long.MAX_VALUE, bucketCount = 1;
+ Statistics stats = parentRS.getStatistics();
+ if (stats != null) {
+ keyCount = rowCount = stats.getNumRows();
+ if (keyCount <= 0) {
+ keyCount = rowCount = Long.MAX_VALUE;
+ }
+ ArrayList<String> keyCols = parentRS.getConf().getOutputKeyColumnNames();
+ if (keyCols != null && !keyCols.isEmpty()) {
+ // See if we can arrive at a smaller number using distinct stats from key columns.
+ long maxKeyCount = 1;
+ String prefix = Utilities.ReduceField.KEY.toString();
+ for (String keyCol : keyCols) {
+ ExprNodeDesc realCol = parentRS.getColumnExprMap().get(prefix + "." + keyCol);
+ ColStatistics cs = StatsUtils.getColStatisticsFromExpression(null, stats, realCol);
+ if (cs == null || cs.getCountDistint() <= 0) {
+ maxKeyCount = Long.MAX_VALUE;
+ break;
+ }
+ maxKeyCount *= cs.getCountDistint();
+ if (maxKeyCount >= keyCount) {
+ break;
+ }
+ }
+ keyCount = Math.min(maxKeyCount, keyCount);
+ }
+ if (joinConf.isBucketMapJoin()) {
+ OpTraits opTraits = mapJoinOp.getOpTraits();
+ bucketCount = (opTraits == null) ? -1 : opTraits.getNumBuckets();
+ if (bucketCount > 0) {
+ // We cannot obtain a better estimate without CustomPartitionVertex providing it
+ // to us somehow; in which case using statistics would be completely unnecessary.
+ keyCount /= bucketCount;
+ }
+ }
+ }
+ LOG.info("Mapjoin " + mapJoinOp + ", pos: " + pos + " --> " + parentWork.getName() + " ("
+ + keyCount + " keys estimated from " + rowCount + " rows, " + bucketCount + " buckets)");
+ joinConf.getParentToInput().put(pos, parentWork.getName());
+ if (keyCount != Long.MAX_VALUE) {
+ joinConf.getParentKeyCounts().put(pos, keyCount);
+ }
int numBuckets = -1;
EdgeType edgeType = EdgeType.BROADCAST_EDGE;
- if (mapJoinOp.getConf().isBucketMapJoin()) {
+ if (joinConf.isBucketMapJoin()) {
// disable auto parallelism for bucket map joins
parentRS.getConf().setAutoParallel(false);
- numBuckets = (Integer) mapJoinOp.getConf().getBigTableBucketNumMapping().values().toArray()[0];
- if (mapJoinOp.getConf().getCustomBucketMapJoin()) {
+ numBuckets = (Integer) joinConf.getBigTableBucketNumMapping().values().toArray()[0];
+ if (joinConf.getCustomBucketMapJoin()) {
edgeType = EdgeType.CUSTOM_EDGE;
} else {
edgeType = EdgeType.CUSTOM_SIMPLE_EDGE;
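
The estimation added above reads as: start from the reduce sink's row count, tighten it by the product of the key columns' distinct-value counts, then divide by the bucket count for bucket map joins. A standalone sketch of that arithmetic (ndv <= 0 models a missing column statistic, as with getCountDistint() <= 0 above):

    // Estimate the distinct key count a map join's hash table will see.
    final class KeyCountSketch {
        static long estimate(long rowCount, long[] ndvs, long bucketCount) {
            long keyCount = rowCount <= 0 ? Long.MAX_VALUE : rowCount;
            long maxKeyCount = 1;
            for (long ndv : ndvs) {
                if (ndv <= 0) {                  // a column without stats poisons the product
                    maxKeyCount = Long.MAX_VALUE;
                    break;
                }
                maxKeyCount *= ndv;
                if (maxKeyCount >= keyCount) {   // cannot beat the row count; stop early
                    break;
                }
            }
            keyCount = Math.min(maxKeyCount, keyCount);
            if (bucketCount > 0) {
                keyCount /= bucketCount;         // each bucket task sees ~1/bucketCount of the keys
            }
            return keyCount;
        }

        public static void main(String[] args) {
            // 1M rows, key columns with 100 and 50 distinct values, 4 buckets:
            System.out.println(estimate(1000000L, new long[] { 100, 50 }, 4));    // 1250
        }
    }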
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyCtx.java Wed Aug 13 00:25:32 2014
@@ -30,10 +30,9 @@ import java.util.Stack;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.metastore.api.Index;
-import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
@@ -45,7 +44,6 @@ import org.apache.hadoop.hive.ql.lib.Rul
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
/**
* RewriteCanApplyCtx class stores the context for the {@link RewriteCanApplyProcFactory}
@@ -84,7 +82,9 @@ public final class RewriteCanApplyCtx im
private Set<String> aggFuncColList = new LinkedHashSet<String>();
private final ParseContext parseContext;
+ private String alias;
private String baseTableName;
+ private String indexTableName;
private String aggFunction;
void resetCanApplyCtx(){
@@ -230,6 +230,14 @@ public final class RewriteCanApplyCtx im
this.aggFuncCnt = aggFuncCnt;
}
+ public String getAlias() {
+ return alias;
+ }
+
+ public void setAlias(String alias) {
+ this.alias = alias;
+ }
+
public String getBaseTableName() {
return baseTableName;
}
@@ -238,10 +246,26 @@ public final class RewriteCanApplyCtx im
this.baseTableName = baseTableName;
}
+ public String getIndexTableName() {
+ return indexTableName;
+ }
+
+ public void setIndexTableName(String indexTableName) {
+ this.indexTableName = indexTableName;
+ }
+
public ParseContext getParseContext() {
return parseContext;
}
+ public Set<String> getAllColumns() {
+ Set<String> allColumns = new LinkedHashSet<String>(selectColumnsList);
+ allColumns.addAll(predicateColumnsList);
+ allColumns.addAll(gbKeyNameList);
+ allColumns.addAll(aggFuncColList);
+ return allColumns;
+ }
+
/**
* This method walks all the nodes starting from topOp TableScanOperator node
@@ -255,15 +279,13 @@ public final class RewriteCanApplyCtx im
* @param topOp
* @throws SemanticException
*/
- void populateRewriteVars(Operator<? extends OperatorDesc> topOp)
+ void populateRewriteVars(TableScanOperator topOp)
throws SemanticException{
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(new RuleRegExp("R1", FilterOperator.getOperatorName() + "%"),
- RewriteCanApplyProcFactory.canApplyOnFilterOperator());
+ RewriteCanApplyProcFactory.canApplyOnFilterOperator(topOp));
opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + "%"),
- RewriteCanApplyProcFactory.canApplyOnGroupByOperator());
- opRules.put(new RuleRegExp("R3", SelectOperator.getOperatorName() + "%"),
- RewriteCanApplyProcFactory.canApplyOnSelectOperator());
+ RewriteCanApplyProcFactory.canApplyOnGroupByOperator(topOp));
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
@@ -364,5 +386,4 @@ public final class RewriteCanApplyCtx im
}
return true;
}
-
}
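
The new getAllColumns() accessor above simply unions the four column sets the walker
collects (select, predicate, group-by key, and aggregate-function columns) so a caller can
check in one place whether an index covers every column the query touches. A toy
restatement follows; the class and method names are illustrative, not Hive API.

  import java.util.LinkedHashSet;
  import java.util.Set;

  // Toy restatement of getAllColumns() above; names are illustrative, not Hive API.
  final class ColumnUnionSketch {
    static Set<String> allColumns(Set<String> selectCols, Set<String> predicateCols,
                                  Set<String> gbKeyCols, Set<String> aggFuncCols) {
      // LinkedHashSet keeps insertion order and silently drops duplicates,
      // matching the LinkedHashSet fields RewriteCanApplyCtx already uses.
      Set<String> all = new LinkedHashSet<String>(selectCols);
      all.addAll(predicateCols);
      all.addAll(gbKeyCols);
      all.addAll(aggFuncCols);
      return all;
    }
  }

A caller can then test indexKeyColumns.containsAll(allColumns(...)) in a single step
instead of checking the four sets separately (the actual covering check lives elsewhere in
the rewrite code).
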
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java?rev=1617633&r1=1617632&r2=1617633&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteCanApplyProcFactory.java Wed Aug 13 00:25:32 2014
@@ -18,19 +18,9 @@
package org.apache.hadoop.hive.ql.optimizer.index;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Stack;
-
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
@@ -39,10 +29,13 @@ import org.apache.hadoop.hive.ql.plan.Ag
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+import java.util.List;
+import java.util.Stack;
/**
* Factory of methods used by {@link RewriteGBUsingIndex}
@@ -50,43 +43,46 @@ import org.apache.hadoop.hive.ql.plan.Op
*
*/
public final class RewriteCanApplyProcFactory {
- private static RewriteCanApplyCtx canApplyCtx = null;
-
- private RewriteCanApplyProcFactory(){
- //this prevents the class from getting instantiated
- }
/**
* Check for conditions in FilterOperator that do not meet rewrite criteria.
*/
private static class CheckFilterProc implements NodeProcessor {
+
+ private TableScanOperator topOp;
+
+ public CheckFilterProc(TableScanOperator topOp) {
+ this.topOp = topOp;
+ }
+
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
FilterOperator operator = (FilterOperator)nd;
- canApplyCtx = (RewriteCanApplyCtx)ctx;
- FilterDesc conf = (FilterDesc)operator.getConf();
+ RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx)ctx;
+ FilterDesc conf = operator.getConf();
//The filter operator should have a predicate of ExprNodeGenericFuncDesc type.
//This represents the comparison operator
- ExprNodeGenericFuncDesc oldengfd = (ExprNodeGenericFuncDesc) conf.getPredicate();
+ ExprNodeDesc oldengfd = conf.getPredicate();
if(oldengfd == null){
canApplyCtx.setWhrClauseColsFetchException(true);
+ return null;
}
- //The predicate should have valid left and right columns
- List<String> colList = oldengfd.getCols();
- if(colList == null || colList.size() == 0){
+ ExprNodeDesc backtrack = ExprNodeDescUtils.backtrack(oldengfd, operator, topOp);
+ if (backtrack == null) {
canApplyCtx.setWhrClauseColsFetchException(true);
+ return null;
}
//Add the predicate columns to RewriteCanApplyCtx's predColRefs list to check later
//if index keys contain all filter predicate columns and vice-a-versa
- for (String col : colList) {
+ for (String col : backtrack.getCols()) {
canApplyCtx.getPredicateColumnsList().add(col);
}
return null;
}
}
- public static CheckFilterProc canApplyOnFilterOperator() {
- return new CheckFilterProc();
+ public static CheckFilterProc canApplyOnFilterOperator(TableScanOperator topOp) {
+ return new CheckFilterProc(topOp);
}
/**
@@ -95,10 +91,16 @@ public final class RewriteCanApplyProcFa
*/
private static class CheckGroupByProc implements NodeProcessor {
+ private TableScanOperator topOp;
+
+ public CheckGroupByProc(TableScanOperator topOp) {
+ this.topOp = topOp;
+ }
+
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
Object... nodeOutputs) throws SemanticException {
GroupByOperator operator = (GroupByOperator)nd;
- canApplyCtx = (RewriteCanApplyCtx)ctx;
+ RewriteCanApplyCtx canApplyCtx = (RewriteCanApplyCtx)ctx;
//for each group-by clause in query, only one GroupByOperator of the
//GBY-RS-GBY sequence is stored in getGroupOpToInputTables
//we need to process only this operator
@@ -107,7 +109,7 @@ public final class RewriteCanApplyProcFa
!canApplyCtx.isQueryHasGroupBy()){
canApplyCtx.setQueryHasGroupBy(true);
- GroupByDesc conf = (GroupByDesc) operator.getConf();
+ GroupByDesc conf = operator.getConf();
List<AggregationDesc> aggrList = conf.getAggregators();
if(aggrList != null && aggrList.size() > 0){
for (AggregationDesc aggregationDesc : aggrList) {
@@ -119,40 +121,39 @@ public final class RewriteCanApplyProcFa
String aggFunc = aggregationDesc.getGenericUDAFName();
if(!("count".equals(aggFunc))){
canApplyCtx.setAggFuncIsNotCount(true);
- }else{
- List<ExprNodeDesc> para = aggregationDesc.getParameters();
- //for a valid aggregation, it needs to have non-null parameter list
- if(para == null){
- canApplyCtx.setAggFuncColsFetchException(true);
- }else if(para.size() == 0){
- //count(*) case
- canApplyCtx.setCountOnAllCols(true);
- canApplyCtx.setAggFunction("_count_of_all");
- }else{
- assert para.size()==1;
- for(int i=0; i< para.size(); i++){
- ExprNodeDesc expr = para.get(i);
- if(expr instanceof ExprNodeColumnDesc){
- //Add the columns to RewriteCanApplyCtx's selectColumnsList list
- //to check later if index keys contain all select clause columns
- //and vice-a-versa. We get the select column 'actual' names only here
- //if we have a agg func along with group-by
- //SelectOperator has internal names in its colList data structure
- canApplyCtx.getSelectColumnsList().add(
- ((ExprNodeColumnDesc) expr).getColumn());
- //Add the columns to RewriteCanApplyCtx's aggFuncColList list to check later
- //if columns contained in agg func are index key columns
- canApplyCtx.getAggFuncColList().add(
- ((ExprNodeColumnDesc) expr).getColumn());
- canApplyCtx.setAggFunction("_count_of_" +
- ((ExprNodeColumnDesc) expr).getColumn() + "");
- }else if(expr instanceof ExprNodeConstantDesc){
- //count(1) case
- canApplyCtx.setCountOfOne(true);
- canApplyCtx.setAggFunction("_count_of_1");
- }
- }
+ return false;
+ }
+ List<ExprNodeDesc> para = aggregationDesc.getParameters();
+ //for a valid aggregation, it needs to have a non-null parameter list
+ if (para == null) {
+ canApplyCtx.setAggFuncColsFetchException(true);
+ } else if (para.size() == 0) {
+ //count(*) case
+ canApplyCtx.setCountOnAllCols(true);
+ canApplyCtx.setAggFunction("_count_of_all");
+ } else if (para.size() == 1) {
+ ExprNodeDesc expr = ExprNodeDescUtils.backtrack(para.get(0), operator, topOp);
+ if (expr instanceof ExprNodeColumnDesc){
+ //Add the columns to RewriteCanApplyCtx's selectColumnsList
+ //to check later if index keys contain all select clause columns
+ //and vice versa. We get the select columns' 'actual' names only here
+ //if we have an agg func along with group-by;
+ //SelectOperator has internal names in its colList data structure
+ canApplyCtx.getSelectColumnsList().add(
+ ((ExprNodeColumnDesc) expr).getColumn());
+ //Add the columns to RewriteCanApplyCtx's aggFuncColList to check later
+ //if columns contained in agg func are index key columns
+ canApplyCtx.getAggFuncColList().add(
+ ((ExprNodeColumnDesc) expr).getColumn());
+ canApplyCtx.setAggFunction("_count_of_" +
+ ((ExprNodeColumnDesc) expr).getColumn() + "");
+ } else if(expr instanceof ExprNodeConstantDesc) {
+ //count(1) case
+ canApplyCtx.setCountOfOne(true);
+ canApplyCtx.setAggFunction("_count_of_1");
}
+ } else {
+ throw new SemanticException("Invalid number of arguments for count");
}
}
}
@@ -163,13 +164,13 @@ public final class RewriteCanApplyProcFa
canApplyCtx.setGbyKeysFetchException(true);
}
for (ExprNodeDesc expr : keyList) {
- checkExpression(expr);
+ checkExpression(canApplyCtx, expr);
}
}
return null;
}
- private void checkExpression(ExprNodeDesc expr){
+ private void checkExpression(RewriteCanApplyCtx canApplyCtx, ExprNodeDesc expr){
if(expr instanceof ExprNodeColumnDesc){
//Add the group-by keys to RewriteCanApplyCtx's gbKeyNameList list to check later
//if all keys are from index columns
@@ -182,59 +183,14 @@ public final class RewriteCanApplyProcFa
canApplyCtx.getGbKeyNameList().addAll(expr.getCols());
canApplyCtx.getSelectColumnsList().add(((ExprNodeColumnDesc) childExpr).getColumn());
}else if(childExpr instanceof ExprNodeGenericFuncDesc){
- checkExpression(childExpr);
+ checkExpression(canApplyCtx, childExpr);
}
}
}
}
}
-
- public static CheckGroupByProc canApplyOnGroupByOperator() {
- return new CheckGroupByProc();
+ public static CheckGroupByProc canApplyOnGroupByOperator(TableScanOperator topOp) {
+ return new CheckGroupByProc(topOp);
}
-
-
- /**
- * Check for conditions in SelectOperator that do not meet rewrite criteria.
- */
- private static class CheckSelectProc implements NodeProcessor {
- public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
- Object... nodeOutputs) throws SemanticException {
- SelectOperator operator = (SelectOperator)nd;
- canApplyCtx = (RewriteCanApplyCtx)ctx;
-
- List<Operator<? extends OperatorDesc>> childrenList = operator.getChildOperators();
- Operator<? extends OperatorDesc> child = childrenList.get(0);
- if(child instanceof FileSinkOperator){
- Map<String, String> internalToAlias = new LinkedHashMap<String, String>();
- RowSchema rs = operator.getSchema();
- //to get the internal to alias mapping
- List<ColumnInfo> sign = rs.getSignature();
- for (ColumnInfo columnInfo : sign) {
- internalToAlias.put(columnInfo.getInternalName(), columnInfo.getAlias());
- }
-
- //if FilterOperator predicate has internal column names,
- //we need to retrieve the 'actual' column names to
- //check if index keys contain all filter predicate columns and vice-a-versa
- Iterator<String> predItr = canApplyCtx.getPredicateColumnsList().iterator();
- while(predItr.hasNext()){
- String predCol = predItr.next();
- String newPredCol = "";
- if(internalToAlias.get(predCol) != null){
- newPredCol = internalToAlias.get(predCol);
- canApplyCtx.getPredicateColumnsList().remove(predCol);
- canApplyCtx.getPredicateColumnsList().add(newPredCol);
- }
- }
- }
- return null;
- }
- }
-
- public static CheckSelectProc canApplyOnSelectOperator() {
- return new CheckSelectProc();
- }
-
}
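
Pulling the count(...) classification out of the refactored CheckGroupByProc above:
anything other than count disqualifies the rewrite; count(*) is tagged "_count_of_all",
count(1) (an argument that backtracks to a constant) "_count_of_1", count(col)
"_count_of_<col>", and more than one argument is a semantic error. A hypothetical
standalone helper capturing that decision table is sketched below, under the assumption
that the single parameter has already been backtracked to either a column name or a
constant; it is not Hive API.

  // Hypothetical helper (not Hive API) restating the count() classification in
  // CheckGroupByProc. columnOrNull is the backtracked single argument: a column
  // name, or null when it resolved to a constant (the count(1) case).
  final class CountClassifierSketch {
    static String classifyCount(String udafName, int paramCount, String columnOrNull) {
      if (!"count".equals(udafName)) {
        return null;                       // only count(...) is rewritable
      }
      if (paramCount == 0) {
        return "_count_of_all";            // count(*)
      }
      if (paramCount > 1) {
        // mirrors: throw new SemanticException("Invalid number of arguments for count")
        throw new IllegalArgumentException("Invalid number of arguments for count");
      }
      return (columnOrNull != null)
          ? "_count_of_" + columnOrNull    // count(col)
          : "_count_of_1";                 // count(1)
    }
  }
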