Posted to commits@hive.apache.org by jc...@apache.org on 2016/12/15 18:36:09 UTC

[4/4] hive git commit: HIVE-15409: Add support for GROUPING function with grouping sets (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

HIVE-15409: Add support for GROUPING function with grouping sets (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
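
With this change, queries that use CUBE/ROLLUP/GROUPING SETS can call
grouping(<expr>) to tell whether a row aggregates over that expression
(returns 1) or not (returns 0). A minimal usage example, taken from the new
test file groupby_grouping_sets_grouping.q added below:

    select key, value, `grouping__id`, grouping(key), grouping(value)
    from T1
    group by rollup(key, value);

As the planner changes below show, grouping() is also accepted in HAVING
clauses and inside window specifications.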


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/89362a14
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/89362a14
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/89362a14

Branch: refs/heads/master
Commit: 89362a14d2124e439d4d254eab3bae504d90199e
Parents: cb7918c
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Dec 14 11:05:23 2016 +0000
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Dec 15 18:35:34 2016 +0000

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/ql/ErrorMsg.java     |   1 +
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |   2 +
 .../hadoop/hive/ql/exec/GroupByOperator.java    |  11 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  32 +-
 .../hadoop/hive/ql/parse/IdentifiersParser.g    |   2 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  | 100 ++-
 .../hive/ql/udf/generic/GenericUDFGrouping.java |  91 +++
 .../groupby_grouping_sets_grouping.q            |  89 +++
 .../test/queries/clientpositive/perf/query22.q  |  22 +-
 .../test/queries/clientpositive/perf/query27.q  |  23 +-
 .../test/queries/clientpositive/perf/query36.q  |  29 +
 .../test/queries/clientpositive/perf/query67.q  |  14 +-
 .../test/queries/clientpositive/perf/query70.q  |  39 +-
 .../test/queries/clientpositive/perf/query86.q  |  25 +
 .../clientpositive/annotate_stats_groupby.q.out | 192 ++---
 .../annotate_stats_groupby2.q.out               |  40 +-
 .../results/clientpositive/groupby_cube1.q.out  |  74 +-
 .../clientpositive/groupby_cube_multi_gby.q.out |  18 +-
 .../clientpositive/groupby_grouping_sets2.q.out |  32 +-
 .../clientpositive/groupby_grouping_sets3.q.out |  24 +-
 .../clientpositive/groupby_grouping_sets4.q.out |  48 +-
 .../clientpositive/groupby_grouping_sets5.q.out |  24 +-
 .../clientpositive/groupby_grouping_sets6.q.out |  16 +-
 .../groupby_grouping_sets_grouping.q.out        | 765 +++++++++++++++++++
 .../groupby_grouping_sets_limit.q.out           |  48 +-
 .../groupby_grouping_window.q.out               |   8 +-
 .../clientpositive/groupby_rollup1.q.out        |  56 +-
 .../infer_bucket_sort_grouping_operators.q.out  |  24 +-
 .../clientpositive/limit_pushdown2.q.out        |  16 +-
 .../llap/vector_grouping_sets.q.out             |  28 +-
 .../results/clientpositive/perf/query18.q.out   |   2 +-
 .../results/clientpositive/perf/query22.q.out   |  46 +-
 .../results/clientpositive/perf/query27.q.out   | 198 +++--
 .../results/clientpositive/perf/query36.q.out   | 149 ++++
 .../results/clientpositive/perf/query67.q.out   |  28 +-
 .../results/clientpositive/perf/query70.q.out   |  88 ++-
 .../results/clientpositive/perf/query80.q.out   |   6 +-
 .../results/clientpositive/perf/query86.q.out   | 126 +++
 .../results/clientpositive/show_functions.q.out |   1 +
 .../clientpositive/spark/groupby_cube1.q.out    |  74 +-
 .../clientpositive/spark/groupby_rollup1.q.out  |  56 +-
 .../tez/multi_count_distinct.q.out              |   4 +-
 .../clientpositive/vector_grouping_sets.q.out   |  28 +-
 43 files changed, 2158 insertions(+), 541 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 306c57f..038dd3f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -461,6 +461,7 @@ public enum ErrorMsg {
   MERGE_TOO_MANY_UPDATE(10406, "MERGE statement can have at most 1 WHEN MATCHED ... UPDATE clause: <{0}>", true),
   INVALID_JOIN_CONDITION(10407, "Error parsing condition in outer join"),
   INVALID_TARGET_COLUMN_IN_SET_CLAUSE(10408, "Target column \"{0}\" of set clause is not found in table \"{1}\".", true),
+  HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY(10409, "Expression in GROUPING function not present in GROUP BY"),
   //========================== 20000 range starts here ========================//
   SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."),
   SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. "
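
The new 10409 error fires when grouping() is applied to an expression that is
not part of the GROUP BY for the clause. A sketch of a query that would now
be rejected (using the T1 table from the tests below):

    select key, grouping(value)
    from T1
    group by rollup(key);   -- value is not grouped, so this raises 10409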

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 87330ed..6f01da0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -342,6 +342,8 @@ public final class FunctionRegistry {
     system.registerUDF("shiftright", UDFOPBitShiftRight.class, true);
     system.registerUDF("shiftrightunsigned", UDFOPBitShiftRightUnsigned.class, true);
 
+    system.registerGenericUDF("grouping", GenericUDFGrouping.class);
+
     system.registerGenericUDF("current_database", UDFCurrentDB.class);
     system.registerGenericUDF("current_date", GenericUDFCurrentDate.class);
     system.registerGenericUDF("current_timestamp", GenericUDFCurrentTimestamp.class);

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
index f28d33e..cddf14f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
@@ -31,8 +31,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import javolution.util.FastBitSet;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
@@ -64,8 +62,11 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.Text;
 
+import javolution.util.FastBitSet;
+
 /**
  * GroupBy operator implementation.
  */
@@ -127,7 +128,7 @@ public class GroupByOperator extends Operator<GroupByDesc> {
   private transient int groupingSetsPosition;         // position of grouping set, generally the last of keys
   private transient List<Integer> groupingSets;       // declared grouping set values
   private transient FastBitSet[] groupingSetsBitSet;  // bitsets acquired from grouping set values
-  private transient Text[] newKeysGroupingSets;
+  private transient IntWritable[] newKeysGroupingSets;
 
   // for these positions, some variable primitive type (String) is used, so size
   // cannot be estimated. sample it at runtime.
@@ -218,13 +219,13 @@ public class GroupByOperator extends Operator<GroupByDesc> {
     if (groupingSetsPresent) {
       groupingSets = conf.getListGroupingSets();
       groupingSetsPosition = conf.getGroupingSetPosition();
-      newKeysGroupingSets = new Text[groupingSets.size()];
+      newKeysGroupingSets = new IntWritable[groupingSets.size()];
       groupingSetsBitSet = new FastBitSet[groupingSets.size()];
 
       int pos = 0;
       for (Integer groupingSet: groupingSets) {
         // Create the mapping corresponding to the grouping set
-        newKeysGroupingSets[pos] = new Text(String.valueOf(groupingSet));
+        newKeysGroupingSets[pos] = new IntWritable(groupingSet);
         groupingSetsBitSet[pos] = groupingSet2BitSet(groupingSet);
         pos++;
       }

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index f1f3bf9..16df496 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -81,7 +81,6 @@ import org.apache.calcite.rel.rules.ProjectRemoveRule;
 import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule;
 import org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule;
 import org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule;
-import org.apache.calcite.rel.rules.SemiJoinRule;
 import org.apache.calcite.rel.rules.UnionMergeRule;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeFactory;
@@ -109,6 +108,7 @@ import org.apache.calcite.util.CompositeList;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.ImmutableIntList;
 import org.apache.calcite.util.Pair;
+import org.apache.commons.lang.mutable.MutableBoolean;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.Constants;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -240,6 +240,7 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableList.Builder;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
+import com.google.common.math.IntMath;
 
 public class CalcitePlanner extends SemanticAnalyzer {
 
@@ -2964,7 +2965,16 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
       // 4. Walk through Window Expressions & Construct RexNodes for those,
       // Update out_rwsch
+      final QBParseInfo qbp = getQBParseInfo(qb);
+      final String selClauseName = qbp.getClauseNames().iterator().next();
+      final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty()
+              || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty());
       for (WindowExpressionSpec wExprSpec : windowExpressions) {
+        if (cubeRollupGrpSetPresent) {
+          // Special handling of grouping function
+          wExprSpec.setExpression(rewriteGroupingFunctionAST(
+                  getGroupByForClause(qbp, selClauseName), wExprSpec.getExpression()));
+        }
         if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) {
           Pair<RexNode, TypeInfo> wtp = genWindowingProj(qb, wExprSpec, srcRel);
           projsForWindowSelOp.add(wtp.getKey());
@@ -3067,6 +3077,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
       String selClauseName = qbp.getClauseNames().iterator().next();
       ASTNode selExprList = qbp.getSelForClause(selClauseName);
 
+      final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty()
+              || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty());
+
       // 2.Row resolvers for input, output
       RowResolver out_rwsch = new RowResolver();
       Integer pos = Integer.valueOf(0);
@@ -3238,6 +3251,10 @@ public class CalcitePlanner extends SemanticAnalyzer {
           TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
           // We allow stateful functions in the SELECT list (but nowhere else)
           tcCtx.setAllowStatefulFunctions(true);
+          if (cubeRollupGrpSetPresent) {
+            // Special handling of grouping function
+            expr = rewriteGroupingFunctionAST(getGroupByForClause(qbp, selClauseName), expr);
+          }
           ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
           String recommended = recommendName(exp, colAlias);
           if (recommended != null && out_rwsch.get(null, recommended) == null) {
@@ -3604,6 +3621,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
         throws SemanticException {
       RelNode gbFilter = null;
       QBParseInfo qbp = getQBParseInfo(qb);
+      String destClauseName = qbp.getClauseNames().iterator().next();
       ASTNode havingClause = qbp.getHavingForClause(qbp.getClauseNames().iterator().next());
 
       if (havingClause != null) {
@@ -3612,9 +3630,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
           throw new CalciteSemanticException("Having clause without any group-by.",
               UnsupportedFeature.Having_clause_without_any_groupby);
         }
-        validateNoHavingReferenceToAlias(qb, (ASTNode) havingClause.getChild(0));
-        gbFilter = genFilterRelNode(qb, (ASTNode) havingClause.getChild(0), srcRel, aliasToRel,
-            true);
+        ASTNode targetNode = (ASTNode) havingClause.getChild(0);
+        validateNoHavingReferenceToAlias(qb, targetNode);
+        final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty()
+                || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty());
+        if (cubeRollupGrpSetPresent) {
+          // Special handling of grouping function
+          targetNode = rewriteGroupingFunctionAST(getGroupByForClause(qbp, destClauseName), targetNode);
+        }
+        gbFilter = genFilterRelNode(qb, targetNode, srcRel, aliasToRel, true);
       }
 
       return gbFilter;
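
The same guard appears in all three hunks above: when rollups, cubes, or
grouping sets are present for the destination clause, grouping() calls are
rewritten in the SELECT list, in window expressions, and in the HAVING
clause. The window-expression path is what the new perf queries exercise,
e.g. this fragment from query70.q below:

    rank() over (
      partition by grouping(s_state)+grouping(s_county),
      case when grouping(s_county) = 0 then s_state end
      order by sum(ss_net_profit) desc) as rank_within_parent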

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index d50d839..cd9adfc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -768,5 +768,5 @@ nonReserved
 //The following SQL2011 reserved keywords are used as function name only, but not as identifiers.
 sql11ReservedKeywordsUsedAsFunctionName
     :
-    KW_BIGINT | KW_BINARY | KW_BOOLEAN | KW_CURRENT_DATE | KW_CURRENT_TIMESTAMP | KW_DATE | KW_DOUBLE | KW_FLOAT | KW_INT | KW_SMALLINT | KW_TIMESTAMP
+    KW_BIGINT | KW_BINARY | KW_BOOLEAN | KW_CURRENT_DATE | KW_CURRENT_TIMESTAMP | KW_DATE | KW_DOUBLE | KW_FLOAT | KW_GROUPING | KW_INT | KW_SMALLINT | KW_TIMESTAMP
     ;

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 79e55b2..def1a7d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -47,9 +47,12 @@ import org.antlr.runtime.ClassicToken;
 import org.antlr.runtime.CommonToken;
 import org.antlr.runtime.Token;
 import org.antlr.runtime.tree.Tree;
+import org.antlr.runtime.tree.TreeVisitor;
+import org.antlr.runtime.tree.TreeVisitorAction;
 import org.antlr.runtime.tree.TreeWizard;
 import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
 import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang.mutable.MutableBoolean;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -235,6 +238,7 @@ import org.apache.hadoop.mapred.OutputFormat;
 import org.apache.hadoop.security.UserGroupInformation;
 
 import com.google.common.collect.Sets;
+import com.google.common.math.IntMath;
 
 /**
  * Implementation of the semantic analyzer. It generates the query plan.
@@ -2992,6 +2996,18 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     }
     ASTNode condn = (ASTNode) havingExpr.getChild(0);
 
+    if (!isCBOExecuted()) {
+      // If CBO did not optimize the query, we might need to replace grouping function
+      final String destClauseName = qb.getParseInfo().getClauseNames().iterator().next();
+      final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty()
+              || !qb.getParseInfo().getDestGroupingSets().isEmpty()
+              || !qb.getParseInfo().getDestCubes().isEmpty());
+      if (cubeRollupGrpSetPresent) {
+        // Special handling of grouping function
+        condn = rewriteGroupingFunctionAST(getGroupByForClause(qb.getParseInfo(), destClauseName), condn);
+      }
+    }
+
     /*
      * Now a having clause can contain a SubQuery predicate;
      * so we invoke genFilterPlan to handle SubQuery algebraic transformation,
@@ -3002,6 +3018,52 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return output;
   }
 
+  protected static ASTNode rewriteGroupingFunctionAST(final List<ASTNode> grpByAstExprs, ASTNode targetNode) throws SemanticException {
+    final MutableBoolean visited = new MutableBoolean(false);
+    final MutableBoolean found = new MutableBoolean(false);
+
+    TreeVisitorAction action = new TreeVisitorAction() {
+
+      @Override
+      public Object pre(Object t) {
+        return t;
+      }
+
+      @Override
+      public Object post(Object t) {
+        ASTNode root = (ASTNode) t;
+        if (root.getType() == HiveParser.TOK_FUNCTION && root.getChildCount() == 2) {
+          ASTNode func = (ASTNode) ParseDriver.adaptor.getChild(root, 0);
+          if (func.getText().equals("grouping")) {
+            ASTNode c = (ASTNode) ParseDriver.adaptor.getChild(root, 1);
+            visited.setValue(true);
+            for (int i = 0; i < grpByAstExprs.size(); i++) {
+              ASTNode grpByExpr = grpByAstExprs.get(i);
+              if (grpByExpr.toStringTree().equals(c.toStringTree())) {
+                ASTNode child1 = (ASTNode) ParseDriver.adaptor.create(
+                        HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL");
+                ParseDriver.adaptor.addChild(child1, ParseDriver.adaptor.create(
+                        HiveParser.Identifier, VirtualColumn.GROUPINGID.getName()));
+                ASTNode child2 = (ASTNode) ParseDriver.adaptor.create(HiveParser.IntegralLiteral,
+                        String.valueOf(IntMath.mod(-i, grpByAstExprs.size())));
+                root.setChild(1, child1);
+                root.addChild(child2);
+                found.setValue(true);
+                break;
+              }
+            }
+          }
+        }
+        return t;
+      }
+    };
+    ASTNode newTargetNode = (ASTNode) new TreeVisitor(ParseDriver.adaptor).visit(targetNode, action);
+    if (visited.booleanValue() && !found.booleanValue()) {
+      throw new SemanticException(ErrorMsg.HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY.getMsg());
+    }
+    return newTargetNode;
+  }
+
   private Operator genPlanForSubQueryPredicate(
       QB qbSQ,
       ISubQueryJoinInfo subQueryPredicate) throws SemanticException {
@@ -4101,6 +4163,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       startPosn = 0;
     }
 
+    final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty()
+            || !qb.getParseInfo().getDestGroupingSets().isEmpty()
+            || !qb.getParseInfo().getDestCubes().isEmpty());
     Set<String> colAliases = new HashSet<String>();
     ASTNode[] exprs = new ASTNode[exprList.getChildCount()];
     String[][] aliases = new String[exprList.getChildCount()][];
@@ -4186,6 +4251,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         // We allow stateful functions in the SELECT list (but nowhere else)
         tcCtx.setAllowStatefulFunctions(true);
         tcCtx.setAllowDistinctFunctions(false);
+        if (!isCBOExecuted() && cubeRollupGrpSetPresent) {
+          // If CBO did not optimize the query, we might need to replace grouping function
+          // Special handling of grouping function
+          expr = rewriteGroupingFunctionAST(getGroupByForClause(qb.getParseInfo(), dest), expr);
+        }
         ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
         String recommended = recommendName(exp, colAlias);
         if (recommended != null && !colAliases.contains(recommended) &&
@@ -4684,7 +4754,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     // For grouping sets, add a dummy grouping key
     String groupingSetColumnName =
         groupByInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
-    ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
+    ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo,
         groupingSetColumnName, null, false);
     groupByKeys.add(inputExpr);
 
@@ -4693,7 +4763,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
         new ColumnInfo(
             field,
-            TypeInfoFactory.stringTypeInfo,
+            TypeInfoFactory.intTypeInfo,
             null,
             true));
     colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1));
@@ -4715,7 +4785,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     // add a key for reduce sink
     String groupingSetColumnName =
         reduceSinkInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName();
-    ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
+    ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo,
         groupingSetColumnName, null, false);
     reduceKeys.add(inputExpr);
 
@@ -4947,14 +5017,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       Map<String, ExprNodeDesc> colExprMap) {
     // The value for the constant does not matter. It is replaced by the grouping set
     // value for the actual implementation
-    ExprNodeConstantDesc constant = new ExprNodeConstantDesc("0");
+    ExprNodeConstantDesc constant = new ExprNodeConstantDesc(0);
     groupByKeys.add(constant);
     String field = getColumnInternalName(groupByKeys.size() - 1);
     outputColumnNames.add(field);
     groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(),
         new ColumnInfo(
             field,
-            TypeInfoFactory.stringTypeInfo,
+            TypeInfoFactory.intTypeInfo,
             null,
             true));
     colExprMap.put(field, constant);
@@ -9471,7 +9541,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     }
 
     if(queryProperties.hasWindowing() && qb.getWindowingSpec(dest) != null) {
-      curr = genWindowingPlan(qb.getWindowingSpec(dest), curr);
+      curr = genWindowingPlan(qb, qb.getWindowingSpec(dest), curr);
       // GBy for DISTINCT after windowing
       if ((qbp.getAggregationExprsForClause(dest).size() != 0
               || getGroupByForClause(qbp, dest).size() > 0)
@@ -12972,8 +13042,24 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
 //--------------------------- Windowing handling: PTFInvocationSpec to PTFDesc --------------------
 
-  Operator genWindowingPlan(WindowingSpec wSpec, Operator input) throws SemanticException {
+  Operator genWindowingPlan(QB qb, WindowingSpec wSpec, Operator input) throws SemanticException {
     wSpec.validateAndMakeEffective();
+
+    if (!isCBOExecuted()) {
+      // If CBO did not optimize the query, we might need to replace grouping function
+      final String selClauseName = qb.getParseInfo().getClauseNames().iterator().next();
+      final boolean cubeRollupGrpSetPresent = (!qb.getParseInfo().getDestRollups().isEmpty()
+              || !qb.getParseInfo().getDestGroupingSets().isEmpty()
+              || !qb.getParseInfo().getDestCubes().isEmpty());
+      if (cubeRollupGrpSetPresent) {
+        for (WindowExpressionSpec wExprSpec : wSpec.getWindowExpressions()) {
+          // Special handling of grouping function
+          wExprSpec.setExpression(rewriteGroupingFunctionAST(
+                  getGroupByForClause(qb.getParseInfo(), selClauseName), wExprSpec.getExpression()));
+        }
+      }
+    }
+
     WindowingComponentizer groups = new WindowingComponentizer(wSpec);
     RowResolver rr = opParseCtx.get(input).getRowResolver();
 

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java
new file mode 100644
index 0000000..cc01526
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFGrouping.java
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantIntObjectInspector;
+
+/**
+ * UDF grouping
+ */
+@Description(name = "grouping",
+value = "_FUNC_(a, b) - Indicates whether a specified column expression in "
++ "is aggregated or not. Returns 1 for aggregated or 0 for not aggregated. ",
+extended = "a is the grouping id, b is the index we want to extract")
+@UDFType(deterministic = true)
+@NDV(maxNdv = 2)
+public class GenericUDFGrouping extends GenericUDF {
+
+  private transient IntObjectInspector groupingIdOI;
+  private int index = 0;
+  private ByteWritable byteWritable = new ByteWritable();
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    if (arguments.length != 2) {
+      throw new UDFArgumentLengthException(
+        "grouping() requires 2 argument, got " + arguments.length);
+    }
+
+    if (arguments[0].getCategory() != Category.PRIMITIVE) {
+      throw new UDFArgumentTypeException(0, "The first argument to grouping() must be primitive");
+    }
+    PrimitiveObjectInspector arg1OI = (PrimitiveObjectInspector) arguments[0];
+    if (arg1OI.getPrimitiveCategory() != PrimitiveCategory.INT) {
+      throw new UDFArgumentTypeException(0, "The first argument to grouping() must be an integer");
+    }
+    groupingIdOI = (IntObjectInspector) arguments[0];
+
+    PrimitiveObjectInspector arg2OI = (PrimitiveObjectInspector) arguments[1];
+    if (!(arg2OI instanceof WritableConstantIntObjectInspector)) {
+      throw new UDFArgumentTypeException(1, "The second argument to grouping() must be a constant");
+    }
+    index = ((WritableConstantIntObjectInspector)arg2OI).getWritableConstantValue().get();
+
+    return PrimitiveObjectInspectorFactory.writableByteObjectInspector;
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    // Extract the bit of the grouping id at the given (constant) index:
+    // 1 means the expression is aggregated in this row, 0 means it is not
+    byteWritable.set((byte)
+            ((PrimitiveObjectInspectorUtils.getInt(arguments[0].get(), groupingIdOI) >> index) & 1));
+    return byteWritable;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    assert (children.length == 2);
+    return getStandardDisplayString("grouping", children);
+  }
+
+}
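
evaluate() is a single bit test: with grouping id g and constant index b, the
result is (g >> b) & 1, returned as a tinyint. A worked example with assumed
grouping ids:

    g = 3 (binary 11):  (3 >> 0) & 1 = 1    (3 >> 1) & 1 = 1
    g = 2 (binary 10):  (2 >> 0) & 1 = 0    (2 >> 1) & 1 = 1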

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q b/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
new file mode 100644
index 0000000..1b753e1
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/groupby_grouping_sets_grouping.q
@@ -0,0 +1,89 @@
+CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1;
+
+explain
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by rollup(key, value);
+
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by rollup(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by cube(key, value);
+
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by cube(key, value);
+
+explain
+select key, value
+from T1
+group by cube(key, value)
+having grouping(key) = 1;
+
+select key, value
+from T1
+group by cube(key, value)
+having grouping(key) = 1;
+
+explain
+select key, value, grouping(key)+grouping(value) as x
+from T1
+group by cube(key, value)
+having grouping(key) = 1 OR grouping(value) = 1
+order by x desc, case when x = 1 then key end;
+
+select key, value, grouping(key)+grouping(value) as x
+from T1
+group by cube(key, value)
+having grouping(key) = 1 OR grouping(value) = 1
+order by x desc, case when x = 1 then key end;
+
+set hive.cbo.enable=false;
+
+explain
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by rollup(key, value);
+
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by rollup(key, value);
+
+explain
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by cube(key, value);
+
+select key, value, `grouping__id`, grouping(key), grouping(value)
+from T1
+group by cube(key, value);
+
+explain
+select key, value
+from T1
+group by cube(key, value)
+having grouping(key) = 1;
+
+select key, value
+from T1
+group by cube(key, value)
+having grouping(key) = 1;
+
+explain
+select key, value, grouping(key)+grouping(value) as x
+from T1
+group by cube(key, value)
+having grouping(key) = 1 OR grouping(value) = 1
+order by x desc, case when x = 1 then key end;
+
+select key, value, grouping(key)+grouping(value) as x
+from T1
+group by cube(key, value)
+having grouping(key) = 1 OR grouping(value) = 1
+order by x desc, case when x = 1 then key end;

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/test/queries/clientpositive/perf/query22.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query22.q b/ql/src/test/queries/clientpositive/perf/query22.q
index 17cece2..adb509d 100644
--- a/ql/src/test/queries/clientpositive/perf/query22.q
+++ b/ql/src/test/queries/clientpositive/perf/query22.q
@@ -1 +1,21 @@
-explain select i_product_name ,i_brand ,i_class ,i_category ,avg(inv_quantity_on_hand) qoh from inventory ,date_dim ,item ,warehouse where inventory.inv_date_sk=date_dim.d_date_sk and inventory.inv_item_sk=item.i_item_sk and inventory.inv_warehouse_sk = warehouse.w_warehouse_sk and date_dim.d_month_seq between 1193 and 1193 + 11 group by i_product_name ,i_brand ,i_class ,i_category with rollup order by qoh, i_product_name, i_brand, i_class, i_category limit 100;
+explain
+select  i_product_name
+             ,i_brand
+             ,i_class
+             ,i_category
+             ,avg(inv_quantity_on_hand) qoh
+       from inventory
+           ,date_dim
+           ,item
+           ,warehouse
+       where inv_date_sk=d_date_sk
+              and inv_item_sk=i_item_sk
+              and inv_warehouse_sk = w_warehouse_sk
+              and d_month_seq between 1212 and 1212 + 11
+       group by rollup(i_product_name
+                       ,i_brand
+                       ,i_class
+                       ,i_category)
+order by qoh, i_product_name, i_brand, i_class, i_category
+limit 100;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/test/queries/clientpositive/perf/query27.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query27.q b/ql/src/test/queries/clientpositive/perf/query27.q
index 58be664..0cbb3ae 100644
--- a/ql/src/test/queries/clientpositive/perf/query27.q
+++ b/ql/src/test/queries/clientpositive/perf/query27.q
@@ -1 +1,22 @@
-explain select i_item_id, s_state, avg(ss_quantity) agg1, avg(ss_list_price) agg2, avg(ss_coupon_amt) agg3, avg(ss_sales_price) agg4 from store_sales, customer_demographics, date_dim, store, item where store_sales.ss_sold_date_sk = date_dim.d_date_sk and store_sales.ss_item_sk = item.i_item_sk and store_sales.ss_store_sk = store.s_store_sk and store_sales.ss_cdemo_sk = customer_demographics.cd_demo_sk and customer_demographics.cd_gender = 'F' and customer_demographics.cd_marital_status = 'D' and customer_demographics.cd_education_status = 'Unknown' and date_dim.d_year = 1998 and store.s_state in ('KS','AL', 'MN', 'AL', 'SC', 'VT') group by i_item_id, s_state order by i_item_id ,s_state limit 100;
+explain
+select  i_item_id,
+        s_state, grouping(s_state) g_state,
+        avg(ss_quantity) agg1,
+        avg(ss_list_price) agg2,
+        avg(ss_coupon_amt) agg3,
+        avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+       ss_item_sk = i_item_sk and
+       ss_store_sk = s_store_sk and
+       ss_cdemo_sk = cd_demo_sk and
+       cd_gender = 'M' and
+       cd_marital_status = 'U' and
+       cd_education_status = '2 yr Degree' and
+       d_year = 2001 and
+       s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+         ,s_state
+ limit 100;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/test/queries/clientpositive/perf/query36.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query36.q b/ql/src/test/queries/clientpositive/perf/query36.q
new file mode 100644
index 0000000..6c3a945
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query36.q
@@ -0,0 +1,29 @@
+explain
+select  
+    sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin
+   ,i_category
+   ,i_class
+   ,grouping(i_category)+grouping(i_class) as lochierarchy
+   ,rank() over (
+     partition by grouping(i_category)+grouping(i_class),
+     case when grouping(i_class) = 0 then i_category end 
+     order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent
+ from
+    store_sales
+   ,date_dim       d1
+   ,item
+   ,store
+ where
+    d1.d_year = 1999 
+ and d1.d_date_sk = ss_sold_date_sk
+ and i_item_sk  = ss_item_sk 
+ and s_store_sk  = ss_store_sk
+ and s_state in ('SD','FL','MI','LA',
+                 'MO','SC','AL','GA')
+ group by rollup(i_category,i_class)
+ order by
+   lochierarchy desc
+  ,case when lochierarchy = 0 then i_category end
+  ,rank_within_parent
+  limit 100;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/test/queries/clientpositive/perf/query67.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query67.q b/ql/src/test/queries/clientpositive/perf/query67.q
index 56ef907..ca2fc61 100644
--- a/ql/src/test/queries/clientpositive/perf/query67.q
+++ b/ql/src/test/queries/clientpositive/perf/query67.q
@@ -1,5 +1,6 @@
 set hive.mapred.mode=nonstrict;
-explain 
+
+explain
 select  *
 from (select i_category
             ,i_class
@@ -24,11 +25,11 @@ from (select i_category
                 ,date_dim
                 ,store
                 ,item
-       where  store_sales.ss_sold_date_sk=date_dim.d_date_sk
-          and store_sales.ss_item_sk=item.i_item_sk
-          and store_sales.ss_store_sk = store.s_store_sk
-          and d_month_seq between 1193 and 1193+11
-       group by i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id with rollup)dw1) dw2
+       where  ss_sold_date_sk=d_date_sk
+          and ss_item_sk=i_item_sk
+          and ss_store_sk = s_store_sk
+          and d_month_seq between 1212 and 1212+11
+       group by  rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2
 where rk <= 100
 order by i_category
         ,i_class
@@ -42,4 +43,3 @@ order by i_category
         ,rk
 limit 100;
 
-

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/test/queries/clientpositive/perf/query70.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query70.q b/ql/src/test/queries/clientpositive/perf/query70.q
index 07d68e7..80f8c23 100644
--- a/ql/src/test/queries/clientpositive/perf/query70.q
+++ b/ql/src/test/queries/clientpositive/perf/query70.q
@@ -1,2 +1,39 @@
 set hive.mapred.mode=nonstrict;
-explain select sum(ss_net_profit) as total_sum ,s_state ,s_county ,grouping__id as lochierarchy , rank() over(partition by grouping__id, case when grouping__id == 2 then s_state end order by sum(ss_net_profit)) as rank_within_parent from store_sales ss join date_dim d1 on d1.d_date_sk = ss.ss_sold_date_sk join store s on s.s_store_sk = ss.ss_store_sk where d1.d_month_seq between 1193 and 1193+11 and s.s_state in ( select s_state from (select s_state as s_state, sum(ss_net_profit), rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking from store_sales, store, date_dim where d_month_seq between 1193 and 1193+11 and date_dim.d_date_sk = store_sales.ss_sold_date_sk and store.s_store_sk = store_sales.ss_store_sk group by s_state ) tmp1 where ranking <= 5 ) group by s_state,s_county with rollup order by lochierarchy desc ,case when lochierarchy = 0 then s_state end ,rank_within_parent limit 100;
+
+explain
+select  
+    sum(ss_net_profit) as total_sum
+   ,s_state
+   ,s_county
+   ,grouping(s_state)+grouping(s_county) as lochierarchy
+   ,rank() over (
+     partition by grouping(s_state)+grouping(s_county),
+     case when grouping(s_county) = 0 then s_state end 
+     order by sum(ss_net_profit) desc) as rank_within_parent
+ from
+    store_sales
+   ,date_dim       d1
+   ,store s
+ where
+    d1.d_month_seq between 1212 and 1212+11
+ and d1.d_date_sk = ss_sold_date_sk
+ and s_store_sk  = ss_store_sk
+ and s.s_state in
+             ( select s_state
+               from  (select s_state as s_state,
+                 rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking
+                      from   store_sales, store, date_dim
+                      where  d_month_seq between 1212 and 1212+11
+                 and d_date_sk = ss_sold_date_sk
+                 and s_store_sk  = ss_store_sk
+                      group by s_state
+                     ) tmp1 
+               where ranking <= 5
+             )
+ group by rollup(s_state,s_county)
+ order by
+   lochierarchy desc
+  ,case when lochierarchy = 0 then s_state end
+  ,rank_within_parent
+ limit 100;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/test/queries/clientpositive/perf/query86.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query86.q b/ql/src/test/queries/clientpositive/perf/query86.q
new file mode 100644
index 0000000..07a9ec5
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query86.q
@@ -0,0 +1,25 @@
+explain
+select   
+    sum(ws_net_paid) as total_sum
+   ,i_category
+   ,i_class
+   ,grouping(i_category)+grouping(i_class) as lochierarchy
+   ,rank() over (
+     partition by grouping(i_category)+grouping(i_class),
+     case when grouping(i_class) = 0 then i_category end 
+     order by sum(ws_net_paid) desc) as rank_within_parent
+ from
+    web_sales
+   ,date_dim       d1
+   ,item
+ where
+    d1.d_month_seq between 1212 and 1212+11
+ and d1.d_date_sk = ws_sold_date_sk
+ and i_item_sk  = ws_item_sk
+ group by rollup(i_category,i_class)
+ order by
+   lochierarchy desc,
+   case when lochierarchy = 0 then i_category end,
+   rank_within_parent
+ limit 100;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
index 99be3c1..d134d27 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
@@ -360,25 +360,25 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
-                  Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE
           pruneGroupingSetId: true
           File Output Operator
             compressed: false
-            Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -414,25 +414,25 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
-                  Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE
           pruneGroupingSetId: true
           File Output Operator
             compressed: false
-            Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -464,25 +464,25 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
-                  Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE
           pruneGroupingSetId: true
           File Output Operator
             compressed: false
-            Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -518,25 +518,25 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
-                  Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE
           pruneGroupingSetId: true
           File Output Operator
             compressed: false
-            Statistics: Num rows: 8 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -572,25 +572,25 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
-                  Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE
           pruneGroupingSetId: true
           File Output Operator
             compressed: false
-            Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -626,25 +626,25 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
-                  Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE
           pruneGroupingSetId: true
           File Output Operator
             compressed: false
-            Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 24 Data size: 2256 Basic stats: COMPLETE Column stats: COMPLETE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -680,25 +680,25 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
-                  Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE
           pruneGroupingSetId: true
           File Output Operator
             compressed: false
-            Statistics: Num rows: 32 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 32 Data size: 3008 Basic stats: COMPLETE Column stats: COMPLETE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -791,25 +791,25 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
-                  Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
+                  Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE
           pruneGroupingSetId: true
           File Output Operator
             compressed: false
-            Statistics: Num rows: 16 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 16 Data size: 1504 Basic stats: COMPLETE Column stats: COMPLETE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -900,18 +900,18 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
@@ -954,18 +954,18 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
@@ -1004,18 +1004,18 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
@@ -1058,18 +1058,18 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
@@ -1112,18 +1112,18 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
@@ -1166,18 +1166,18 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE
@@ -1220,18 +1220,18 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
@@ -1327,18 +1327,18 @@ STAGE PLANS:
               outputColumnNames: state, locid
               Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: state (type: string), locid (type: int), '0' (type: string)
+                keys: state (type: string), locid (type: int), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: int)
                   Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE
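
The recurring change in the hunks above is that the synthetic grouping-set key is now emitted as an int constant (0 (type: int)) rather than a string literal ('0' (type: string)); under COMPLETE column stats this also shrinks the estimated Data size, e.g. from 2800 to 1504 bytes for 16 rows. A minimal HiveQL sketch of the query shape these golden files exercise (the table name loc_orc is an assumption inferred from the state/locid columns; it is not spelled out in the hunks themselves):

    -- assumed table: loc_orc(state string, locid int, ...)
    explain
    select state, locid
    from loc_orc
    group by state, locid with cube;

Each grouping-set aggregation carries the extra int key as _col2, which pruneGroupingSetId then drops from the final output columns (_col0, _col1).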

http://git-wip-us.apache.org/repos/asf/hive/blob/89362a14/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
index 9535344..5e5efa8 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out
@@ -147,18 +147,18 @@ STAGE PLANS:
               outputColumnNames: state, country
               Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                keys: state (type: string), country (type: string), '0' (type: string)
+                keys: state (type: string), country (type: string), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 80 Data size: 800 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
                   Statistics: Num rows: 80 Data size: 800 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 40 Data size: 400 Basic stats: COMPLETE Column stats: NONE
@@ -313,25 +313,25 @@ STAGE PLANS:
               outputColumnNames: state, country
               Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: state (type: string), country (type: string), '0' (type: string)
+                keys: state (type: string), country (type: string), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 40 Data size: 10320 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 40 Data size: 7080 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                  Statistics: Num rows: 40 Data size: 10320 Basic stats: COMPLETE Column stats: COMPLETE
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                  Statistics: Num rows: 40 Data size: 7080 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 8 Data size: 2064 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 8 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE
           pruneGroupingSetId: true
           File Output Operator
             compressed: false
-            Statistics: Num rows: 8 Data size: 2064 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -420,25 +420,25 @@ STAGE PLANS:
               outputColumnNames: state, country
               Statistics: Num rows: 20 Data size: 3460 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
-                keys: state (type: string), country (type: string), '0' (type: string)
+                keys: state (type: string), country (type: string), 0 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 80 Data size: 20640 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 80 Data size: 14160 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
                   sort order: +++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
-                  Statistics: Num rows: 80 Data size: 20640 Basic stats: COMPLETE Column stats: COMPLETE
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                  Statistics: Num rows: 80 Data size: 14160 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
-          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 8 Data size: 2064 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 8 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE
           pruneGroupingSetId: true
           File Output Operator
             compressed: false
-            Statistics: Num rows: 8 Data size: 2064 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 8 Data size: 1416 Basic stats: COMPLETE Column stats: COMPLETE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
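
For reference, the GROUPING function that this change adds lets a query tell, per output row, whether a grouping column was aggregated away. A hedged usage sketch against a hypothetical table loc with state and country columns (mirroring the columns in the plans above; the table name is assumed):

    -- assumed table: loc(state string, country string, ...)
    select state, country, grouping(state), grouping(country), count(*)
    from loc
    group by state, country with rollup;

Conventionally (per the SQL standard), grouping(c) is 1 on rows where c has been rolled up and so appears as NULL, and 0 where c is a genuine grouping value, which disambiguates rollup-generated NULLs from NULLs in the data.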