You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/08/06 19:58:02 UTC
svn commit: r1616292 [1/2] - in /hive/trunk: data/files/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/ ql/src/java/org/apache/hadoop/hive/ql/stats/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/ ql/src/test/...

Author: hashutosh
Date: Wed Aug  6 17:58:01 2014
New Revision: 1616292

URL: http://svn.apache.org/r1616292
Log:
HIVE-7589 : Some fixes and improvements to statistics annotation rules (Prasanth J via Ashutosh Chauhan)

Modified:
    hive/trunk/data/files/dept.txt
    hive/trunk/data/files/emp.txt
    hive/trunk/data/files/loc.txt
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_filter.q
    hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
    hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_join.q
    hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_limit.q
    hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_part.q
    hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_select.q
    hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_table.q
    hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_union.q
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join.q.out
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_limit.q.out
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out
    hive/trunk/ql/src/test/results/clientpositive/annotate_stats_union.q.out
    hive/trunk/ql/src/test/results/clientpositive/combine2.q.out
    hive/trunk/ql/src/test/results/clientpositive/groupby_sort_11.q.out
    hive/trunk/ql/src/test/results/clientpositive/input24.q.out
    hive/trunk/ql/src/test/results/clientpositive/input25.q.out
    hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out
    hive/trunk/ql/src/test/results/clientpositive/nullgroup3.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/union5.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/union7.q.out
    hive/trunk/ql/src/test/results/clientpositive/udf_explode.q.out
    hive/trunk/ql/src/test/results/clientpositive/udtf_explode.q.out
    hive/trunk/ql/src/test/results/clientpositive/union11.q.out
    hive/trunk/ql/src/test/results/clientpositive/union14.q.out
    hive/trunk/ql/src/test/results/clientpositive/union15.q.out
    hive/trunk/ql/src/test/results/clientpositive/union17.q.out
    hive/trunk/ql/src/test/results/clientpositive/union19.q.out
    hive/trunk/ql/src/test/results/clientpositive/union20.q.out
    hive/trunk/ql/src/test/results/clientpositive/union21.q.out
    hive/trunk/ql/src/test/results/clientpositive/union5.q.out
    hive/trunk/ql/src/test/results/clientpositive/union7.q.out

Modified: hive/trunk/data/files/dept.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/dept.txt?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/data/files/dept.txt (original)
+++ hive/trunk/data/files/dept.txt Wed Aug  6 17:58:01 2014
@@ -2,3 +2,5 @@
 33|engineering
 34|clerical
 35|marketing
+36|transport
+37|hr

Modified: hive/trunk/data/files/emp.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/emp.txt?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/data/files/emp.txt (original)
+++ hive/trunk/data/files/emp.txt Wed Aug  6 17:58:01 2014
@@ -1,6 +1,48 @@
-Rafferty|31
-Jones|33
-Steinberg|33
-Robinson|34
-Smith|34
-John|
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6
+Rafferty|31|1
+Jones|33|2
+Steinberg|33|3
+Robinson|34|4
+Smith|34|5
+John|31|6

Modified: hive/trunk/data/files/loc.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/loc.txt?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/data/files/loc.txt (original)
+++ hive/trunk/data/files/loc.txt Wed Aug  6 17:58:01 2014
@@ -1,8 +1,8 @@
-OH|31|43201|2001
-IO|32|43202|2001
-CA|35|43809|2001
-FL|33|54342|2001
-UT|35||2001
-CA|35|43809|2001
-|34|40000|
-FL|33|54342|2001
+OH|1|43201|2001
+IO|2|43202|2001
+CA|5|43809|2001
+FL|3|54342|2001
+UT|5||2001
+CA|5|43809|2001
+|4|40000|
+FL|6|54342|2001

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java Wed Aug  6 17:58:01 2014
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.Fi
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.LimitOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -62,6 +63,8 @@ public class AnnotateWithStatistics impl
         + MapJoinOperator.getOperatorName() + "%"), StatsRulesProcFactory.getJoinRule());
     opRules.put(new RuleRegExp("LIM", LimitOperator.getOperatorName() + "%"),
         StatsRulesProcFactory.getLimitRule());
+    opRules.put(new RuleRegExp("RS", ReduceSinkOperator.getOperatorName() + "%"),
+        StatsRulesProcFactory.getReduceSinkRule());
 
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java Wed Aug  6 17:58:01 2014
@@ -601,12 +601,18 @@ public class StatsRulesProcFactory {
               }
               dvProd *= dv;
             } else {
-
-              // partial column statistics on grouping attributes case.
-              // if column statistics on grouping attribute is missing, then
-              // assume worst case.
-              // GBY rule will emit half the number of rows if dvProd is 0
-              dvProd = 0;
+              if (parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) {
+                // the column must be an aggregate column inserted by GBY. We
+                // don't have to account for this column when computing product
+                // of NDVs
+                continue;
+              } else {
+                // partial column statistics on grouping attributes case.
+                // if column statistics on grouping attribute is missing, then
+                // assume worst case.
+                // GBY rule will emit half the number of rows if dvProd is 0
+                dvProd = 0;
+              }
               break;
             }
           }
@@ -687,7 +693,17 @@ public class StatsRulesProcFactory {
               aggColStats.add(cs);
             }
           }
-          stats.addToColumnStats(aggColStats);
+
+          // add the new aggregate column and recompute data size
+          if (aggColStats.size() > 0) {
+            stats.addToColumnStats(aggColStats);
+
+            // only if column stats are available, update the data size from
+            // the column stats
+            if (!stats.getColumnStatsState().equals(Statistics.State.NONE)) {
+              updateStats(stats, stats.getNumRows(), true);
+            }
+          }
 
           // if UDAF present and if column expression map is empty then it must
           // be full aggregation query like count(*) in which case number of
@@ -734,15 +750,24 @@ public class StatsRulesProcFactory {
    * <p>
    * In the absence of histograms, we can use the following general case
    * <p>
-   * <b>Single attribute</b>
+   * <b>2 Relations, 1 attribute</b>
    * <p>
    * T(RXS) = (T(R)*T(S))/max(V(R,Y), V(S,Y)) where Y is the join attribute
    * <p>
-   * <b>Multiple attributes</b>
+   * <b>2 Relations, 2 attributes</b>
    * <p>
    * T(RXS) = T(R)*T(S)/max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2)), where y1 and y2 are the join
    * attributes
    * <p>
+   * <b>3 Relations, 1 attribute</b>
+   * <p>
+   * T(RXSXQ) = T(R)*T(S)*T(Q)/top2largest(V(R,y), V(S,y), V(Q,y)), where y is the join attribute
+   * <p>
+   * <b>3 Relations, 2 attributes</b>
+   * <p>
+   * T(RXSXQ) = T(R)*T(S)*T(Q)/(top2largest(V(R,y1), V(S,y1), V(Q,y1)) * top2largest(V(R,y2), V(S,y2), V(Q,y2))),
+   * where y1 and y2 are the join attributes
+   * <p>
    * <i>Worst case:</i> If no column statistics are available, then T(RXS) = joinFactor * max(T(R),
    * T(S)) * (numParents - 1) will be used as heuristics. joinFactor is from hive.stats.join.factor
    * hive config. In the worst case, since we do not know any information about join keys (and hence
@@ -783,9 +808,12 @@ public class StatsRulesProcFactory {
           // statistics object that is combination of statistics from all
           // relations involved in JOIN
           Statistics stats = new Statistics();
-          long prodRows = 1;
+          List<Long> rowCountParents = Lists.newArrayList();
           List<Long> distinctVals = Lists.newArrayList();
+
+          // 2 relations, multiple attributes
           boolean multiAttr = false;
+          int numAttr = 1;
 
           Map<String, ColStatistics> joinedColStats = Maps.newHashMap();
           Map<Integer, List<String>> joinKeys = Maps.newHashMap();
@@ -795,12 +823,13 @@ public class StatsRulesProcFactory {
             ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
 
             Statistics parentStats = parent.getStatistics();
-            prodRows *= parentStats.getNumRows();
+            rowCountParents.add(parentStats.getNumRows());
             List<ExprNodeDesc> keyExprs = parent.getConf().getKeyCols();
 
             // multi-attribute join key
             if (keyExprs.size() > 1) {
               multiAttr = true;
+              numAttr = keyExprs.size();
             }
 
             // compute fully qualified join key column names. this name will be
@@ -811,16 +840,9 @@ public class StatsRulesProcFactory {
                 StatsUtils.getFullQualifedColNameFromExprs(keyExprs, parent.getColumnExprMap());
             joinKeys.put(pos, fqCols);
 
-            Map<String, ExprNodeDesc> colExprMap = parent.getColumnExprMap();
-            RowSchema rs = parent.getSchema();
-
             // get column statistics for all output columns
-            List<ColStatistics> cs =
-                StatsUtils.getColStatisticsFromExprMap(conf, parentStats, colExprMap, rs);
-            for (ColStatistics c : cs) {
-              if (c != null) {
-                joinedColStats.put(c.getFullyQualifiedColName(), c);
-              }
+            for (ColStatistics cs : parentStats.getColumnStats()) {
+              joinedColStats.put(cs.getFullyQualifiedColName(), cs);
             }
 
             // since new statistics is derived from all relations involved in
@@ -834,10 +856,10 @@ public class StatsRulesProcFactory {
           long denom = 1;
           if (multiAttr) {
             List<Long> perAttrDVs = Lists.newArrayList();
-            int numAttr = joinKeys.get(0).size();
             for (int idx = 0; idx < numAttr; idx++) {
               for (Integer i : joinKeys.keySet()) {
                 String col = joinKeys.get(i).get(idx);
+                col = StatsUtils.stripPrefixFromColumnName(col);
                 ColStatistics cs = joinedColStats.get(col);
                 if (cs != null) {
                   perAttrDVs.add(cs.getCountDistint());
@@ -853,6 +875,7 @@ public class StatsRulesProcFactory {
           } else {
             for (List<String> jkeys : joinKeys.values()) {
               for (String jk : jkeys) {
+                jk = StatsUtils.stripPrefixFromColumnName(jk);
                 ColStatistics cs = joinedColStats.get(jk);
                 if (cs != null) {
                   distinctVals.add(cs.getCountDistint());
@@ -862,6 +885,11 @@ public class StatsRulesProcFactory {
             denom = getDenominator(distinctVals);
           }
 
+          // Update NDV of joined columns to be min(V(R,y), V(S,y))
+          if (multiAttr) {
+            updateJoinColumnsNDV(joinKeys, joinedColStats, numAttr);
+          }
+
           // column statistics from different sources are put together and rename
           // fully qualified column names based on output schema of join operator
           Map<String, ExprNodeDesc> colExprMap = jop.getColumnExprMap();
@@ -878,7 +906,6 @@ public class StatsRulesProcFactory {
               ColStatistics cs = joinedColStats.get(fqColName);
               String outColName = key;
               String outTabAlias = ci.getTabAlias();
-              outColName = StatsUtils.stripPrefixFromColumnName(outColName);
               if (cs != null) {
                 cs.setColumnName(outColName);
                 cs.setTableAlias(outTabAlias);
@@ -889,13 +916,21 @@ public class StatsRulesProcFactory {
 
           // update join statistics
           stats.setColumnStats(outColStats);
-          long newRowCount = prodRows / denom;
+          long newRowCount = computeNewRowCount(rowCountParents, denom);
+
+          if (newRowCount <= 0 && LOG.isDebugEnabled()) {
+            newRowCount = 0;
+            LOG.debug("[0] STATS-" + jop.toString() + ": Product of #rows might be greater than"
+                + " denominator or overflow might have occurred. Resetting row count to 0."
+                + " #Rows of parents: " + rowCountParents.toString() + ". Denominator: " + denom);
+          }
+
           stats.setNumRows(newRowCount);
           stats.setDataSize(StatsUtils.getDataSizeFromColumnStats(newRowCount, outColStats));
           jop.setStatistics(stats);
 
           if (LOG.isDebugEnabled()) {
-            LOG.debug("[0] STATS-" + jop.toString() + ": " + stats.extendedToString());
+            LOG.debug("[1] STATS-" + jop.toString() + ": " + stats.extendedToString());
           }
         } else {
 
@@ -930,13 +965,72 @@ public class StatsRulesProcFactory {
           jop.setStatistics(wcStats);
 
           if (LOG.isDebugEnabled()) {
-            LOG.debug("[1] STATS-" + jop.toString() + ": " + wcStats.extendedToString());
+            LOG.debug("[2] STATS-" + jop.toString() + ": " + wcStats.extendedToString());
           }
         }
       }
       return null;
     }
 
+    private long computeNewRowCount(List<Long> rowCountParents, long denom) {
+      double factor = 0.0d;
+      long result = 1;
+      long max = rowCountParents.get(0);
+      long maxIdx = 0;
+
+      // To avoid long overflow, we will divide the max row count by denominator
+      // and use that factor to multiply with other row counts
+      for (int i = 1; i < rowCountParents.size(); i++) {
+        if (rowCountParents.get(i) > max) {
+          max = rowCountParents.get(i);
+          maxIdx = i;
+        }
+      }
+
+      factor = (double) max / (double) denom;
+
+      for (int i = 0; i < rowCountParents.size(); i++) {
+        if (i != maxIdx) {
+          result *= rowCountParents.get(i);
+        }
+      }
+
+      result = (long) (result * factor);
+
+      return result;
+    }
+
+    private void updateJoinColumnsNDV(Map<Integer, List<String>> joinKeys,
+        Map<String, ColStatistics> joinedColStats, int numAttr) {
+      int joinColIdx = 0;
+      while (numAttr > 0) {
+        long minNDV = Long.MAX_VALUE;
+
+        // find min NDV for joining columns
+        for (Map.Entry<Integer, List<String>> entry : joinKeys.entrySet()) {
+          String key = entry.getValue().get(joinColIdx);
+          key = StatsUtils.stripPrefixFromColumnName(key);
+          ColStatistics cs = joinedColStats.get(key);
+          if (cs != null && cs.getCountDistint() < minNDV) {
+            minNDV = cs.getCountDistint();
+          }
+        }
+
+        // set min NDV value on the join column of every relation in the join
+        if (minNDV != Long.MAX_VALUE) {
+          for (Map.Entry<Integer, List<String>> entry : joinKeys.entrySet()) {
+            String key = entry.getValue().get(joinColIdx);
+            key = StatsUtils.stripPrefixFromColumnName(key);
+            ColStatistics cs = joinedColStats.get(key);
+            cs.setCountDistint(minNDV);
+          }
+        }
+
+        joinColIdx++;
+        numAttr--;
+      }
+    }
+
     private long getDenominator(List<Long> distinctVals) {
 
       if (distinctVals.isEmpty()) {
@@ -954,16 +1048,23 @@ public class StatsRulesProcFactory {
         return Collections.max(distinctVals);
       } else {
 
+        // remember min value and ignore it from the denominator
+        long minNDV = distinctVals.get(0);
+        int minIdx = 0;
+
+        for (int i = 1; i < distinctVals.size(); i++) {
+          if (distinctVals.get(i) < minNDV) {
+            minNDV = distinctVals.get(i);
+            minIdx = i;
+          }
+        }
+
         // join from multiple relations:
-        // denom = max(v1, v2) * max(v2, v3) * max(v3, v4)
+        // denom = Product of all NDVs except the least of all
         long denom = 1;
-        for (int i = 0; i < distinctVals.size() - 1; i++) {
-          long v1 = distinctVals.get(i);
-          long v2 = distinctVals.get(i + 1);
-          if (v1 >= v2) {
-            denom *= v1;
-          } else {
-            denom *= v2;
+        for (int i = 0; i < distinctVals.size(); i++) {
+          if (i != minIdx) {
+            denom *= distinctVals.get(i);
           }
         }
         return denom;
@@ -983,8 +1084,6 @@ public class StatsRulesProcFactory {
       LimitOperator lop = (LimitOperator) nd;
       Operator<? extends OperatorDesc> parent = lop.getParentOperators().get(0);
       Statistics parentStats = parent.getStatistics();
-      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
-      HiveConf conf = aspCtx.getConf();
 
       try {
         long limit = -1;
@@ -1032,6 +1131,73 @@ public class StatsRulesProcFactory {
   }
 
   /**
+   * ReduceSink operator does not change any of the statistics. But it renames
+   * the column statistics from its parent based on the output key and value
+   * column names to make it easy for the downstream operators. This is different
+   * from the default stats which just aggregates and passes along the statistics
+   * without actually renaming based on output schema of the operator.
+   */
+  public static class ReduceSinkStatsRule extends DefaultStatsRule implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      ReduceSinkOperator rop = (ReduceSinkOperator) nd;
+      Operator<? extends OperatorDesc> parent = rop.getParentOperators().get(0);
+      Statistics parentStats = parent.getStatistics();
+      if (parentStats != null) {
+        AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
+        HiveConf conf = aspCtx.getConf();
+
+        List<String> outKeyColNames = rop.getConf().getOutputKeyColumnNames();
+        List<String> outValueColNames = rop.getConf().getOutputValueColumnNames();
+        Map<String, ExprNodeDesc> colExprMap = rop.getColumnExprMap();
+        try {
+          Statistics outStats = parentStats.clone();
+          if (satisfyPrecondition(parentStats)) {
+            List<ColStatistics> colStats = Lists.newArrayList();
+            for (String key : outKeyColNames) {
+              String prefixedKey = "KEY." + key;
+              ExprNodeDesc end = colExprMap.get(prefixedKey);
+              if (end != null) {
+                ColStatistics cs = StatsUtils
+                    .getColStatisticsFromExpression(conf, parentStats, end);
+                if (cs != null) {
+                  cs.setColumnName(key);
+                  colStats.add(cs);
+                }
+              }
+            }
+
+            for (String val : outValueColNames) {
+              String prefixedVal = "VALUE." + val;
+              ExprNodeDesc end = colExprMap.get(prefixedVal);
+              if (end != null) {
+                ColStatistics cs = StatsUtils
+                    .getColStatisticsFromExpression(conf, parentStats, end);
+                if (cs != null) {
+                  cs.setColumnName(val);
+                  colStats.add(cs);
+                }
+              }
+            }
+
+            outStats.setColumnStats(colStats);
+          }
+          rop.setStatistics(outStats);
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("[0] STATS-" + rop.toString() + ": " + outStats.extendedToString());
+          }
+        } catch (CloneNotSupportedException e) {
+          throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+        }
+      }
+      return null;
+    }
+
+  }
+
+  /**
    * Default rule is to aggregate the statistics from all its parent operators.
    */
   public static class DefaultStatsRule implements NodeProcessor {
@@ -1108,6 +1274,10 @@ public class StatsRulesProcFactory {
     return new LimitStatsRule();
   }
 
+  public static NodeProcessor getReduceSinkRule() {
+    return new ReduceSinkStatsRule();
+  }
+
   public static NodeProcessor getDefaultRule() {
     return new DefaultStatsRule();
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java Wed Aug  6 17:58:01 2014
@@ -196,7 +196,8 @@ public class StatsUtils {
       stats.addToDataSize(ds);
 
       // if at least a partition does not contain row count then mark basic stats state as PARTIAL
-      if (containsNonPositives(rowCounts)) {
+      if (containsNonPositives(rowCounts) &&
+          stats.getBasicStatsState().equals(State.COMPLETE)) {
         stats.setBasicStatsState(State.PARTIAL);
       }
       boolean haveFullStats = fetchColStats;
@@ -860,12 +861,9 @@ public class StatsUtils {
     if (colExprMap != null) {
       for (ColumnInfo ci : rowSchema.getSignature()) {
         String outColName = ci.getInternalName();
+        outColName = StatsUtils.stripPrefixFromColumnName(outColName);
         String outTabAlias = ci.getTabAlias();
         ExprNodeDesc end = colExprMap.get(outColName);
-        if (end == null) {
-          outColName = StatsUtils.stripPrefixFromColumnName(outColName);
-          end = colExprMap.get(outColName);
-        }
         ColStatistics colStat = getColStatisticsFromExpression(conf, parentStats, end);
         if (colStat != null) {
           outColName = StatsUtils.stripPrefixFromColumnName(outColName);
@@ -1126,7 +1124,7 @@ public class StatsUtils {
    */
   public static String stripPrefixFromColumnName(String colName) {
     String stripedName = colName;
-    if (colName.startsWith("KEY._") || colName.startsWith("VALUE._")) {
+    if (colName.startsWith("KEY") || colName.startsWith("VALUE")) {
       // strip off KEY./VALUE. from column name
       stripedName = colName.split("\\.")[1];
     }
@@ -1194,15 +1192,16 @@ public class StatsUtils {
         for (Map.Entry<String, ExprNodeDesc> entry : map.entrySet()) {
           if (entry.getValue().isSame(end)) {
             outColName = entry.getKey();
+            outColName = stripPrefixFromColumnName(outColName);
           }
         }
         if (end instanceof ExprNodeColumnDesc) {
           ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
           if (outColName == null) {
             outColName = encd.getColumn();
+            outColName = stripPrefixFromColumnName(outColName);
           }
           String tabAlias = encd.getTabAlias();
-          outColName = stripPrefixFromColumnName(outColName);
           result.add(getFullyQualifiedColumnName(tabAlias, outColName));
         } else if (end instanceof ExprNodeGenericFuncDesc) {
           ExprNodeGenericFuncDesc enf = (ExprNodeGenericFuncDesc) end;

Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_filter.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_filter.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_filter.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_filter.q Wed Aug  6 17:58:01 2014
@@ -15,76 +15,76 @@ load data local inpath '../../data/files
 insert overwrite table loc_orc select * from loc_staging;
 
 -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- column stats are not COMPLETE, so stats are not updated
 -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc where state='OH';
+explain select * from loc_orc where state='OH';
 
 analyze table loc_orc compute statistics for columns state,locid,zip,year;
 
 -- state column has 5 distincts. numRows/countDistincts
 -- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where state='OH';
+explain select * from loc_orc where state='OH';
 
 -- not equals comparison shouldn't affect number of rows
 -- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where state!='OH';
-explain extended select * from loc_orc where state<>'OH';
+explain select * from loc_orc where state!='OH';
+explain select * from loc_orc where state<>'OH';
 
 -- nulls are treated as constant equality comparison
 -- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where zip is null;
+explain select * from loc_orc where zip is null;
 -- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where !(zip is not null);
+explain select * from loc_orc where !(zip is not null);
 
 -- not nulls are treated as inverse of nulls
 -- numRows: 7 rawDataSize: 702
-explain extended select * from loc_orc where zip is not null;
+explain select * from loc_orc where zip is not null;
 -- numRows: 7 rawDataSize: 702
-explain extended select * from loc_orc where !(zip is null);
+explain select * from loc_orc where !(zip is null);
 
 -- NOT evaluation. true will pass all rows, false will not pass any rows
 -- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where !false;
+explain select * from loc_orc where !false;
 -- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where !true;
+explain select * from loc_orc where !true;
 
 -- Constant evaluation. true will pass all rows, false will not pass any rows
 -- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where true;
+explain select * from loc_orc where true;
 -- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where 'foo';
+explain select * from loc_orc where 'foo';
 -- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where true = true;
+explain select * from loc_orc where true = true;
 -- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where false = true;
+explain select * from loc_orc where false = true;
 -- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where 'foo' = 'bar';
+explain select * from loc_orc where 'foo' = 'bar';
 -- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where false;
+explain select * from loc_orc where false;
 
 -- OR evaluation. 1 row for OH and 1 row for CA
 -- numRows: 2 rawDataSize: 204
-explain extended select * from loc_orc where state='OH' or state='CA';
+explain select * from loc_orc where state='OH' or state='CA';
 
 -- AND evaluation. cascadingly apply rules. 8/2 = 4/2 = 2
 -- numRows: 2 rawDataSize: 204
-explain extended select * from loc_orc where year=2001 and year is null;
+explain select * from loc_orc where year=2001 and year is null;
 -- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where year=2001 and state='OH' and state='FL';
+explain select * from loc_orc where year=2001 and state='OH' and state='FL';
 
 -- AND and OR together. left expr will yield 1 row and right will yield 1 row
 -- numRows: 3 rawDataSize: 306
-explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA');
+explain select * from loc_orc where (year=2001 and year is null) or (state='CA');
 
 -- AND and OR together. left expr will yield 8 rows and right will yield 1 row
 -- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA');
+explain select * from loc_orc where (year=2001 or year is null) and (state='CA');
 
 -- all inequality conditions rows/3 is the rules
 -- numRows: 2 rawDataSize: 204
-explain extended select * from loc_orc where locid < 30;
-explain extended select * from loc_orc where locid > 30;
-explain extended select * from loc_orc where locid <= 30;
-explain extended select * from loc_orc where locid >= 30;
+explain select * from loc_orc where locid < 30;
+explain select * from loc_orc where locid > 30;
+explain select * from loc_orc where locid <= 30;
+explain select * from loc_orc where locid >= 30;

Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_groupby.q Wed Aug  6 17:58:01 2014
@@ -15,14 +15,14 @@ load data local inpath '../../data/files
 insert overwrite table loc_orc select * from loc_staging;
 
 -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- partial column stats
 analyze table loc_orc compute statistics for columns state;
 
 -- inner group by: map - numRows: 8 reduce - numRows: 4
 -- outer group by: map - numRows: 4 reduce numRows: 2
-explain extended select a, c, min(b)
+explain select a, c, min(b)
 from ( select state as a, locid as b, count(*) as c
        from loc_orc
        group by state,locid
@@ -34,36 +34,36 @@ analyze table loc_orc compute statistics
 -- only one distinct value in year column + 1 NULL value
 -- map-side GBY: numRows: 8 (map-side will not do any reduction)
 -- reduce-side GBY: numRows: 2
-explain extended select year from loc_orc group by year;
+explain select year from loc_orc group by year;
 
 -- map-side GBY: numRows: 8
 -- reduce-side GBY: numRows: 4
-explain extended select state,locid from loc_orc group by state,locid;
+explain select state,locid from loc_orc group by state,locid;
 
 -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
-explain extended select state,locid from loc_orc group by state,locid with cube;
+explain select state,locid from loc_orc group by state,locid with cube;
 
 -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
-explain extended select state,locid from loc_orc group by state,locid with rollup;
+explain select state,locid from loc_orc group by state,locid with rollup;
 
 -- map-side GBY numRows: 8 reduce-side GBY numRows: 4
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state));
+explain select state,locid from loc_orc group by state,locid grouping sets((state));
 
 -- map-side GBY numRows: 16 reduce-side GBY numRows: 8
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid));
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid));
 
 -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),());
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),());
 
 -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),());
+explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),());
 
 set hive.stats.map.parallelism=10;
 
 -- map-side GBY: numRows: 80 (map-side will not do any reduction)
 -- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2)
-explain extended select year from loc_orc group by year;
+explain select year from loc_orc group by year;
 
 -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
-explain extended select state,locid from loc_orc group by state,locid with cube;
+explain select state,locid from loc_orc group by state,locid with cube;
 

Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_join.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_join.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_join.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_join.q Wed Aug  6 17:58:01 2014
@@ -1,81 +1,70 @@
 set hive.stats.fetch.column.stats=true;
+set hive.stats.ndv.error=0.0;
 
-create table if not exists emp_staging (
+create table if not exists emp (
   lastname string,
-  deptid int
+  deptid int,
+  locid int
 ) row format delimited fields terminated by '|' stored as textfile;
 
-create table if not exists dept_staging (
+create table if not exists dept (
   deptid int,
   deptname string
 ) row format delimited fields terminated by '|' stored as textfile;
 
-create table if not exists loc_staging (
+create table if not exists loc (
   state string,
   locid int,
   zip bigint,
   year int
 ) row format delimited fields terminated by '|' stored as textfile;
 
-create table if not exists emp_orc like emp_staging;
-alter table emp_orc set fileformat orc;
-
-create table if not exists dept_orc like dept_staging;
-alter table dept_orc set fileformat orc;
-
-create table loc_orc like loc_staging;
-alter table loc_orc set fileformat orc;
-
-LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging;
-LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging;
-LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging;
-
-insert overwrite table emp_orc select * from emp_staging;
-insert overwrite table dept_orc select * from dept_staging;
-insert overwrite table loc_orc select * from loc_staging;
-
-analyze table emp_orc compute statistics for columns lastname,deptid;
-analyze table dept_orc compute statistics for columns deptname,deptid;
-analyze table loc_orc compute statistics for columns state,locid,zip,year;
+LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp;
+LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept;
+LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc;
+
+analyze table emp compute statistics;
+analyze table dept compute statistics;
+analyze table loc compute statistics;
+analyze table emp compute statistics for columns lastname,deptid,locid;
+analyze table dept compute statistics for columns deptname,deptid;
+analyze table loc compute statistics for columns state,locid,zip,year;
 
 -- number of rows
--- emp_orc  - 6
--- dept_orc - 4
--- loc_orc  - 8
+-- emp  - 48
+-- dept - 6
+-- loc  - 8
 
 -- count distincts for relevant columns (since count distinct values are approximate, in some cases count distinct values will be greater than number of rows)
--- emp_orc.deptid - 3
--- emp_orc.lastname - 7
--- dept_orc.deptid - 6
--- dept_orc.deptname - 5
--- loc_orc.locid - 6
--- loc_orc.state - 7
-
--- Expected output rows: 4
--- Reason: #rows = (6*4)/max(3,6)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid);
-
--- 3 way join
--- Expected output rows: 4
--- Reason: #rows = (6*4*6)/max(3,6)*max(6,3)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid);
-
--- Expected output rows: 5
--- Reason: #rows = (6*4*8)/max(3,6)*max(6,6)
-explain extended select * from emp_orc e join dept_orc d  on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid);
-
--- join keys of different types
--- Expected output rows: 4
--- Reason: #rows = (6*4*8)/max(3,6)*max(6,7)
-explain extended select * from emp_orc e join dept_orc d  on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state);
-
--- multi-attribute join
--- Expected output rows: 0
--- Reason: #rows = (6*4)/max(3,6)*max(7,5)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname);
-
--- 3 way and multi-attribute join
--- Expected output rows: 0
--- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state);
+-- emp.deptid - 3
+-- emp.lastname - 6
+-- emp.locid - 7
+-- dept.deptid - 7
+-- dept.deptname - 6
+-- loc.locid - 7
+-- loc.state - 6
+
+-- 2 relations, 1 attribute
+-- Expected output rows: (48*6)/max(3,7) = 41
+explain select * from emp e join dept d on (e.deptid = d.deptid);
+
+-- 2 relations, 2 attributes
+-- Expected output rows: (48*6)/(max(3,7) * max(6,6)) = 6
+explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname;
+explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname);
+
+-- 2 relations, 3 attributes
+-- Expected output rows: (48*6)/(max(3,7) * max(6,6) * max(6,6)) = 1
+explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname;
+
+-- 3 relations, 1 attribute
+-- Expected output rows: (48*6*48)/top2largest(3,7,3) = 658
+explain select * from emp e join dept d on (e.deptid = d.deptid) join emp e1 on (e.deptid = e1.deptid);
+
+-- Expected output rows: (48*6*8)/top2largest(3,7,7) = 47
+explain select * from emp e join dept d  on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid);
+
+-- 3 relations and 2 attribute
+-- Expected output rows: (48*6*8)/(top2largest(3,7,7)*top2largest(6,6,6)) = 1
+explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state);
 

Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_limit.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_limit.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_limit.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_limit.q Wed Aug  6 17:58:01 2014
@@ -17,14 +17,14 @@ insert overwrite table loc_orc select * 
 analyze table loc_orc compute statistics for columns state, locid, zip, year;
 
 -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- numRows: 4 rawDataSize: 396
-explain extended select * from loc_orc limit 4;
+explain select * from loc_orc limit 4;
 
 -- greater than the available number of rows
 -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc limit 16;
+explain select * from loc_orc limit 16;
 
 -- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc limit 0;
+explain select * from loc_orc limit 0;

Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_part.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_part.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_part.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_part.q Wed Aug  6 17:58:01 2014
@@ -19,67 +19,67 @@ create table if not exists loc_orc (
 ) partitioned by(year string) stored as orc;
 
 -- basicStatState: NONE colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 insert overwrite table loc_orc partition(year) select * from loc_staging;
 
 -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL
 
 -- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- partition level analyze statistics for specific partition
 analyze table loc_orc partition(year='2001') compute statistics;
 
 -- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
 
 -- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc where year='2001';
+explain select * from loc_orc where year='2001';
 
 -- partition level analyze statistics for all partitions
 analyze table loc_orc partition(year) compute statistics;
 
 -- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
 
 -- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc where year='2001' or year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='2001' or year='__HIVE_DEFAULT_PARTITION__';
 
 -- both partitions will be pruned
 -- basicStatState: NONE colStatState: NONE
-explain extended select * from loc_orc where year='2001' and year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='2001' and year='__HIVE_DEFAULT_PARTITION__';
 
 -- partition level partial column statistics
 analyze table loc_orc partition(year='2001') compute statistics for columns state,locid;
 
 -- basicStatState: COMPLETE colStatState: NONE
-explain extended select zip from loc_orc;
+explain select zip from loc_orc;
 
 -- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select state from loc_orc;
+explain select state from loc_orc;
 
 -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL
 -- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select state,locid from loc_orc;
+explain select state,locid from loc_orc;
 
 -- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select state,locid from loc_orc where year='2001';
+explain select state,locid from loc_orc where year='2001';
 
 -- basicStatState: COMPLETE colStatState: NONE
-explain extended select state,locid from loc_orc where year!='2001';
+explain select state,locid from loc_orc where year!='2001';
 
 -- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- This is to test filter expression evaluation on partition column
 -- numRows: 2 dataSize: 8 basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select locid from loc_orc where locid>0 and year='2001';
-explain extended select locid,year from loc_orc where locid>0 and year='2001';
-explain extended select * from (select locid,year from loc_orc) test where locid>0 and year='2001';
+explain select locid from loc_orc where locid>0 and year='2001';
+explain select locid,year from loc_orc where locid>0 and year='2001';
+explain select * from (select locid,year from loc_orc) test where locid>0 and year='2001';

Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_select.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_select.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_select.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_select.q Wed Aug  6 17:58:01 2014
@@ -28,116 +28,116 @@ load data local inpath '../../data/files
 insert overwrite table alltypes_orc select * from alltypes;
 
 -- basicStatState: COMPLETE colStatState: NONE numRows: 2 rawDataSize: 1514
-explain extended select * from alltypes_orc;
+explain select * from alltypes_orc;
 
 -- statistics for complex types are not supported yet
 analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1, s1, vc1;
 
 -- numRows: 2 rawDataSize: 1514
-explain extended select * from alltypes_orc;
+explain select * from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 8
-explain extended select bo1 from alltypes_orc;
+explain select bo1 from alltypes_orc;
 
 -- col alias renaming
 -- numRows: 2 rawDataSize: 8
-explain extended select i1 as int1 from alltypes_orc;
+explain select i1 as int1 from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 174
-explain extended select s1 from alltypes_orc;
+explain select s1 from alltypes_orc;
 
 -- column statistics for complex types unsupported and so statistics will not be updated
 -- numRows: 2 rawDataSize: 1514
-explain extended select m1 from alltypes_orc;
+explain select m1 from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 246
-explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc;
+explain select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 0
-explain extended select null from alltypes_orc;
+explain select null from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 8
-explain extended select 11 from alltypes_orc;
+explain select 11 from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 16
-explain extended select 11L from alltypes_orc;
+explain select 11L from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 16
-explain extended select 11.0 from alltypes_orc;
+explain select 11.0 from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 178
-explain extended select "hello" from alltypes_orc;
-explain extended select cast("hello" as char(5)) from alltypes_orc;
-explain extended select cast("hello" as varchar(5)) from alltypes_orc;
+explain select "hello" from alltypes_orc;
+explain select cast("hello" as char(5)) from alltypes_orc;
+explain select cast("hello" as varchar(5)) from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 96
-explain extended select unbase64("0xe23") from alltypes_orc;
+explain select unbase64("0xe23") from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 16
-explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc;
+explain select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 80
-explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc;
+explain select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 112
-explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc;
+explain select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 224
-explain extended select cast("58.174" as DECIMAL) from alltypes_orc;
+explain select cast("58.174" as DECIMAL) from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 112
-explain extended select array(1,2,3) from alltypes_orc;
+explain select array(1,2,3) from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 1508
-explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc;
+explain select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 112
-explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc;
+explain select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 250
-explain extended select CREATE_UNION(0, "hello") from alltypes_orc;
+explain select CREATE_UNION(0, "hello") from alltypes_orc;
 
 -- COUNT(*) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows
 -- numRows: 1 rawDataSize: 8
-explain extended select count(*) from alltypes_orc;
+explain select count(*) from alltypes_orc;
 
 -- COUNT(1) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows
 -- numRows: 1 rawDataSize: 8
-explain extended select count(1) from alltypes_orc;
+explain select count(1) from alltypes_orc;
 
 -- column statistics for complex column types will be missing. data size will be calculated from available column statistics
 -- numRows: 2 rawDataSize: 254
-explain extended select *,11 from alltypes_orc;
+explain select *,11 from alltypes_orc;
 
 -- subquery selects
 -- inner select - numRows: 2 rawDataSize: 8
 -- outer select - numRows: 2 rawDataSize: 8
-explain extended select i1 from (select i1 from alltypes_orc limit 10) temp;
+explain select i1 from (select i1 from alltypes_orc limit 10) temp;
 
 -- inner select - numRows: 2 rawDataSize: 16
 -- outer select - numRows: 2 rawDataSize: 8
-explain extended select i1 from (select i1,11 from alltypes_orc limit 10) temp;
+explain select i1 from (select i1,11 from alltypes_orc limit 10) temp;
 
 -- inner select - numRows: 2 rawDataSize: 16
 -- outer select - numRows: 2 rawDataSize: 186
-explain extended select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp;
+explain select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp;
 
 -- inner select - numRows: 2 rawDataSize: 24
 -- outer select - numRows: 2 rawDataSize: 16
-explain extended select x from (select i1,11.0 as x from alltypes_orc limit 10) temp;
+explain select x from (select i1,11.0 as x from alltypes_orc limit 10) temp;
 
 -- inner select - numRows: 2 rawDataSize: 104
 -- outer select - numRows: 2 rawDataSize: 186
-explain extended select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp;
+explain select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp;
 
 -- inner select -  numRows: 2 rawDataSize: 186
 -- middle select - numRows: 2 rawDataSize: 178
 -- outer select -  numRows: 2 rawDataSize: 194
-explain extended select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2;
+explain select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2;
 
 -- This test is for FILTER operator where filter expression is a boolean column
 -- numRows: 2 rawDataSize: 8
-explain extended select bo1 from alltypes_orc where bo1;
+explain select bo1 from alltypes_orc where bo1;
 
 -- numRows: 0 rawDataSize: 0
-explain extended select bo1 from alltypes_orc where !bo1;
+explain select bo1 from alltypes_orc where !bo1;

Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_table.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_table.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_table.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_table.q Wed Aug  6 17:58:01 2014
@@ -10,7 +10,7 @@ create table if not exists emp_orc like 
 alter table emp_orc set fileformat orc;
 
 -- basicStatState: NONE colStatState: NONE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
 
 LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging;
 
@@ -19,35 +19,35 @@ insert overwrite table emp_orc select * 
 -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL
 
 -- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
 
 -- table level analyze statistics
 analyze table emp_orc compute statistics;
 
 -- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
 
 -- column level partial statistics
 analyze table emp_orc compute statistics for columns deptid;
 
 -- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select * from emp_orc;
+explain select * from emp_orc;
 
 -- all selected columns have statistics
 -- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select deptid from emp_orc;
+explain select deptid from emp_orc;
 
 -- column level complete statistics
 analyze table emp_orc compute statistics for columns lastname,deptid;
 
 -- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
 
 -- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select lastname from emp_orc;
+explain select lastname from emp_orc;
 
 -- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select deptid from emp_orc;
+explain select deptid from emp_orc;
 
 -- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select lastname,deptid from emp_orc;
+explain select lastname,deptid from emp_orc;

Modified: hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_union.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_union.q?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_union.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/annotate_stats_union.q Wed Aug  6 17:58:01 2014
@@ -17,16 +17,16 @@ insert overwrite table loc_orc select * 
 analyze table loc_orc compute statistics for columns state,locid,zip,year;
 
 -- numRows: 8 rawDataSize: 688
-explain extended select state from loc_orc;
+explain select state from loc_orc;
 
 -- numRows: 16 rawDataSize: 1376
-explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp;
+explain select * from (select state from loc_orc union all select state from loc_orc) tmp;
 
 -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- numRows: 16 rawDataSize: 1592
-explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp;
+explain select * from (select * from loc_orc union all select * from loc_orc) tmp;
 
 create database test;
 use test;
@@ -49,7 +49,7 @@ analyze table loc_staging compute statis
 analyze table loc_orc compute statistics for columns state,locid,zip,year;
 
 -- numRows: 16 rawDataSize: 1376
-explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp;
+explain select * from (select state from default.loc_orc union all select state from test.loc_orc) temp;
 
 -- numRows: 16 rawDataSize: 1376
-explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp;
+explain select * from (select state from test.loc_staging union all select state from test.loc_orc) temp;

Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_filter.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_filter.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_filter.q.out Wed Aug  6 17:58:01 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out Wed Aug  6 17:58:01 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join.q.out Wed Aug  6 17:58:01 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_limit.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_limit.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/annotate_stats_limit.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/annotate_stats_limit.q.out Wed Aug  6 17:58:01 2014
@@ -59,27 +59,11 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@loc_orc
 #### A masked pattern was here ####
 PREHOOK: query: -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc
+explain select * from loc_orc
 PREHOOK: type: QUERY
 POSTHOOK: query: -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc
+explain select * from loc_orc
 POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
-  
-TOK_QUERY
-   TOK_FROM
-      TOK_TABREF
-         TOK_TABNAME
-            loc_orc
-   TOK_INSERT
-      TOK_DESTINATION
-         TOK_DIR
-            TOK_TMP_FILE
-      TOK_SELECT
-         TOK_SELEXPR
-            TOK_ALLCOLREF
-
-
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
 
@@ -91,7 +75,6 @@ STAGE PLANS:
         TableScan
           alias: loc_orc
           Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
-          GatherStats: false
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3
@@ -99,29 +82,11 @@ STAGE PLANS:
             ListSink
 
 PREHOOK: query: -- numRows: 4 rawDataSize: 396
-explain extended select * from loc_orc limit 4
+explain select * from loc_orc limit 4
 PREHOOK: type: QUERY
 POSTHOOK: query: -- numRows: 4 rawDataSize: 396
-explain extended select * from loc_orc limit 4
+explain select * from loc_orc limit 4
 POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
-  
-TOK_QUERY
-   TOK_FROM
-      TOK_TABREF
-         TOK_TABNAME
-            loc_orc
-   TOK_INSERT
-      TOK_DESTINATION
-         TOK_DIR
-            TOK_TMP_FILE
-      TOK_SELECT
-         TOK_SELEXPR
-            TOK_ALLCOLREF
-      TOK_LIMIT
-         4
-
-
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
 
@@ -133,7 +98,6 @@ STAGE PLANS:
         TableScan
           alias: loc_orc
           Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
-          GatherStats: false
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3
@@ -145,30 +109,12 @@ STAGE PLANS:
 
 PREHOOK: query: -- greater than the available number of rows
 -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc limit 16
+explain select * from loc_orc limit 16
 PREHOOK: type: QUERY
 POSTHOOK: query: -- greater than the available number of rows
 -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc limit 16
+explain select * from loc_orc limit 16
 POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
-  
-TOK_QUERY
-   TOK_FROM
-      TOK_TABREF
-         TOK_TABNAME
-            loc_orc
-   TOK_INSERT
-      TOK_DESTINATION
-         TOK_DIR
-            TOK_TMP_FILE
-      TOK_SELECT
-         TOK_SELEXPR
-            TOK_ALLCOLREF
-      TOK_LIMIT
-         16
-
-
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
 
@@ -180,7 +126,6 @@ STAGE PLANS:
         TableScan
           alias: loc_orc
           Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
-          GatherStats: false
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3
@@ -191,29 +136,11 @@ STAGE PLANS:
               ListSink
 
 PREHOOK: query: -- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc limit 0
+explain select * from loc_orc limit 0
 PREHOOK: type: QUERY
 POSTHOOK: query: -- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc limit 0
+explain select * from loc_orc limit 0
 POSTHOOK: type: QUERY
-ABSTRACT SYNTAX TREE:
-  
-TOK_QUERY
-   TOK_FROM
-      TOK_TABREF
-         TOK_TABNAME
-            loc_orc
-   TOK_INSERT
-      TOK_DESTINATION
-         TOK_DIR
-            TOK_TMP_FILE
-      TOK_SELECT
-         TOK_SELEXPR
-            TOK_ALLCOLREF
-      TOK_LIMIT
-         0
-
-
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
 
@@ -225,7 +152,6 @@ STAGE PLANS:
         TableScan
           alias: loc_orc
           Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE
-          GatherStats: false
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3

Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out Wed Aug  6 17:58:01 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_select.q.out Wed Aug  6 17:58:01 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_table.q.out Wed Aug  6 17:58:01 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_union.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_union.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_union.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_union.q.out Wed Aug  6 17:58:01 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/combine2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/combine2.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/combine2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/combine2.q.out Wed Aug  6 17:58:01 2014
@@ -671,12 +671,12 @@ STAGE PLANS:
                 keys: ds (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 0 Data size: 23248 Basic stats: PARTIAL Column stats: COMPLETE
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
                   value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_sort_11.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_sort_11.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_sort_11.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_sort_11.q.out Wed Aug  6 17:58:01 2014
@@ -395,11 +395,11 @@ STAGE PLANS:
                 keys: 1 (type: int)
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
-                  Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(DISTINCT KEY._col0:0._col0)

Modified: hive/trunk/ql/src/test/results/clientpositive/input24.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/input24.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/input24.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/input24.q.out Wed Aug  6 17:58:01 2014
@@ -28,9 +28,9 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: x
-            Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
             Select Operator
-              Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
               Group By Operator
                 aggregations: count(1)
                 mode: hash

Modified: hive/trunk/ql/src/test/results/clientpositive/input25.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/input25.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/input25.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/input25.q.out Wed Aug  6 17:58:01 2014
@@ -45,14 +45,14 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: x
-            Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Select Operator
               expressions: a (type: int), b (type: int), d (type: string)
               outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               Limit
                 Number of rows: 10
-                Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Reduce Output Operator
                   sort order: 
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
@@ -109,14 +109,14 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: x
-            Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
             Select Operator
               expressions: a (type: int), b (type: int), d (type: string)
               outputColumnNames: _col0, _col1, _col2
-              Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
               Limit
                 Number of rows: 10
-                Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                 Reduce Output Operator
                   sort order: 
                   Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE

Modified: hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out Wed Aug  6 17:58:01 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/nullgroup3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/nullgroup3.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/nullgroup3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/nullgroup3.q.out Wed Aug  6 17:58:01 2014
@@ -129,9 +129,9 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: tstparttbl2
-            Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
             Select Operator
-              Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
               Group By Operator
                 aggregations: count(1)
                 mode: hash
@@ -325,9 +325,9 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: tstparttbl2
-            Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
             Select Operator
-              Statistics: Num rows: 0 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE
               Group By Operator
                 aggregations: count(1)
                 mode: hash

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/tez/metadataonly1.q.out Wed Aug  6 17:58:01 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/union5.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/union5.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/union5.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/union5.q.out Wed Aug  6 17:58:01 2014
@@ -86,14 +86,14 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: bigint)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/union7.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/union7.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/union7.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/union7.q.out Wed Aug  6 17:58:01 2014
@@ -90,14 +90,14 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: PARTIAL
+                Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: PARTIAL
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: bigint)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: PARTIAL
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL
+                    Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: PARTIAL
                     table:
                         input format: org.apache.hadoop.mapred.TextInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Modified: hive/trunk/ql/src/test/results/clientpositive/udf_explode.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udf_explode.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/udf_explode.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/udf_explode.q.out Wed Aug  6 17:58:01 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/udtf_explode.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/udtf_explode.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/udtf_explode.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/udtf_explode.q.out Wed Aug  6 17:58:01 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/union11.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union11.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union11.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/union11.q.out Wed Aug  6 17:58:01 2014
@@ -73,12 +73,12 @@ STAGE PLANS:
                   keys: _col0 (type: string)
                   mode: hash
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col1 (type: bigint)
           TableScan
             Union
@@ -92,12 +92,12 @@ STAGE PLANS:
                   keys: _col0 (type: string)
                   mode: hash
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col1 (type: bigint)
           TableScan
             Union
@@ -111,12 +111,12 @@ STAGE PLANS:
                   keys: _col0 (type: string)
                   mode: hash
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -124,14 +124,14 @@ STAGE PLANS:
           keys: KEY._col0 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: bigint)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Modified: hive/trunk/ql/src/test/results/clientpositive/union14.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/union14.q.out?rev=1616292&r1=1616291&r2=1616292&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/union14.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/union14.q.out Wed Aug  6 17:58:01 2014
@@ -75,12 +75,12 @@ STAGE PLANS:
                     keys: _col0 (type: string)
                     mode: hash
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: string)
                       sort order: +
                       Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col1 (type: bigint)
           TableScan
             Union
@@ -94,12 +94,12 @@ STAGE PLANS:
                   keys: _col0 (type: string)
                   mode: hash
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -107,14 +107,14 @@ STAGE PLANS:
           keys: KEY._col0 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: bigint)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat