You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/08/07 02:21:48 UTC
svn commit: r1616379 [2/4] - in /hive/branches/cbo: ./ common/src/java/org/apache/hadoop/hive/common/ common/src/java/org/apache/hadoop/hive/conf/ conf/ data/files/ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ hcatalog/webhc...

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java Thu Aug  7 00:21:45 2014
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.Fi
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.LimitOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -62,6 +63,8 @@ public class AnnotateWithStatistics impl
         + MapJoinOperator.getOperatorName() + "%"), StatsRulesProcFactory.getJoinRule());
     opRules.put(new RuleRegExp("LIM", LimitOperator.getOperatorName() + "%"),
         StatsRulesProcFactory.getLimitRule());
+    opRules.put(new RuleRegExp("RS", ReduceSinkOperator.getOperatorName() + "%"),
+        StatsRulesProcFactory.getReduceSinkRule());
 
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java Thu Aug  7 00:21:45 2014
@@ -601,12 +601,18 @@ public class StatsRulesProcFactory {
               }
               dvProd *= dv;
             } else {
-
-              // partial column statistics on grouping attributes case.
-              // if column statistics on grouping attribute is missing, then
-              // assume worst case.
-              // GBY rule will emit half the number of rows if dvProd is 0
-              dvProd = 0;
+              if (parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) {
+                // the column must be an aggregate column inserted by GBY. We
+                // don't have to account for this column when computing product
+                // of NDVs
+                continue;
+              } else {
+                // partial column statistics on grouping attributes case.
+                // if column statistics on grouping attribute is missing, then
+                // assume worst case.
+                // GBY rule will emit half the number of rows if dvProd is 0
+                dvProd = 0;
+              }
               break;
             }
           }
@@ -687,7 +693,17 @@ public class StatsRulesProcFactory {
               aggColStats.add(cs);
             }
           }
-          stats.addToColumnStats(aggColStats);
+
+          // add the new aggregate column and recompute data size
+          if (aggColStats.size() > 0) {
+            stats.addToColumnStats(aggColStats);
+
+            // only if column stats are available, update the data size from
+            // the column stats
+            if (!stats.getColumnStatsState().equals(Statistics.State.NONE)) {
+              updateStats(stats, stats.getNumRows(), true);
+            }
+          }
 
           // if UDAF present and if column expression map is empty then it must
           // be full aggregation query like count(*) in which case number of
@@ -734,15 +750,24 @@ public class StatsRulesProcFactory {
    * <p>
    * In the absence of histograms, we can use the following general case
    * <p>
-   * <b>Single attribute</b>
+   * <b>2 Relations, 1 attribute</b>
    * <p>
    * T(RXS) = (T(R)*T(S))/max(V(R,Y), V(S,Y)) where Y is the join attribute
    * <p>
-   * <b>Multiple attributes</b>
+   * <b>2 Relations, 2 attributes</b>
    * <p>
    * T(RXS) = T(R)*T(S)/max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2)), where y1 and y2 are the join
    * attributes
    * <p>
+   * <b>3 Relations, 1 attribute</b>
+   * <p>
+   * T(RXSXQ) = T(R)*T(S)*T(Q)/top2largest(V(R,y), V(S,y), V(Q,y)), where y is the join attribute
+   * <p>
+   * <b>3 Relations, 2 attributes</b>
+   * <p>
+   * T(RXSXQ) = T(R)*T(S)*T(Q)/top2largest(V(R,y1), V(S,y1), V(Q,y1)) * top2largest(V(R,y2), V(S,y2), V(Q,y2)),
+   * where y1 and y2 are the join attributes
+   * <p>
    * <i>Worst case:</i> If no column statistics are available, then T(RXS) = joinFactor * max(T(R),
    * T(S)) * (numParents - 1) will be used as heuristics. joinFactor is from hive.stats.join.factor
    * hive config. In the worst case, since we do not know any information about join keys (and hence
@@ -783,9 +808,12 @@ public class StatsRulesProcFactory {
           // statistics object that is combination of statistics from all
           // relations involved in JOIN
           Statistics stats = new Statistics();
-          long prodRows = 1;
+          List<Long> rowCountParents = Lists.newArrayList();
           List<Long> distinctVals = Lists.newArrayList();
+
+          // 2 relations, multiple attributes
           boolean multiAttr = false;
+          int numAttr = 1;
 
           Map<String, ColStatistics> joinedColStats = Maps.newHashMap();
           Map<Integer, List<String>> joinKeys = Maps.newHashMap();
@@ -795,12 +823,13 @@ public class StatsRulesProcFactory {
             ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
 
             Statistics parentStats = parent.getStatistics();
-            prodRows *= parentStats.getNumRows();
+            rowCountParents.add(parentStats.getNumRows());
             List<ExprNodeDesc> keyExprs = parent.getConf().getKeyCols();
 
             // multi-attribute join key
             if (keyExprs.size() > 1) {
               multiAttr = true;
+              numAttr = keyExprs.size();
             }
 
             // compute fully qualified join key column names. this name will be
@@ -811,16 +840,9 @@ public class StatsRulesProcFactory {
                 StatsUtils.getFullQualifedColNameFromExprs(keyExprs, parent.getColumnExprMap());
             joinKeys.put(pos, fqCols);
 
-            Map<String, ExprNodeDesc> colExprMap = parent.getColumnExprMap();
-            RowSchema rs = parent.getSchema();
-
             // get column statistics for all output columns
-            List<ColStatistics> cs =
-                StatsUtils.getColStatisticsFromExprMap(conf, parentStats, colExprMap, rs);
-            for (ColStatistics c : cs) {
-              if (c != null) {
-                joinedColStats.put(c.getFullyQualifiedColName(), c);
-              }
+            for (ColStatistics cs : parentStats.getColumnStats()) {
+              joinedColStats.put(cs.getFullyQualifiedColName(), cs);
             }
 
             // since new statistics is derived from all relations involved in
@@ -834,10 +856,10 @@ public class StatsRulesProcFactory {
           long denom = 1;
           if (multiAttr) {
             List<Long> perAttrDVs = Lists.newArrayList();
-            int numAttr = joinKeys.get(0).size();
             for (int idx = 0; idx < numAttr; idx++) {
               for (Integer i : joinKeys.keySet()) {
                 String col = joinKeys.get(i).get(idx);
+                col = StatsUtils.stripPrefixFromColumnName(col);
                 ColStatistics cs = joinedColStats.get(col);
                 if (cs != null) {
                   perAttrDVs.add(cs.getCountDistint());
@@ -853,6 +875,7 @@ public class StatsRulesProcFactory {
           } else {
             for (List<String> jkeys : joinKeys.values()) {
               for (String jk : jkeys) {
+                jk = StatsUtils.stripPrefixFromColumnName(jk);
                 ColStatistics cs = joinedColStats.get(jk);
                 if (cs != null) {
                   distinctVals.add(cs.getCountDistint());
@@ -862,6 +885,11 @@ public class StatsRulesProcFactory {
             denom = getDenominator(distinctVals);
           }
 
+          // Update NDV of joined columns to be min(V(R,y), V(S,y))
+          if (multiAttr) {
+            updateJoinColumnsNDV(joinKeys, joinedColStats, numAttr);
+          }
+
           // column statistics from different sources are put together and rename
           // fully qualified column names based on output schema of join operator
           Map<String, ExprNodeDesc> colExprMap = jop.getColumnExprMap();
@@ -878,7 +906,6 @@ public class StatsRulesProcFactory {
               ColStatistics cs = joinedColStats.get(fqColName);
               String outColName = key;
               String outTabAlias = ci.getTabAlias();
-              outColName = StatsUtils.stripPrefixFromColumnName(outColName);
               if (cs != null) {
                 cs.setColumnName(outColName);
                 cs.setTableAlias(outTabAlias);
@@ -889,13 +916,21 @@ public class StatsRulesProcFactory {
 
           // update join statistics
           stats.setColumnStats(outColStats);
-          long newRowCount = prodRows / denom;
+          long newRowCount = computeNewRowCount(rowCountParents, denom);
+
+          if (newRowCount <= 0 && LOG.isDebugEnabled()) {
+            newRowCount = 0;
+            LOG.debug("[0] STATS-" + jop.toString() + ": Product of #rows might be greater than"
+                + " denominator or overflow might have occurred. Resetting row count to 0."
+                + " #Rows of parents: " + rowCountParents.toString() + ". Denominator: " + denom);
+          }
+
           stats.setNumRows(newRowCount);
           stats.setDataSize(StatsUtils.getDataSizeFromColumnStats(newRowCount, outColStats));
           jop.setStatistics(stats);
 
           if (LOG.isDebugEnabled()) {
-            LOG.debug("[0] STATS-" + jop.toString() + ": " + stats.extendedToString());
+            LOG.debug("[1] STATS-" + jop.toString() + ": " + stats.extendedToString());
           }
         } else {
 
@@ -930,13 +965,72 @@ public class StatsRulesProcFactory {
           jop.setStatistics(wcStats);
 
           if (LOG.isDebugEnabled()) {
-            LOG.debug("[1] STATS-" + jop.toString() + ": " + wcStats.extendedToString());
+            LOG.debug("[2] STATS-" + jop.toString() + ": " + wcStats.extendedToString());
           }
         }
       }
       return null;
     }
 
+    private long computeNewRowCount(List<Long> rowCountParents, long denom) {
+      double factor = 0.0d;
+      long result = 1;
+      long max = rowCountParents.get(0);
+      long maxIdx = 0;
+
+      // To avoid long overflow, we will divide the max row count by denominator
+      // and use that factor to multiply with other row counts
+      for (int i = 1; i < rowCountParents.size(); i++) {
+        if (rowCountParents.get(i) > max) {
+          max = rowCountParents.get(i);
+          maxIdx = i;
+        }
+      }
+
+      factor = (double) max / (double) denom;
+
+      for (int i = 0; i < rowCountParents.size(); i++) {
+        if (i != maxIdx) {
+          result *= rowCountParents.get(i);
+        }
+      }
+
+      result = (long) (result * factor);
+
+      return result;
+    }
+
+    private void updateJoinColumnsNDV(Map<Integer, List<String>> joinKeys,
+        Map<String, ColStatistics> joinedColStats, int numAttr) {
+      int joinColIdx = 0;
+      while (numAttr > 0) {
+        long minNDV = Long.MAX_VALUE;
+
+        // find min NDV for joining columns
+        for (Map.Entry<Integer, List<String>> entry : joinKeys.entrySet()) {
+          String key = entry.getValue().get(joinColIdx);
+          key = StatsUtils.stripPrefixFromColumnName(key);
+          ColStatistics cs = joinedColStats.get(key);
+          if (cs != null && cs.getCountDistint() < minNDV) {
+            minNDV = cs.getCountDistint();
+          }
+        }
+
+        // set min NDV value to all columns involved in join
+        if (minNDV != Long.MAX_VALUE) {
+          for (Map.Entry<Integer, List<String>> entry : joinKeys.entrySet()) {
+            String key = entry.getValue().get(joinColIdx);
+            key = StatsUtils.stripPrefixFromColumnName(key);
+            ColStatistics cs = joinedColStats.get(key);
+            cs.setCountDistint(minNDV);
+          }
+        }
+
+        joinColIdx++;
+        numAttr--;
+      }
+    }
+
     private long getDenominator(List<Long> distinctVals) {
 
       if (distinctVals.isEmpty()) {
@@ -954,16 +1048,23 @@ public class StatsRulesProcFactory {
         return Collections.max(distinctVals);
       } else {
 
+        // remember min value and ignore it from the denominator
+        long minNDV = distinctVals.get(0);
+        int minIdx = 0;
+
+        for (int i = 1; i < distinctVals.size(); i++) {
+          if (distinctVals.get(i) < minNDV) {
+            minNDV = distinctVals.get(i);
+            minIdx = i;
+          }
+        }
+
         // join from multiple relations:
-        // denom = max(v1, v2) * max(v2, v3) * max(v3, v4)
+        // denom = Product of all NDVs except the least of all
         long denom = 1;
-        for (int i = 0; i < distinctVals.size() - 1; i++) {
-          long v1 = distinctVals.get(i);
-          long v2 = distinctVals.get(i + 1);
-          if (v1 >= v2) {
-            denom *= v1;
-          } else {
-            denom *= v2;
+        for (int i = 0; i < distinctVals.size(); i++) {
+          if (i != minIdx) {
+            denom *= distinctVals.get(i);
           }
         }
         return denom;
@@ -983,8 +1084,6 @@ public class StatsRulesProcFactory {
       LimitOperator lop = (LimitOperator) nd;
       Operator<? extends OperatorDesc> parent = lop.getParentOperators().get(0);
       Statistics parentStats = parent.getStatistics();
-      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
-      HiveConf conf = aspCtx.getConf();
 
       try {
         long limit = -1;
@@ -1032,6 +1131,73 @@ public class StatsRulesProcFactory {
   }
 
   /**
+   * ReduceSink operator does not change any of the statistics. But it renames
+   * the column statistics from its parent based on the output key and value
+   * column names to make it easy for the downstream operators. This is different
+   * from the default stats which just aggregates and passes along the statistics
+   * without actually renaming based on output schema of the operator.
+   */
+  public static class ReduceSinkStatsRule extends DefaultStatsRule implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      ReduceSinkOperator rop = (ReduceSinkOperator) nd;
+      Operator<? extends OperatorDesc> parent = rop.getParentOperators().get(0);
+      Statistics parentStats = parent.getStatistics();
+      if (parentStats != null) {
+        AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
+        HiveConf conf = aspCtx.getConf();
+
+        List<String> outKeyColNames = rop.getConf().getOutputKeyColumnNames();
+        List<String> outValueColNames = rop.getConf().getOutputValueColumnNames();
+        Map<String, ExprNodeDesc> colExprMap = rop.getColumnExprMap();
+        try {
+          Statistics outStats = parentStats.clone();
+          if (satisfyPrecondition(parentStats)) {
+            List<ColStatistics> colStats = Lists.newArrayList();
+            for (String key : outKeyColNames) {
+              String prefixedKey = "KEY." + key;
+              ExprNodeDesc end = colExprMap.get(prefixedKey);
+              if (end != null) {
+                ColStatistics cs = StatsUtils
+                    .getColStatisticsFromExpression(conf, parentStats, end);
+                if (cs != null) {
+                  cs.setColumnName(key);
+                  colStats.add(cs);
+                }
+              }
+            }
+
+            for (String val : outValueColNames) {
+              String prefixedVal = "VALUE." + val;
+              ExprNodeDesc end = colExprMap.get(prefixedVal);
+              if (end != null) {
+                ColStatistics cs = StatsUtils
+                    .getColStatisticsFromExpression(conf, parentStats, end);
+                if (cs != null) {
+                  cs.setColumnName(val);
+                  colStats.add(cs);
+                }
+              }
+            }
+
+            outStats.setColumnStats(colStats);
+          }
+          rop.setStatistics(outStats);
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("[0] STATS-" + rop.toString() + ": " + outStats.extendedToString());
+          }
+        } catch (CloneNotSupportedException e) {
+          throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+        }
+      }
+      return null;
+    }
+
+  }
+
+  /**
    * Default rule is to aggregate the statistics from all its parent operators.
    */
   public static class DefaultStatsRule implements NodeProcessor {
@@ -1108,6 +1274,10 @@ public class StatsRulesProcFactory {
     return new LimitStatsRule();
   }
 
+  public static NodeProcessor getReduceSinkRule() {
+    return new ReduceSinkStatsRule();
+  }
+
   public static NodeProcessor getDefaultRule() {
     return new DefaultStatsRule();
   }

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java Thu Aug  7 00:21:45 2014
@@ -27,6 +27,7 @@ import java.util.Map;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.ErrorMsg;
@@ -102,6 +103,10 @@ public class ColumnStatsSemanticAnalyzer
   private Map<String,String> getPartKeyValuePairsFromAST(ASTNode tree) {
     ASTNode child = ((ASTNode) tree.getChild(0).getChild(1));
     Map<String,String> partSpec = new HashMap<String, String>();
+    if (null == child) {
+      // case of analyze table T compute statistics for columns;
+      return partSpec;
+    }
     String partKey;
     String partValue;
     for (int i = 0; i < child.getChildCount(); i++) {
@@ -361,6 +366,9 @@ public class ColumnStatsSemanticAnalyzer
       checkIfTemporaryTable();
       checkForPartitionColumns(colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys()));
       validateSpecifiedColumnNames(colNames);
+      if (conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned()) {
+        isPartitionStats = true;
+      }
 
       if (isPartitionStats) {
         isTableLevel = false;

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java Thu Aug  7 00:21:45 2014
@@ -52,6 +52,11 @@ public abstract class BaseWork extends A
 
   private String name;
 
+  // Vectorization.
+  protected Map<String, Map<Integer, String>> scratchColumnVectorTypes = null;
+  protected Map<String, Map<String, Integer>> scratchColumnMap = null;
+  protected boolean vectorMode = false;
+
   public void setGatheringStats(boolean gatherStats) {
     this.gatheringStats = gatherStats;
   }
@@ -107,5 +112,31 @@ public abstract class BaseWork extends A
     return returnSet;
   }
 
+  public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
+    return scratchColumnVectorTypes;
+  }
+
+  public void setScratchColumnVectorTypes(
+      Map<String, Map<Integer, String>> scratchColumnVectorTypes) {
+    this.scratchColumnVectorTypes = scratchColumnVectorTypes;
+  }
+
+  public Map<String, Map<String, Integer>> getScratchColumnMap() {
+    return scratchColumnMap;
+  }
+
+  public void setScratchColumnMap(Map<String, Map<String, Integer>> scratchColumnMap) {
+    this.scratchColumnMap = scratchColumnMap;
+  }
+
+  @Override
+  public void setVectorMode(boolean vectorMode) {
+    this.vectorMode = vectorMode;
+  }
+
+  public boolean getVectorMode() {
+    return vectorMode;
+  }
+
   public abstract void configureJobConf(JobConf job);
 }

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java Thu Aug  7 00:21:45 2014
@@ -116,10 +116,6 @@ public class MapWork extends BaseWork {
 
   private boolean useOneNullRowInputFormat;
 
-  private Map<String, Map<Integer, String>> scratchColumnVectorTypes = null;
-  private Map<String, Map<String, Integer>> scratchColumnMap = null;
-  private boolean vectorMode = false;
-
   public MapWork() {}
 
   public MapWork(String name) {
@@ -519,32 +515,6 @@ public class MapWork extends BaseWork {
     }
   }
 
-  public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
-    return scratchColumnVectorTypes;
-  }
-
-  public void setScratchColumnVectorTypes(
-      Map<String, Map<Integer, String>> scratchColumnVectorTypes) {
-    this.scratchColumnVectorTypes = scratchColumnVectorTypes;
-  }
-
-  public Map<String, Map<String, Integer>> getScratchColumnMap() {
-    return scratchColumnMap;
-  }
-
-  public void setScratchColumnMap(Map<String, Map<String, Integer>> scratchColumnMap) {
-    this.scratchColumnMap = scratchColumnMap;
-  }
-
-  public boolean getVectorMode() {
-    return vectorMode;
-  }
-
-  @Override
-  public void setVectorMode(boolean vectorMode) {
-    this.vectorMode = vectorMode;
-  }
-
   public void logPathToAliases() {
     if (LOG.isDebugEnabled()) {
       LOG.debug("LOGGING PATH TO ALIASES");

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java Thu Aug  7 00:21:45 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.plan;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedHashSet;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -30,7 +31,18 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorUtils;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.ReflectionUtils;
 
 /**
  * ReduceWork represents all the information used to run a reduce task on the cluster.
@@ -84,6 +96,11 @@ public class ReduceWork extends BaseWork
   // for auto reduce parallelism - max reducers requested
   private int maxReduceTasks;
 
+  private ObjectInspector keyObjectInspector = null;
+  private ObjectInspector valueObjectInspector = null;
+
+  private Map<String, Integer> reduceColumnNameMap = new LinkedHashMap<String, Integer>();
+
   /**
    * If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing
    * to keySerializeInfo of the ReduceSink
@@ -95,7 +112,90 @@ public class ReduceWork extends BaseWork
   }
 
   public TableDesc getKeyDesc() {
-    return keyDesc;
+     return keyDesc;
+  }
+
+  private ObjectInspector getObjectInspector(TableDesc desc) {
+    ObjectInspector objectInspector;
+    try {
+      Deserializer deserializer = (SerDe) ReflectionUtils.newInstance(desc
+                .getDeserializerClass(), null);
+      SerDeUtils.initializeSerDe(deserializer, null, desc.getProperties(), null);
+      objectInspector = deserializer.getObjectInspector();
+    } catch (Exception e) {
+      return null;
+    }
+    return objectInspector;
+  }
+
+  public ObjectInspector getKeyObjectInspector() {
+    if (keyObjectInspector == null) {
+      keyObjectInspector = getObjectInspector(keyDesc);
+    }
+    return keyObjectInspector;
+  }
+
+  // Only works when not tagging.
+  public ObjectInspector getValueObjectInspector() {
+    if (needsTagging) {
+      return null;
+    }
+    if (valueObjectInspector == null) {
+      valueObjectInspector = getObjectInspector(tagToValueDesc.get(0));
+    }
+    return valueObjectInspector;
+  }
+
+  private int addToReduceColumnNameMap(StructObjectInspector structObjectInspector, int startIndex, String prefix) {
+    List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
+    int index = startIndex;
+    for (StructField field: fields) {
+      reduceColumnNameMap.put(prefix + "." + field.getFieldName(), index);
+      index++;
+    }
+    return index;
+  }
+
+  public Boolean fillInReduceColumnNameMap() {
+    ObjectInspector keyObjectInspector = getKeyObjectInspector();
+    if (keyObjectInspector == null || !(keyObjectInspector instanceof StructObjectInspector)) {
+        return false;
+    }
+    StructObjectInspector keyStructObjectInspector = (StructObjectInspector) keyObjectInspector;
+
+    ObjectInspector valueObjectInspector = getValueObjectInspector();
+    if (valueObjectInspector == null || !(valueObjectInspector instanceof StructObjectInspector)) {
+        return false;
+    }
+    StructObjectInspector valueStructObjectInspector = (StructObjectInspector) valueObjectInspector;
+
+    int keyCount = addToReduceColumnNameMap(keyStructObjectInspector, 0, Utilities.ReduceField.KEY.toString());
+    addToReduceColumnNameMap(valueStructObjectInspector, keyCount, Utilities.ReduceField.VALUE.toString());
+    return true;
+  }
+
+  public Map<String, Integer> getReduceColumnNameMap() {
+    if (needsTagging) {
+      return null;
+    }
+    if (reduceColumnNameMap.size() == 0) {
+      if (!fillInReduceColumnNameMap()) {
+        return null;
+      }
+    }
+    return reduceColumnNameMap;
+  }
+
+  public List<String> getReduceColumnNames() {
+    if (needsTagging) {
+        return null;
+    }
+    if (reduceColumnNameMap.size() == 0) {
+        if (!fillInReduceColumnNameMap()) {
+            return null;
+        }
+    }
+    return new ArrayList<String>(reduceColumnNameMap.keySet());
   }
 
   public List<TableDesc> getTagToValueDesc() {

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java Thu Aug  7 00:21:45 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.securi
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.security.AccessControlException;
+import java.security.PrivilegedExceptionAction;
 import java.util.EnumSet;
 import java.util.List;
 
@@ -35,6 +36,9 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.Database;
@@ -44,6 +48,7 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.shims.ShimLoader;
 
 /**
  * StorageBasedAuthorizationProvider is an implementation of
@@ -288,7 +293,7 @@ public class StorageBasedAuthorizationPr
    * If the given path does not exists, it checks for its parent folder.
    */
   protected void checkPermissions(final Configuration conf, final Path path,
-      final EnumSet<FsAction> actions) throws IOException, LoginException {
+      final EnumSet<FsAction> actions) throws IOException, LoginException, HiveException {
 
     if (path == null) {
       throw new IllegalArgumentException("path is null");
@@ -297,8 +302,7 @@ public class StorageBasedAuthorizationPr
     final FileSystem fs = path.getFileSystem(conf);
 
     if (fs.exists(path)) {
-      checkPermissions(fs, path, actions,
-          authenticator.getUserName(), authenticator.getGroupNames());
+      checkPermissions(fs, path, actions, authenticator.getUserName());
     } else if (path.getParent() != null) {
       // find the ancestor which exists to check its permissions
       Path par = path.getParent();
@@ -309,8 +313,7 @@ public class StorageBasedAuthorizationPr
         par = par.getParent();
       }
 
-      checkPermissions(fs, par, actions,
-          authenticator.getUserName(), authenticator.getGroupNames());
+      checkPermissions(fs, par, actions, authenticator.getUserName());
     }
   }
 
@@ -320,56 +323,23 @@ public class StorageBasedAuthorizationPr
    */
   @SuppressWarnings("deprecation")
   protected static void checkPermissions(final FileSystem fs, final Path path,
-      final EnumSet<FsAction> actions, String user, List<String> groups) throws IOException,
-      AccessControlException {
-
-    String superGroupName = getSuperGroupName(fs.getConf());
-    if (userBelongsToSuperGroup(superGroupName, groups)) {
-      LOG.info("User \"" + user + "\" belongs to super-group \"" + superGroupName + "\". " +
-          "Permission granted for actions: (" + actions + ").");
-      return;
-    }
-
-    final FileStatus stat;
+      final EnumSet<FsAction> actions, String user) throws IOException,
+      AccessControlException, HiveException {
 
     try {
-      stat = fs.getFileStatus(path);
+      FileStatus stat = fs.getFileStatus(path);
+      for (FsAction action : actions) {
+        FileUtils.checkFileAccessWithImpersonation(fs, stat, action, user);
+      }
     } catch (FileNotFoundException fnfe) {
       // File named by path doesn't exist; nothing to validate.
       return;
     } catch (org.apache.hadoop.fs.permission.AccessControlException ace) {
       // Older hadoop version will throw this @deprecated Exception.
       throw accessControlException(ace);
+    } catch (Exception err) {
+      throw new HiveException(err);
     }
-
-    final FsPermission dirPerms = stat.getPermission();
-    final String grp = stat.getGroup();
-
-    for (FsAction action : actions) {
-      if (user.equals(stat.getOwner())) {
-        if (dirPerms.getUserAction().implies(action)) {
-          continue;
-        }
-      }
-      if (groups.contains(grp)) {
-        if (dirPerms.getGroupAction().implies(action)) {
-          continue;
-        }
-      }
-      if (dirPerms.getOtherAction().implies(action)) {
-        continue;
-      }
-      throw new AccessControlException("action " + action + " not permitted on path "
-          + path + " for user " + user);
-    }
-  }
-
-  private static String getSuperGroupName(Configuration configuration) {
-    return configuration.get(DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY, "");
-  }
-
-  private static boolean userBelongsToSuperGroup(String superGroupName, List<String> groups) {
-    return groups.contains(superGroupName);
   }
 
   protected Path getDbLocation(Database db) throws HiveException {

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java Thu Aug  7 00:21:45 2014
@@ -394,7 +394,7 @@ public class SQLAuthorizationUtils {
       if (FileUtils.isActionPermittedForFileHierarchy(fs, fileStatus, userName, FsAction.READ)) {
         availPrivs.addPrivilege(SQLPrivTypeGrant.SELECT_NOGRANT);
       }
-    } catch (IOException e) {
+    } catch (Exception e) {
       String msg = "Error getting permissions for " + filePath + ": " + e.getMessage();
       throw new HiveAuthzPluginException(msg, e);
     }

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java Thu Aug  7 00:21:45 2014
@@ -212,7 +212,8 @@ public class StatsUtils {
       stats.addToDataSize(ds);
 
       // if at least a partition does not contain row count then mark basic stats state as PARTIAL
-      if (containsNonPositives(rowCounts)) {
+      if (containsNonPositives(rowCounts) &&
+          stats.getBasicStatsState().equals(State.COMPLETE)) {
         stats.setBasicStatsState(State.PARTIAL);
       }
       boolean haveFullStats = fetchColStats;
@@ -876,12 +877,9 @@ public class StatsUtils {
     if (colExprMap != null) {
       for (ColumnInfo ci : rowSchema.getSignature()) {
         String outColName = ci.getInternalName();
+        outColName = StatsUtils.stripPrefixFromColumnName(outColName);
         String outTabAlias = ci.getTabAlias();
         ExprNodeDesc end = colExprMap.get(outColName);
-        if (end == null) {
-          outColName = StatsUtils.stripPrefixFromColumnName(outColName);
-          end = colExprMap.get(outColName);
-        }
         ColStatistics colStat = getColStatisticsFromExpression(conf, parentStats, end);
         if (colStat != null) {
           outColName = StatsUtils.stripPrefixFromColumnName(outColName);
@@ -1142,7 +1140,7 @@ public class StatsUtils {
    */
   public static String stripPrefixFromColumnName(String colName) {
     String stripedName = colName;
-    if (colName.startsWith("KEY._") || colName.startsWith("VALUE._")) {
+    if (colName.startsWith("KEY") || colName.startsWith("VALUE")) {
       // strip off KEY./VALUE. from column name
       stripedName = colName.split("\\.")[1];
     }
@@ -1210,15 +1208,16 @@ public class StatsUtils {
         for (Map.Entry<String, ExprNodeDesc> entry : map.entrySet()) {
           if (entry.getValue().isSame(end)) {
             outColName = entry.getKey();
+            outColName = stripPrefixFromColumnName(outColName);
           }
         }
         if (end instanceof ExprNodeColumnDesc) {
           ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
           if (outColName == null) {
             outColName = encd.getColumn();
+            outColName = stripPrefixFromColumnName(outColName);
           }
           String tabAlias = encd.getTabAlias();
-          outColName = stripPrefixFromColumnName(outColName);
           result.add(getFullyQualifiedColumnName(tabAlias, outColName));
         } else if (end instanceof ExprNodeGenericFuncDesc) {
           ExprNodeGenericFuncDesc enf = (ExprNodeGenericFuncDesc) end;

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java Thu Aug  7 00:21:45 2014
@@ -48,9 +48,9 @@ public abstract class GenericUDFBasePad 
       throw new UDFArgumentException(udfName + " requires three arguments. Found :"
 	  + arguments.length);
     }
-    converter1 = checkArguments(arguments, 0);
-    converter2 = checkArguments(arguments, 1);
-    converter3 = checkArguments(arguments, 2);
+    converter1 = checkTextArguments(arguments, 0);
+    converter2 = checkIntArguments(arguments, 1);
+    converter3 = checkTextArguments(arguments, 2);
     return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
   }
 
@@ -91,31 +91,39 @@ public abstract class GenericUDFBasePad 
   protected abstract void performOp(byte[] data, byte[] txt, byte[] padTxt, int len, Text str,
       Text pad);
 
-  private Converter checkArguments(ObjectInspector[] arguments, int i)
+  // Convert input arguments to Text, if necessary.
+  private Converter checkTextArguments(ObjectInspector[] arguments, int i)
     throws UDFArgumentException {
     if (arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {
       throw new UDFArgumentTypeException(i + 1, "Only primitive type arguments are accepted but "
-	  + arguments[i].getTypeName() + " is passed. as  arguments");
+      + arguments[i].getTypeName() + " is passed. as  arguments");
+    }
+
+    Converter converter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[i],
+          PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+
+    return converter;
+  }
+
+  private Converter checkIntArguments(ObjectInspector[] arguments, int i)
+    throws UDFArgumentException {
+    if (arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+      throw new UDFArgumentTypeException(i + 1, "Only primitive type arguments are accepted but "
+      + arguments[i].getTypeName() + " is passed. as  arguments");
     }
     PrimitiveCategory inputType = ((PrimitiveObjectInspector) arguments[i]).getPrimitiveCategory();
     Converter converter;
     switch (inputType) {
-    case STRING:
-    case CHAR:
-    case VARCHAR:
-      converter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[i],
-	  PrimitiveObjectInspectorFactory.writableStringObjectInspector);
-      break;
     case INT:
     case SHORT:
     case BYTE:
       converter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[i],
-	  PrimitiveObjectInspectorFactory.writableIntObjectInspector);
+      PrimitiveObjectInspectorFactory.writableIntObjectInspector);
       break;
     default:
       throw new UDFArgumentTypeException(i + 1, udfName
-	  + " only takes STRING/CHAR/INT/SHORT/BYTE/VARCHAR types as " + (i + 1) + "-ths argument, got "
-	  + inputType);
+      + " only takes INT/SHORT/BYTE types as " + (i + 1) + "-ths argument, got "
+      + inputType);
     }
     return converter;
   }

Modified: hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java (original)
+++ hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java Thu Aug  7 00:21:45 2014
@@ -107,7 +107,7 @@ public class TestVectorizer {
     gbyOp.setConf(desc);
 
     Vectorizer v = new Vectorizer();
-    Assert.assertTrue(v.validateOperator(gbyOp));
+    Assert.assertTrue(v.validateMapWorkOperator(gbyOp));
     VectorGroupByOperator vectorOp = (VectorGroupByOperator) v.vectorizeOperator(gbyOp, vContext);
     Assert.assertEquals(VectorUDAFSumLong.class, vectorOp.getAggregators()[0].getClass());
     VectorUDAFSumLong udaf = (VectorUDAFSumLong) vectorOp.getAggregators()[0];
@@ -187,7 +187,7 @@ public class TestVectorizer {
     mop.setConf(mjdesc);
  
     Vectorizer vectorizer = new Vectorizer();
-    Assert.assertTrue(vectorizer.validateOperator(mop));
+    Assert.assertTrue(vectorizer.validateMapWorkOperator(mop));
   }
 
   
@@ -203,6 +203,6 @@ public class TestVectorizer {
       mop.setConf(mjdesc);
     
       Vectorizer vectorizer = new Vectorizer();
-      Assert.assertTrue(vectorizer.validateOperator(mop)); 
+      Assert.assertTrue(vectorizer.validateMapWorkOperator(mop)); 
   }
 }

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_filter.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_filter.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_filter.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_filter.q Thu Aug  7 00:21:45 2014
@@ -15,76 +15,76 @@ load data local inpath '../../data/files
 insert overwrite table loc_orc select * from loc_staging;
 
 -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- column stats are not COMPLETE, so stats are not updated
 -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc where state='OH';
+explain select * from loc_orc where state='OH';
 
 analyze table loc_orc compute statistics for columns state,locid,zip,year;
 
 -- state column has 5 distincts. numRows/countDistincts
 -- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where state='OH';
+explain select * from loc_orc where state='OH';
 
 -- not equals comparison shouldn't affect number of rows
 -- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where state!='OH';
-explain extended select * from loc_orc where state<>'OH';
+explain select * from loc_orc where state!='OH';
+explain select * from loc_orc where state<>'OH';
 
 -- nulls are treated as constant equality comparison
 -- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where zip is null;
+explain select * from loc_orc where zip is null;
 -- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where !(zip is not null);
+explain select * from loc_orc where !(zip is not null);
 
 -- not nulls are treated as inverse of nulls
 -- numRows: 7 rawDataSize: 702
-explain extended select * from loc_orc where zip is not null;
+explain select * from loc_orc where zip is not null;
 -- numRows: 7 rawDataSize: 702
-explain extended select * from loc_orc where !(zip is null);
+explain select * from loc_orc where !(zip is null);
 
 -- NOT evaluation. true will pass all rows, false will not pass any rows
 -- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where !false;
+explain select * from loc_orc where !false;
 -- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where !true;
+explain select * from loc_orc where !true;
 
 -- Constant evaluation. true will pass all rows, false will not pass any rows
 -- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where true;
+explain select * from loc_orc where true;
 -- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where 'foo';
+explain select * from loc_orc where 'foo';
 -- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where true = true;
+explain select * from loc_orc where true = true;
 -- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where false = true;
+explain select * from loc_orc where false = true;
 -- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where 'foo' = 'bar';
+explain select * from loc_orc where 'foo' = 'bar';
 -- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where false;
+explain select * from loc_orc where false;
 
 -- OR evaluation. 1 row for OH and 1 row for CA
 -- numRows: 2 rawDataSize: 204
-explain extended select * from loc_orc where state='OH' or state='CA';
+explain select * from loc_orc where state='OH' or state='CA';
 
 -- AND evaluation. cascadingly apply rules. 8/2 = 4/2 = 2
 -- numRows: 2 rawDataSize: 204
-explain extended select * from loc_orc where year=2001 and year is null;
+explain select * from loc_orc where year=2001 and year is null;
 -- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where year=2001 and state='OH' and state='FL';
+explain select * from loc_orc where year=2001 and state='OH' and state='FL';
 
 -- AND and OR together. left expr will yield 1 row and right will yield 1 row
 -- numRows: 3 rawDataSize: 306
-explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA');
+explain select * from loc_orc where (year=2001 and year is null) or (state='CA');
 
 -- AND and OR together. left expr will yield 8 rows and right will yield 1 row
 -- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA');
+explain select * from loc_orc where (year=2001 or year is null) and (state='CA');
 
 -- for all inequality conditions, rows/3 is the rule
 -- numRows: 2 rawDataSize: 204
-explain extended select * from loc_orc where locid < 30;
-explain extended select * from loc_orc where locid > 30;
-explain extended select * from loc_orc where locid <= 30;
-explain extended select * from loc_orc where locid >= 30;
+explain select * from loc_orc where locid < 30;
+explain select * from loc_orc where locid > 30;
+explain select * from loc_orc where locid <= 30;
+explain select * from loc_orc where locid >= 30;

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_groupby.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_groupby.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_groupby.q Thu Aug  7 00:21:45 2014
@@ -15,14 +15,14 @@ load data local inpath '../../data/files
 insert overwrite table loc_orc select * from loc_staging;
 
 -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- partial column stats
 analyze table loc_orc compute statistics for columns state;
 
 -- inner group by: map - numRows: 8 reduce - numRows: 4
 -- outer group by: map - numRows: 4 reduce numRows: 2
-explain extended select a, c, min(b)
+explain select a, c, min(b)
 from ( select state as a, locid as b, count(*) as c
        from loc_orc
        group by state,locid
@@ -34,36 +34,36 @@ analyze table loc_orc compute statistics
 -- only one distinct value in year column + 1 NULL value
 -- map-side GBY: numRows: 8 (map-side will not do any reduction)
 -- reduce-side GBY: numRows: 2
-explain extended select year from loc_orc group by year;
+explain select year from loc_orc group by year;
 
 -- map-side GBY: numRows: 8
 -- reduce-side GBY: numRows: 4
-explain extended select state,locid from loc_orc group by state,locid;
+explain select state,locid from loc_orc group by state,locid;
 
 -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
-explain extended select state,locid from loc_orc group by state,locid with cube;
+explain select state,locid from loc_orc group by state,locid with cube;
 
 -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
-explain extended select state,locid from loc_orc group by state,locid with rollup;
+explain select state,locid from loc_orc group by state,locid with rollup;
 
 -- map-side GBY numRows: 8 reduce-side GBY numRows: 4
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state));
+explain select state,locid from loc_orc group by state,locid grouping sets((state));
 
 -- map-side GBY numRows: 16 reduce-side GBY numRows: 8
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid));
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid));
 
 -- map-side GBY numRows: 24 reduce-side GBY numRows: 12
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),());
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),());
 
 -- map-side GBY numRows: 32 reduce-side GBY numRows: 16
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),());
+explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),());
 
 set hive.stats.map.parallelism=10;
 
 -- map-side GBY: numRows: 80 (map-side will not do any reduction)
 -- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2)
-explain extended select year from loc_orc group by year;
+explain select year from loc_orc group by year;
 
 -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
-explain extended select state,locid from loc_orc group by state,locid with cube;
+explain select state,locid from loc_orc group by state,locid with cube;
 

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_join.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_join.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_join.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_join.q Thu Aug  7 00:21:45 2014
@@ -1,81 +1,70 @@
 set hive.stats.fetch.column.stats=true;
+set hive.stats.ndv.error=0.0;
 
-create table if not exists emp_staging (
+create table if not exists emp (
   lastname string,
-  deptid int
+  deptid int,
+  locid int
 ) row format delimited fields terminated by '|' stored as textfile;
 
-create table if not exists dept_staging (
+create table if not exists dept (
   deptid int,
   deptname string
 ) row format delimited fields terminated by '|' stored as textfile;
 
-create table if not exists loc_staging (
+create table if not exists loc (
   state string,
   locid int,
   zip bigint,
   year int
 ) row format delimited fields terminated by '|' stored as textfile;
 
-create table if not exists emp_orc like emp_staging;
-alter table emp_orc set fileformat orc;
-
-create table if not exists dept_orc like dept_staging;
-alter table dept_orc set fileformat orc;
-
-create table loc_orc like loc_staging;
-alter table loc_orc set fileformat orc;
-
-LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging;
-LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging;
-LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging;
-
-insert overwrite table emp_orc select * from emp_staging;
-insert overwrite table dept_orc select * from dept_staging;
-insert overwrite table loc_orc select * from loc_staging;
-
-analyze table emp_orc compute statistics for columns lastname,deptid;
-analyze table dept_orc compute statistics for columns deptname,deptid;
-analyze table loc_orc compute statistics for columns state,locid,zip,year;
+LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp;
+LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept;
+LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc;
+
+analyze table emp compute statistics;
+analyze table dept compute statistics;
+analyze table loc compute statistics;
+analyze table emp compute statistics for columns lastname,deptid,locid;
+analyze table dept compute statistics for columns deptname,deptid;
+analyze table loc compute statistics for columns state,locid,zip,year;
 
 -- number of rows
--- emp_orc  - 6
--- dept_orc - 4
--- loc_orc  - 8
+-- emp  - 48
+-- dept - 6
+-- loc  - 8
 
 -- count distincts for relevant columns (since count distinct values are approximate, in some cases count distinct values will be greater than the number of rows)
--- emp_orc.deptid - 3
--- emp_orc.lastname - 7
--- dept_orc.deptid - 6
--- dept_orc.deptname - 5
--- loc_orc.locid - 6
--- loc_orc.state - 7
-
--- Expected output rows: 4
--- Reason: #rows = (6*4)/max(3,6)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid);
-
--- 3 way join
--- Expected output rows: 4
--- Reason: #rows = (6*4*6)/max(3,6)*max(6,3)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid);
-
--- Expected output rows: 5
--- Reason: #rows = (6*4*8)/max(3,6)*max(6,6)
-explain extended select * from emp_orc e join dept_orc d  on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid);
-
--- join keys of different types
--- Expected output rows: 4
--- Reason: #rows = (6*4*8)/max(3,6)*max(6,7)
-explain extended select * from emp_orc e join dept_orc d  on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state);
-
--- multi-attribute join
--- Expected output rows: 0
--- Reason: #rows = (6*4)/max(3,6)*max(7,5)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname);
-
--- 3 way and multi-attribute join
--- Expected output rows: 0
--- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state);
+-- emp.deptid - 3
+-- emp.lastname - 6
+-- emp.locid - 7
+-- dept.deptid - 7
+-- dept.deptname - 6
+-- loc.locid - 7
+-- loc.state - 6
+
+-- 2 relations, 1 attribute
+-- Expected output rows: (48*6)/max(3,7) = 41
+explain select * from emp e join dept d on (e.deptid = d.deptid);
+
+-- 2 relations, 2 attributes
+-- Expected output rows: (48*6)/(max(3,7) * max(6,6)) = 6
+explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname;
+explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname);
+
+-- 2 relations, 3 attributes
+-- Expected output rows: (48*6)/(max(3,7) * max(6,6) * max(6,6)) = 1
+explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname;
+
+-- 3 relations, 1 attribute
+-- Expected output rows: (48*6*48)/top2largest(3,7,3) = 658
+explain select * from emp e join dept d on (e.deptid = d.deptid) join emp e1 on (e.deptid = e1.deptid);
+
+-- Expected output rows: (48*6*8)/top2largest(3,7,7) = 47
+explain select * from emp e join dept d  on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid);
+
+-- 3 relations, 2 attributes
+-- Expected output rows: (48*6*8)/(top2largest(3,7,7) * top2largest(6,6,6)) = 1
+explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state);
 

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_limit.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_limit.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_limit.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_limit.q Thu Aug  7 00:21:45 2014
@@ -17,14 +17,14 @@ insert overwrite table loc_orc select * 
 analyze table loc_orc compute statistics for columns state, locid, zip, year;
 
 -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- numRows: 4 rawDataSize: 396
-explain extended select * from loc_orc limit 4;
+explain select * from loc_orc limit 4;
 
 -- greater than the available number of rows
 -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc limit 16;
+explain select * from loc_orc limit 16;
 
 -- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc limit 0;
+explain select * from loc_orc limit 0;

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_part.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_part.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_part.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_part.q Thu Aug  7 00:21:45 2014
@@ -19,67 +19,67 @@ create table if not exists loc_orc (
 ) partitioned by(year string) stored as orc;
 
 -- basicStatState: NONE colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 insert overwrite table loc_orc partition(year) select * from loc_staging;
 
 -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL
 
 -- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- partition level analyze statistics for specific partition
 analyze table loc_orc partition(year='2001') compute statistics;
 
 -- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
 
 -- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc where year='2001';
+explain select * from loc_orc where year='2001';
 
 -- partition level analyze statistics for all partitions
 analyze table loc_orc partition(year) compute statistics;
 
 -- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
 
 -- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc where year='2001' or year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='2001' or year='__HIVE_DEFAULT_PARTITION__';
 
 -- both partitions will be pruned
 -- basicStatState: NONE colStatState: NONE
-explain extended select * from loc_orc where year='2001' and year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='2001' and year='__HIVE_DEFAULT_PARTITION__';
 
 -- partition level partial column statistics
 analyze table loc_orc partition(year='2001') compute statistics for columns state,locid;
 
 -- basicStatState: COMPLETE colStatState: NONE
-explain extended select zip from loc_orc;
+explain select zip from loc_orc;
 
 -- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select state from loc_orc;
+explain select state from loc_orc;
 
 -- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL
 -- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select state,locid from loc_orc;
+explain select state,locid from loc_orc;
 
 -- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select state,locid from loc_orc where year='2001';
+explain select state,locid from loc_orc where year='2001';
 
 -- basicStatState: COMPLETE colStatState: NONE
-explain extended select state,locid from loc_orc where year!='2001';
+explain select state,locid from loc_orc where year!='2001';
 
 -- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- This is to test filter expression evaluation on partition column
 -- numRows: 2 dataSize: 8 basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select locid from loc_orc where locid>0 and year='2001';
-explain extended select locid,year from loc_orc where locid>0 and year='2001';
-explain extended select * from (select locid,year from loc_orc) test where locid>0 and year='2001';
+explain select locid from loc_orc where locid>0 and year='2001';
+explain select locid,year from loc_orc where locid>0 and year='2001';
+explain select * from (select locid,year from loc_orc) test where locid>0 and year='2001';

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_select.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_select.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_select.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_select.q Thu Aug  7 00:21:45 2014
@@ -28,116 +28,116 @@ load data local inpath '../../data/files
 insert overwrite table alltypes_orc select * from alltypes;
 
 -- basicStatState: COMPLETE colStatState: NONE numRows: 2 rawDataSize: 1514
-explain extended select * from alltypes_orc;
+explain select * from alltypes_orc;
 
 -- statistics for complex types are not supported yet
 analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1, s1, vc1;
 
 -- numRows: 2 rawDataSize: 1514
-explain extended select * from alltypes_orc;
+explain select * from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 8
-explain extended select bo1 from alltypes_orc;
+explain select bo1 from alltypes_orc;
 
 -- col alias renaming
 -- numRows: 2 rawDataSize: 8
-explain extended select i1 as int1 from alltypes_orc;
+explain select i1 as int1 from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 174
-explain extended select s1 from alltypes_orc;
+explain select s1 from alltypes_orc;
 
 -- column statistics for complex types are unsupported, so statistics will not be updated
 -- numRows: 2 rawDataSize: 1514
-explain extended select m1 from alltypes_orc;
+explain select m1 from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 246
-explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc;
+explain select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 0
-explain extended select null from alltypes_orc;
+explain select null from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 8
-explain extended select 11 from alltypes_orc;
+explain select 11 from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 16
-explain extended select 11L from alltypes_orc;
+explain select 11L from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 16
-explain extended select 11.0 from alltypes_orc;
+explain select 11.0 from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 178
-explain extended select "hello" from alltypes_orc;
-explain extended select cast("hello" as char(5)) from alltypes_orc;
-explain extended select cast("hello" as varchar(5)) from alltypes_orc;
+explain select "hello" from alltypes_orc;
+explain select cast("hello" as char(5)) from alltypes_orc;
+explain select cast("hello" as varchar(5)) from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 96
-explain extended select unbase64("0xe23") from alltypes_orc;
+explain select unbase64("0xe23") from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 16
-explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc;
+explain select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 80
-explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc;
+explain select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 112
-explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc;
+explain select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 224
-explain extended select cast("58.174" as DECIMAL) from alltypes_orc;
+explain select cast("58.174" as DECIMAL) from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 112
-explain extended select array(1,2,3) from alltypes_orc;
+explain select array(1,2,3) from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 1508
-explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc;
+explain select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 112
-explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc;
+explain select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc;
 
 -- numRows: 2 rawDataSize: 250
-explain extended select CREATE_UNION(0, "hello") from alltypes_orc;
+explain select CREATE_UNION(0, "hello") from alltypes_orc;
 
 -- COUNT(*) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows
 -- numRows: 1 rawDataSize: 8
-explain extended select count(*) from alltypes_orc;
+explain select count(*) from alltypes_orc;
 
 -- COUNT(1) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows
 -- numRows: 1 rawDataSize: 8
-explain extended select count(1) from alltypes_orc;
+explain select count(1) from alltypes_orc;
 
 -- column statistics for complex column types will be missing. data size will be calculated from available column statistics
 -- numRows: 2 rawDataSize: 254
-explain extended select *,11 from alltypes_orc;
+explain select *,11 from alltypes_orc;
 
 -- subquery selects
 -- inner select - numRows: 2 rawDataSize: 8
 -- outer select - numRows: 2 rawDataSize: 8
-explain extended select i1 from (select i1 from alltypes_orc limit 10) temp;
+explain select i1 from (select i1 from alltypes_orc limit 10) temp;
 
 -- inner select - numRows: 2 rawDataSize: 16
 -- outer select - numRows: 2 rawDataSize: 8
-explain extended select i1 from (select i1,11 from alltypes_orc limit 10) temp;
+explain select i1 from (select i1,11 from alltypes_orc limit 10) temp;
 
 -- inner select - numRows: 2 rawDataSize: 16
 -- outer select - numRows: 2 rawDataSize: 186
-explain extended select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp;
+explain select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp;
 
 -- inner select - numRows: 2 rawDataSize: 24
 -- outer select - numRows: 2 rawDataSize: 16
-explain extended select x from (select i1,11.0 as x from alltypes_orc limit 10) temp;
+explain select x from (select i1,11.0 as x from alltypes_orc limit 10) temp;
 
 -- inner select - numRows: 2 rawDataSize: 104
 -- outer select - numRows: 2 rawDataSize: 186
-explain extended select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp;
+explain select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp;
 
 -- inner select -  numRows: 2 rawDataSize: 186
 -- middle select - numRows: 2 rawDataSize: 178
 -- outer select -  numRows: 2 rawDataSize: 194
-explain extended select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2;
+explain select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2;
 
 -- This test is for FILTER operator where filter expression is a boolean column
 -- numRows: 2 rawDataSize: 8
-explain extended select bo1 from alltypes_orc where bo1;
+explain select bo1 from alltypes_orc where bo1;
 
 -- numRows: 0 rawDataSize: 0
-explain extended select bo1 from alltypes_orc where !bo1;
+explain select bo1 from alltypes_orc where !bo1;

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_table.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_table.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_table.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_table.q Thu Aug  7 00:21:45 2014
@@ -10,7 +10,7 @@ create table if not exists emp_orc like 
 alter table emp_orc set fileformat orc;
 
 -- basicStatState: NONE colStatState: NONE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
 
 LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging;
 
@@ -19,35 +19,35 @@ insert overwrite table emp_orc select * 
 -- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL
 
 -- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
 
 -- table level analyze statistics
 analyze table emp_orc compute statistics;
 
 -- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
 
 -- column level partial statistics
 analyze table emp_orc compute statistics for columns deptid;
 
 -- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select * from emp_orc;
+explain select * from emp_orc;
 
 -- all selected columns have statistics
 -- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select deptid from emp_orc;
+explain select deptid from emp_orc;
 
 -- column level complete statistics
 analyze table emp_orc compute statistics for columns lastname,deptid;
 
 -- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
 
 -- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select lastname from emp_orc;
+explain select lastname from emp_orc;
 
 -- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select deptid from emp_orc;
+explain select deptid from emp_orc;
 
 -- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select lastname,deptid from emp_orc;
+explain select lastname,deptid from emp_orc;

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_union.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_union.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_union.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_union.q Thu Aug  7 00:21:45 2014
@@ -17,16 +17,16 @@ insert overwrite table loc_orc select * 
 analyze table loc_orc compute statistics for columns state,locid,zip,year;
 
 -- numRows: 8 rawDataSize: 688
-explain extended select state from loc_orc;
+explain select state from loc_orc;
 
 -- numRows: 16 rawDataSize: 1376
-explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp;
+explain select * from (select state from loc_orc union all select state from loc_orc) tmp;
 
 -- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
 
 -- numRows: 16 rawDataSize: 1592
-explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp;
+explain select * from (select * from loc_orc union all select * from loc_orc) tmp;
 
 create database test;
 use test;
@@ -49,7 +49,7 @@ analyze table loc_staging compute statis
 analyze table loc_orc compute statistics for columns state,locid,zip,year;
 
 -- numRows: 16 rawDataSize: 1376
-explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp;
+explain select * from (select state from default.loc_orc union all select state from test.loc_orc) temp;
 
 -- numRows: 16 rawDataSize: 1376
-explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp;
+explain select * from (select state from test.loc_staging union all select state from test.loc_orc) temp;

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/columnstats_partlvl.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/columnstats_partlvl.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/columnstats_partlvl.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/columnstats_partlvl.q Thu Aug  7 00:21:45 2014
@@ -30,4 +30,12 @@ explain 
 analyze table Employee_Part  compute statistics for columns;
 analyze table Employee_Part  compute statistics for columns;
 
+describe formatted Employee_Part.employeeID partition(employeeSalary=2000.0);
+describe formatted Employee_Part.employeeID partition(employeeSalary=4000.0);
+
+set hive.analyze.stmt.collect.partlevel.stats=false;
+explain 
+analyze table Employee_Part  compute statistics for columns;
+analyze table Employee_Part  compute statistics for columns;
+
 describe formatted Employee_Part.employeeID;

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/parquet_columnar.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/parquet_columnar.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/parquet_columnar.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/parquet_columnar.q Thu Aug  7 00:21:45 2014
@@ -13,15 +13,16 @@ CREATE TABLE parquet_columnar_access_sta
 
 CREATE TABLE parquet_columnar_access (
     s string,
-    i int,
+    x int,
+    y int,
     f float
   ) STORED AS PARQUET;
 
 LOAD DATA LOCAL INPATH '../../data/files/parquet_columnar.txt' OVERWRITE INTO TABLE parquet_columnar_access_stage;
 
-INSERT OVERWRITE TABLE parquet_columnar_access SELECT * FROM parquet_columnar_access_stage;
+INSERT OVERWRITE TABLE parquet_columnar_access SELECT s, i, (i + 1), f FROM parquet_columnar_access_stage;
 SELECT * FROM parquet_columnar_access;
 
-ALTER TABLE parquet_columnar_access REPLACE COLUMNS (s1 string, i1 int, f1 float);
+ALTER TABLE parquet_columnar_access REPLACE COLUMNS (s1 string, x1 int, y1 int, f1 float);
 
 SELECT * FROM parquet_columnar_access;

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_14.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_14.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_14.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_14.q Thu Aug  7 00:21:45 2014
@@ -1,4 +1,38 @@
 SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN 
+SELECT   ctimestamp1,
+         cfloat,
+         cstring1,
+         cboolean1,
+         cdouble,
+         (-26.28 + cdouble),
+         (-((-26.28 + cdouble))),
+         STDDEV_SAMP((-((-26.28 + cdouble)))),
+         (cfloat * -26.28),
+         MAX(cfloat),
+         (-(cfloat)),
+         (-(MAX(cfloat))),
+         ((-((-26.28 + cdouble))) / 10.175),
+         STDDEV_POP(cfloat),
+         COUNT(cfloat),
+         (-(((-((-26.28 + cdouble))) / 10.175))),
+         (-1.389 % STDDEV_SAMP((-((-26.28 + cdouble))))),
+         (cfloat - cdouble),
+         VAR_POP(cfloat),
+         (VAR_POP(cfloat) % 10.175),
+         VAR_SAMP(cfloat),
+         (-((cfloat - cdouble)))
+FROM     alltypesorc
+WHERE    (((ctinyint <= cbigint)
+           AND ((cint <= cdouble)
+                OR (ctimestamp2 < ctimestamp1)))
+          AND ((cdouble < ctinyint)
+              AND ((cbigint > -257)
+                  OR (cfloat < cint))))
+GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble
+ORDER BY cstring1, cfloat, cdouble, ctimestamp1;
+
 SELECT   ctimestamp1,
          cfloat,
          cstring1,

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_15.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_15.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_15.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_15.q Thu Aug  7 00:21:45 2014
@@ -1,4 +1,36 @@
 SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN 
+SELECT   cfloat,
+         cboolean1,
+         cdouble,
+         cstring1,
+         ctinyint,
+         cint,
+         ctimestamp1,
+         STDDEV_SAMP(cfloat),
+         (-26.28 - cint),
+         MIN(cdouble),
+         (cdouble * 79.553),
+         (33 % cfloat),
+         STDDEV_SAMP(ctinyint),
+         VAR_POP(ctinyint),
+         (-23 % cdouble),
+         (-(ctinyint)),
+         VAR_SAMP(cint),
+         (cint - cfloat),
+         (-23 % ctinyint),
+         (-((-26.28 - cint))),
+         STDDEV_POP(cint)
+FROM     alltypesorc
+WHERE    (((cstring2 LIKE '%ss%')
+           OR (cstring1 LIKE '10%'))
+          OR ((cint >= -75)
+              AND ((ctinyint = csmallint)
+                   AND (cdouble >= -3728))))
+GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
+ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1;
+
 SELECT   cfloat,
          cboolean1,
          cdouble,

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_16.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_16.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_16.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_16.q Thu Aug  7 00:21:45 2014
@@ -1,4 +1,25 @@
 SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN 
+SELECT   cstring1,
+         cdouble,
+         ctimestamp1,
+         (cdouble - 9763215.5639),
+         (-((cdouble - 9763215.5639))),
+         COUNT(cdouble),
+         STDDEV_SAMP(cdouble),
+         (-(STDDEV_SAMP(cdouble))),
+         (STDDEV_SAMP(cdouble) * COUNT(cdouble)),
+         MIN(cdouble),
+         (9763215.5639 / cdouble),
+         (COUNT(cdouble) / -1.389),
+         STDDEV_SAMP(cdouble)
+FROM     alltypesorc
+WHERE    ((cstring2 LIKE '%b%')
+          AND ((cdouble >= -1.389)
+              OR (cstring1 < 'a')))
+GROUP BY cstring1, cdouble, ctimestamp1;
+
 SELECT   cstring1,
          cdouble,
          ctimestamp1,

Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_9.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_9.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_9.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_9.q Thu Aug  7 00:21:45 2014
@@ -1,4 +1,25 @@
 SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN 
+SELECT   cstring1,
+         cdouble,
+         ctimestamp1,
+         (cdouble - 9763215.5639),
+         (-((cdouble - 9763215.5639))),
+         COUNT(cdouble),
+         STDDEV_SAMP(cdouble),
+         (-(STDDEV_SAMP(cdouble))),
+         (STDDEV_SAMP(cdouble) * COUNT(cdouble)),
+         MIN(cdouble),
+         (9763215.5639 / cdouble),
+         (COUNT(cdouble) / -1.389),
+         STDDEV_SAMP(cdouble)
+FROM     alltypesorc
+WHERE    ((cstring2 LIKE '%b%')
+          AND ((cdouble >= -1.389)
+              OR (cstring1 < 'a')))
+GROUP BY cstring1, cdouble, ctimestamp1;
+
 SELECT   cfloat,
          cstring1,
          cint,

Modified: hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_filter.q.out?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
Files hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_filter.q.out (original) and hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_filter.q.out Thu Aug  7 00:21:45 2014 differ

Modified: hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
Files hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out (original) and hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out Thu Aug  7 00:21:45 2014 differ

Modified: hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_join.q.out
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_join.q.out?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
Files hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_join.q.out (original) and hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_join.q.out Thu Aug  7 00:21:45 2014 differ