You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/08/12 20:42:55 UTC

svn commit: r1617559 - in /hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq: RelOptHiveTable.java reloperators/HiveTableScanRel.java rules/HivePartitionPrunerRule.java stats/HiveRelMdDistinctRowCount.java

Author: hashutosh
Date: Tue Aug 12 18:42:54 2014
New Revision: 1617559

URL: http://svn.apache.org/r1617559
Log:
HIVE-7686 : CBO fails to retrieve stats if there are no predicates on partitioning columns of partitioned table (Ashutosh Chauhan via Harish Butani)

Modified:
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveTableScanRel.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdDistinctRowCount.java

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java?rev=1617559&r1=1617558&r2=1617559&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/RelOptHiveTable.java Tue Aug 12 18:42:54 2014
@@ -28,6 +28,7 @@ import org.eigenbase.rel.RelNode;
 import org.eigenbase.rel.TableAccessRel;
 import org.eigenbase.relopt.RelOptAbstractTable;
 import org.eigenbase.relopt.RelOptSchema;
+import org.eigenbase.relopt.RelOptUtil.InputFinder;
 import org.eigenbase.reltype.RelDataType;
 import org.eigenbase.rex.RexNode;
 
@@ -140,16 +141,30 @@ public class RelOptHiveTable extends Rel
     return sb.toString();
   }
 
-  public void computePartitionList(HiveConf conf, RexNode pruneNode) throws HiveException {
+  public void computePartitionList(HiveConf conf, RexNode pruneNode) {
     partitionList = null;
-    if (pruneNode == null) {
+
+    if (!m_hiveTblMetadata.isPartitioned()) {
+      // no partitions for unpartitioned tables.
       return;
     }
 
-    ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), true));
+    try {
+      if (pruneNode == null || InputFinder.bits(pruneNode).length() == 0 ) {
+        // There is no predicate on any partitioning column, so we need all partitions.
+        partitionList = PartitionPruner.prune(m_hiveTblMetadata, null, conf, getName(),
+            new HashMap<String, PrunedPartitionList>());
+        return;
+      }
 
-    partitionList = PartitionPruner.prune(m_hiveTblMetadata, pruneExpr, conf, getName(),
-        new HashMap<String, PrunedPartitionList>());
+      // We have valid pruning expressions, only retrieve qualifying partitions
+      ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), true));
+
+      partitionList = PartitionPruner.prune(m_hiveTblMetadata, pruneExpr, conf, getName(),
+          new HashMap<String, PrunedPartitionList>());
+    } catch (HiveException he) {
+      throw new RuntimeException(he);
+    }
   }
 
   private void updateColStats(Set<Integer> projIndxLst) {
@@ -182,8 +197,15 @@ public class RelOptHiveTable extends Rel
     if (nonPartColNamesThatRqrStats.size() > 0) {
       List<ColStatistics> hiveColStats;
 
-      // 2.1 Handle the case where we are scanning only a set of partitions
+      if (null == partitionList) {
+        // We could be here either because it's an unpartitioned table or because
+        // there are no pruning predicates on a partitioned table. If it's the latter,
+        // we need to fetch all partitions, so do that now.
+        computePartitionList(m_hiveConf, null);
+      }
+
       if (partitionList == null) {
+        // 2.1 Handle the case for unpartitioned table.
         hiveColStats = StatsUtils.getTableColumnStats(m_hiveTblMetadata, m_hiveNonPartitionCols,
             nonPartColNamesThatRqrStats);
 
@@ -202,7 +224,7 @@ public class RelOptHiveTable extends Rel
           colNamesFailedStats.addAll(setOfFiledCols);
         }
       } else {
-        // 2.2 Obtain col stats for full table scan
+        // 2.2 Obtain col stats for partitioned table.
         try {
           Statistics stats = StatsUtils.collectStatistics(m_hiveConf, partitionList,
               m_hiveTblMetadata, m_hiveNonPartitionCols, nonPartColNamesThatRqrStats, true, true);
@@ -234,24 +256,7 @@ public class RelOptHiveTable extends Rel
     // TODO: Just using no of partitions for NDV is a gross approximation for
     // multi col partitions; Hack till HIVE-7392 gets fixed.
     if (colNamesFailedStats.isEmpty() && !partColNamesThatRqrStats.isEmpty()) {
-      if (m_numPartitions == null) {
-        try {
-          if (partitionList != null) {
-            m_numPartitions = partitionList.getPartitions().size();
-          } else {
-            m_numPartitions = Hive
-                .get()
-                .getPartitionNames(m_hiveTblMetadata.getDbName(), m_hiveTblMetadata.getTableName(),
-                    (short) -1).size();
-          }
-        } catch (HiveException e) {
-          String logMsg = "Could not get stats, number of Partitions for "
-              + m_hiveTblMetadata.getCompleteName();
-          LOG.error(logMsg);
-          throw new RuntimeException(logMsg);
-        }
-      }
-
+       m_numPartitions = partitionList.getPartitions().size();
       ColStatistics cStats = null;
       for (int i = 0; i < partColNamesThatRqrStats.size(); i++) {
         cStats = new ColStatistics(m_hiveTblMetadata.getTableName(),
@@ -295,7 +300,7 @@ public class RelOptHiveTable extends Rel
   }
 
   /*
-   * use to check if a set of columns are all partition columns. 
+   * use to check if a set of columns are all partition columns.
    * true only if:
    * - there is a prunedPartList in place
    * - all columns in BitSet are partition

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveTableScanRel.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveTableScanRel.java?rev=1617559&r1=1617558&r2=1617559&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveTableScanRel.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/reloperators/HiveTableScanRel.java Tue Aug 12 18:42:54 2014
@@ -60,6 +60,7 @@ public class HiveTableScanRel extends Ta
 
   }
 
+  @Override
   public void implement(Implementor implementor) {
 
   }
@@ -68,8 +69,8 @@ public class HiveTableScanRel extends Ta
   public double getRows() {
     return ((RelOptHiveTable) table).getRowCount();
   }
-  
+
   public List<ColStatistics> getColStat(List<Integer> projIndxLst) {
-    return ((RelOptHiveTable) table).getColStat(projIndxLst);    
+    return ((RelOptHiveTable) table).getColStat(projIndxLst);
   }
 }
\ No newline at end of file

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java?rev=1617559&r1=1617558&r2=1617559&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePartitionPrunerRule.java Tue Aug 12 18:42:54 2014
@@ -18,14 +18,12 @@
 package org.apache.hadoop.hive.ql.optimizer.optiq.rules;
 
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveFilterRel;
 import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
 import org.eigenbase.rel.FilterRelBase;
 import org.eigenbase.relopt.RelOptRule;
 import org.eigenbase.relopt.RelOptRuleCall;
-import org.eigenbase.relopt.RelOptUtil.InputFinder;
 import org.eigenbase.rex.RexNode;
 import org.eigenbase.util.Pair;
 
@@ -57,15 +55,6 @@ public class HivePartitionPrunerRule ext
     RexNode remainingExpr = predicates.right;
     remainingExpr = remainingExpr == null ? filter.getCluster().getRexBuilder()
         .makeLiteral(true) : remainingExpr;
-
-    if (partColExpr == null || InputFinder.bits(partColExpr).length() == 0 ) {
-      return;
-    }
-
-    try {
-      hiveTable.computePartitionList(conf, partColExpr);
-    } catch (HiveException he) {
-      throw new RuntimeException(he);
-    }
+    hiveTable.computePartitionList(conf, partColExpr);
   }
 }

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdDistinctRowCount.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdDistinctRowCount.java?rev=1617559&r1=1617558&r2=1617559&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdDistinctRowCount.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/HiveRelMdDistinctRowCount.java Tue Aug 12 18:42:54 2014
@@ -93,7 +93,7 @@ public class HiveRelMdDistinctRowCount e
   }
 
   /*
-   * Favor Broad Plans over Deep Plans. 
+   * Favor Broad Plans over Deep Plans.
    */
   public RelOptCost getCumulativeCost(HiveJoinRel rel) {
     RelOptCost cost = RelMetadataQuery.getNonCumulativeCost(rel);