You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/08/19 16:26:38 UTC

svn commit: r805816 - in /hadoop/hive/branches/branch-0.4: ./ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/java/org/apache/hadoop/hive/ql/plan/...

Author: namit
Date: Wed Aug 19 14:26:37 2009
New Revision: 805816

URL: http://svn.apache.org/viewvc?rev=805816&view=rev
Log:
HIVE-769. Fix bug in partition pruning (Zheng Shao via namit)


Modified:
    hadoop/hive/branches/branch-0.4/CHANGES.txt
    hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
    hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
    hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTPartitionPruner.java
    hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java
    hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeDesc.java
    hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFieldDesc.java
    hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFuncDesc.java
    hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeGenericFuncDesc.java
    hadoop/hive/branches/branch-0.4/ql/src/test/results/clientnegative/strict_pruning.q.out

Modified: hadoop/hive/branches/branch-0.4/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/CHANGES.txt?rev=805816&r1=805815&r2=805816&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/CHANGES.txt (original)
+++ hadoop/hive/branches/branch-0.4/CHANGES.txt Wed Aug 19 14:26:37 2009
@@ -512,6 +512,8 @@
     HIVE-772. Fix genConversionSelect for init serde with null.
     (Namit Jain via zshao)
 
+    HIVE-769. Fix bug in partition pruning (Zheng Shao via namit)
+
 Release 0.3.1 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=805816&r1=805815&r2=805816&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Wed Aug 19 14:26:37 2009
@@ -402,8 +402,12 @@
       else {
         partsList = org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.prune(
                                                     parseCtx.getTopToTable().get(topOp), 
-                                                    parseCtx.getOpToPartPruner().get(topOp));
+                                                    parseCtx.getOpToPartPruner().get(topOp),
+                                                    opProcCtx.getConf(),
+                                                    alias_id);
       }
+    } catch (SemanticException e) {
+      throw e;
     } catch (HiveException e) {
       LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
       throw new SemanticException(e.getMessage(), e);

Modified: hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=805816&r1=805815&r2=805816&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original)
+++ hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Wed Aug 19 14:26:37 2009
@@ -21,11 +21,12 @@
 import java.util.ArrayList;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
@@ -42,9 +43,11 @@
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.Transform;
+import org.apache.hadoop.hive.ql.parse.ErrorMsg;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.exprNodeDesc;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -87,7 +90,19 @@
     return pctx;
   }
 
-  public static PrunedPartitionList prune(Table tab, exprNodeDesc prunerExpr) throws HiveException {
+  /**
+   * Get the partition list for the table that satisfies the partition pruner
+   * condition.
+   * 
+   * @param tab    the table object for the alias
+   * @param prunerExpr  the pruner expression for the alias
+   * @param conf   for checking whether "strict" mode is on.
+   * @param alias  for generating error message only.
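+   * @return the pruned partition list (true, unknown and denied partitions)
+   * @throws HiveException if pruning fails, or in "strict" mode when the pruner expression has no column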
+   */
+  public static PrunedPartitionList prune(Table tab, exprNodeDesc prunerExpr,
+      HiveConf conf, String alias) throws HiveException {
     LOG.trace("Started pruning partiton");
     LOG.trace("tabname = " + tab.getName());
     LOG.trace("prune Expression = " + prunerExpr);
@@ -121,6 +136,14 @@
           ois.add(partObjectInspector);
           StructObjectInspector rowWithPartObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(ois);
 
+          // If the "strict" mode is on, we have to provide partition pruner for each table.  
+          if ("strict".equalsIgnoreCase(HiveConf.getVar(conf, HiveConf.ConfVars.HIVEMAPREDMODE))) {
+            if (!hasColumnExpr(prunerExpr)) {
+              throw new SemanticException(ErrorMsg.NO_PARTITION_PREDICATE.getMsg( 
+                  "for Alias \"" + alias + "\" Table \"" + tab.getName() + "\""));
+            }
+          }
+          
           // evaluate the expression tree
           if (prunerExpr != null) {
             ExprNodeEvaluator evaluator = ExprNodeEvaluatorFactory.get(prunerExpr);
@@ -152,11 +175,37 @@
       } else {
         true_parts.addAll(Hive.get().getPartitions(tab));
       }
+    } catch (HiveException e) {
+      throw e;
     } catch (Exception e) {
       throw new HiveException(e);
     }
 
     // Now return the set of partitions
     return new PrunedPartitionList(true_parts, unkn_parts, denied_parts);
-  }  
+  }
+  
+  /**
+   * Whether the expression contains a column node; null-safe for leaf nodes.
+   */
+  public static boolean hasColumnExpr(exprNodeDesc desc) {
+    // Return false for null 
+    if (desc == null) {
+      return false;
+    }
+    // Return true for exprNodeColumnDesc
+    if (desc instanceof exprNodeColumnDesc) {
+      return true;
+    }
+    // Base exprNodeDesc.getChildren() returns null, so guard before recursing.
+    List<exprNodeDesc> children = desc.getChildren();
+    for (int i = 0; children != null && i < children.size(); i++) {
+      if (hasColumnExpr(children.get(i))) {
+        return true;
+      }
+    }
+    // Return false otherwise
+    return false;
+  }
+  
 }

Modified: hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTPartitionPruner.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTPartitionPruner.java?rev=805816&r1=805815&r2=805816&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTPartitionPruner.java (original)
+++ hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/ASTPartitionPruner.java Wed Aug 19 14:26:37 2009
@@ -20,6 +20,7 @@
 
 import java.util.*;
 
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
@@ -64,6 +65,8 @@
 
   private exprNodeDesc prunerExpr;
   
+  private HiveConf conf;
+  
   // is set to true if the expression only contains partitioning columns and not any other column reference.
   // This is used to optimize select * from table where ... scenario, when the where condition only references
   // partitioning columns - the partitions are identified and streamed directly to the client without requiring 
@@ -74,11 +77,12 @@
   }
   
   /** Creates a new instance of PartitionPruner */
-  public ASTPartitionPruner(String tableAlias, QBMetaData metaData) {
+  public ASTPartitionPruner(String tableAlias, QBMetaData metaData, HiveConf conf) {
     this.tableAlias = tableAlias;
     this.metaData = metaData;
     this.tab = metaData.getTableForAlias(tableAlias);
     this.prunerExpr = null;
+    this.conf = conf;
     onlyContainsPartCols = true;
   }
 
@@ -428,7 +432,8 @@
    */
   @SuppressWarnings("nls")
   public PrunedPartitionList prune() throws HiveException {
-    return org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.prune(this.tab, this.prunerExpr);
+    return org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.prune(this.tab,
+        this.prunerExpr, conf, this.tableAlias);
   }
 
   public Table getTable() {

Modified: hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java?rev=805816&r1=805815&r2=805816&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java (original)
+++ hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java Wed Aug 19 14:26:37 2009
@@ -132,6 +132,10 @@
     return getMsg((ASTNode)tree, reason);
   }
 
+  public String getMsg(String reason) {
+    return mesg + " " + reason;
+  }
+
   public String getMsg() {
     return mesg;
   }

Modified: hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=805816&r1=805815&r2=805816&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Wed Aug 19 14:26:37 2009
@@ -108,6 +108,7 @@
 import org.apache.hadoop.hive.ql.plan.tableDesc;
 import org.apache.hadoop.hive.ql.plan.tableScanDesc;
 import org.apache.hadoop.hive.ql.plan.unionDesc;
+import org.apache.hadoop.hive.ql.ppd.PredicatePushDown;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
@@ -610,7 +611,7 @@
       String alias_id = (qb.getId() == null ? alias : qb.getId() + ":" + alias);
 
       org.apache.hadoop.hive.ql.parse.ASTPartitionPruner pruner = 
-        new org.apache.hadoop.hive.ql.parse.ASTPartitionPruner(alias, qb.getMetaData());
+        new org.apache.hadoop.hive.ql.parse.ASTPartitionPruner(alias, qb.getMetaData(), conf);
       
       // Pass each where clause to the pruner
       for(String clause: qbp.getClauseNames()) {
@@ -656,21 +657,26 @@
       }
     }
 
-    for (String alias : qb.getTabAliases()) {
-      String alias_id = (qb.getId() == null ? alias : qb.getId() + ":" + alias);
-      org.apache.hadoop.hive.ql.parse.ASTPartitionPruner pruner = this.aliasToPruner.get(alias_id);
-      if (joinPartnPruner.get(alias_id) == null) {
-        // Pass each where clause to the pruner
-         for(String clause: qbp.getClauseNames()) {
-          
-           ASTNode whexp = (ASTNode)qbp.getWhrForClause(clause);
-           if (pruner.getTable().isPartitioned() &&
-               conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict") &&
-               (whexp == null || !pruner.hasPartitionPredicate((ASTNode)whexp.getChild(0)))) {
-             throw new SemanticException(ErrorMsg.NO_PARTITION_PREDICATE.getMsg(whexp != null ? whexp : qbp.getSelForClause(clause), 
-                                                                                " for Alias " + alias + " Table " + pruner.getTable().getName()));
+    // Do old-style partition pruner check only if the new partition pruner
+    // is not enabled.
+    if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEOPTPPD)
+        || !HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEOPTPPR)) {
+      for (String alias : qb.getTabAliases()) {
+        String alias_id = (qb.getId() == null ? alias : qb.getId() + ":" + alias);
+        org.apache.hadoop.hive.ql.parse.ASTPartitionPruner pruner = this.aliasToPruner.get(alias_id);
+        if (joinPartnPruner.get(alias_id) == null) {
+          // Pass each where clause to the pruner
+           for(String clause: qbp.getClauseNames()) {
+            
+             ASTNode whexp = (ASTNode)qbp.getWhrForClause(clause);
+             if (pruner.getTable().isPartitioned() &&
+                 conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict") &&
+                 (whexp == null || !pruner.hasPartitionPredicate((ASTNode)whexp.getChild(0)))) {
+               throw new SemanticException(ErrorMsg.NO_PARTITION_PREDICATE.getMsg(whexp != null ? whexp : qbp.getSelForClause(clause), 
+                                                                                  " for Alias " + alias + " Table " + pruner.getTable().getName()));
+             }
            }
-         }
+        }
       }
     }
   }
@@ -4403,7 +4409,7 @@
     init(pCtx);
     qb = pCtx.getQB();
     
-    // Do any partition pruning
+    // Do any partition pruning using ASTPartitionPruner
     genPartitionPruners(qb);
     LOG.info("Completed partition pruning");
     

Modified: hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeDesc.java?rev=805816&r1=805815&r2=805816&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeDesc.java (original)
+++ hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeDesc.java Wed Aug 19 14:26:37 2009
@@ -65,7 +65,7 @@
   }
   
   @Override
-  public List<? extends Node> getChildren() {
+  public List<exprNodeDesc> getChildren() {
     return null;
   }
   

Modified: hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFieldDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFieldDesc.java?rev=805816&r1=805815&r2=805816&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFieldDesc.java (original)
+++ hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFieldDesc.java Wed Aug 19 14:26:37 2009
@@ -46,8 +46,8 @@
   }
   
   @Override
-  public List<? extends Node> getChildren() {
-    List<Node> children = new ArrayList<Node>(2);
+  public List<exprNodeDesc> getChildren() {
+    List<exprNodeDesc> children = new ArrayList<exprNodeDesc>(2);
     children.add(desc);
     return children;
   }

Modified: hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFuncDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFuncDesc.java?rev=805816&r1=805815&r2=805816&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFuncDesc.java (original)
+++ hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeFuncDesc.java Wed Aug 19 14:26:37 2009
@@ -75,8 +75,8 @@
     this.childExprs = children;
   }
   @Override
-  public List<? extends Node> getChildren() {
-    return (List<? extends Node>)this.childExprs;
+  public List<exprNodeDesc> getChildren() {
+    return this.childExprs;
   }
   public String toString() {
     StringBuilder sb = new StringBuilder();

Modified: hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeGenericFuncDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeGenericFuncDesc.java?rev=805816&r1=805815&r2=805816&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeGenericFuncDesc.java (original)
+++ hadoop/hive/branches/branch-0.4/ql/src/java/org/apache/hadoop/hive/ql/plan/exprNodeGenericFuncDesc.java Wed Aug 19 14:26:37 2009
@@ -64,8 +64,8 @@
     this.childExprs = children;
   }
   @Override
-  public List<? extends Node> getChildren() {
-    return (List<? extends Node>)this.childExprs;
+  public List<exprNodeDesc> getChildren() {
+    return this.childExprs;
   }
   public String toString() {
     StringBuilder sb = new StringBuilder();

Modified: hadoop/hive/branches/branch-0.4/ql/src/test/results/clientnegative/strict_pruning.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/ql/src/test/results/clientnegative/strict_pruning.q.out?rev=805816&r1=805815&r2=805816&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/ql/src/test/results/clientnegative/strict_pruning.q.out (original)
+++ hadoop/hive/branches/branch-0.4/ql/src/test/results/clientnegative/strict_pruning.q.out Wed Aug 19 14:26:37 2009
@@ -1 +1 @@
-FAILED: Error in semantic analysis: line 4:7 No Partition Predicate Found 1:  for Alias srcpart Table srcpart
+FAILED: Error in semantic analysis: No Partition Predicate Found for Alias "srcpart" Table "srcpart"