You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/06/04 06:29:56 UTC

svn commit: r781650 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/ppd/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/

Author: namit
Date: Thu Jun  4 04:29:55 2009
New Revision: 781650

URL: http://svn.apache.org/viewvc?rev=781650&view=rev
Log:
HIVE-532. Predicates containing rand() not pushed above
(Prasad Chakka via namit)


Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/rand_partitionpruner3.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=781650&r1=781649&r2=781650&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Jun  4 04:29:55 2009
@@ -217,6 +217,9 @@
     HIVE-479. Change RCFileOutputFormat's generic signature.
     (Yongqiang He via zshao)
 
+    HIVE-532. Predicates containing rand() not pushed above
+    (Prasad Chakka via namit)
+
 Release 0.3.1 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java?rev=781650&r1=781649&r2=781650&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java Thu Jun  4 04:29:55 2009
@@ -31,7 +31,6 @@
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.plan.exprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.exprNodeFuncDesc;
 
 /**
  * Context for Expression Walker for determining predicate pushdown candidates
@@ -71,6 +70,7 @@
    * this is a map from the alias to predicates.
    */
   private Map<exprNodeDesc,ExprInfo> exprInfoMap;
+  private boolean isDeterministic = true;
   
   public ExprWalkerInfo() {
     this.pushdownPreds = new HashMap<String, List<exprNodeDesc>>();
@@ -212,4 +212,19 @@
       }
     }
   }
+
+  /**
+   * Sets the deterministic flag for this expression.
+   * @param b true if the expression is deterministic, false otherwise
+   */
+  public void setDeterministic(boolean b) {
+    isDeterministic = b;
+  }
+  
+  /**
+   * @return whether this expression is deterministic or not
+   */
+  public boolean isDeterministic() {
+    return isDeterministic;
+  }
 }

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java?rev=781650&r1=781649&r2=781650&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java Thu Jun  4 04:29:55 2009
@@ -106,6 +106,7 @@
       if(note != null && !note.deterministic()) {
         // this UDF can't be pushed down
         ctx.setIsCandidate(expr, false);
+        ctx.setDeterministic(false);
         return false;
       }
       
@@ -157,6 +158,7 @@
       if(note != null && !note.deterministic()) {
         // this GenericUDF can't be pushed down
         ctx.setIsCandidate(expr, false);
+        ctx.setDeterministic(false);
         return false;
       }
       

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java?rev=781650&r1=781649&r2=781650&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java Thu Jun  4 04:29:55 2009
@@ -167,6 +167,11 @@
       exprNodeFuncDesc predicate = (exprNodeFuncDesc) (((FilterOperator)nd).getConf()).getPredicate();
       // get pushdown predicates for this operator's predicate
       ExprWalkerInfo ewi = ExprWalkerProcFactory.extractPushdownPreds(owi, op, predicate);
+      if (!ewi.isDeterministic()) {
+        /* predicate is not deterministic */
+        return null;
+      }
+
       logExpr(nd, ewi);
       owi.putPrunedPreds(op, ewi);
       // merge it with children predicates

Modified: hadoop/hive/trunk/ql/src/test/queries/clientpositive/rand_partitionpruner3.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/rand_partitionpruner3.q?rev=781650&r1=781649&r2=781650&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/rand_partitionpruner3.q (original)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/rand_partitionpruner3.q Thu Jun  4 04:29:55 2009
@@ -1,4 +1,6 @@
+set hive.optimize.ppd=true;
 -- complex predicates in the where clause
+
 explain extended select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2';
 select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2';
 

Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out?rev=781650&r1=781649&r2=781650&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out Thu Jun  4 04:29:55 2009
@@ -1,4 +1,5 @@
 query: -- complex predicates in the where clause
+
 explain extended select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2'
 ABSTRACT SYNTAX TREE:
   (TOK_QUERY (TOK_FROM (TOK_TABREF srcpart a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF a))) (TOK_WHERE (and (and (and (< (TOK_FUNCTION rand 1) 0.1) (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08')) (not (or (> (TOK_TABLE_OR_COL key) 50) (< (TOK_TABLE_OR_COL key) 10)))) (like (. (TOK_TABLE_OR_COL a) hr) '%2')))))
@@ -29,7 +30,7 @@
                 File Output Operator
                   compressed: false
                   GlobalTableId: 0
-                  directory: /data/users/njain/hive5/hive5/ql/../build/ql/tmp/45335645/29239923.10001.insclause-0
+                  directory: file:/data/users/pchakka/workspace/oshive2/build/ql/tmp/868971644/10001
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -38,9 +39,9 @@
                         serialization.format 1
       Needs Tagging: false
       Path -> Alias:
-        file:/data/users/njain/hive5/hive5/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 
+        file:/data/users/pchakka/workspace/oshive2/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 
       Path -> Partition:
-        file:/data/users/njain/hive5/hive5/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 
+        file:/data/users/pchakka/workspace/oshive2/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 
           Partition
             partition values:
               ds 2008-04-08
@@ -59,7 +60,7 @@
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location file:/data/users/njain/hive5/hive5/build/ql/test/data/warehouse/srcpart
+                location file:/data/users/pchakka/workspace/oshive2/build/ql/test/data/warehouse/srcpart
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: srcpart
 
@@ -70,7 +71,7 @@
 
 query: select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2'
 Input: default/srcpart/ds=2008-04-08/hr=12
-Output: /data/users/njain/hive5/hive5/ql/../build/ql/tmp/47920681/27148542.10000
+Output: file:/data/users/pchakka/workspace/oshive2/build/ql/tmp/499802678/10000
 42	val_42	2008-04-08	12
 44	val_44	2008-04-08	12
 26	val_26	2008-04-08	12
@@ -94,31 +95,35 @@
               predicate:
                   expr: (((ds = '2008-04-08') and not ((UDFToDouble(key) > UDFToDouble(50)) or (UDFToDouble(key) < UDFToDouble(10)))) and (hr like '%2'))
                   type: boolean
-              Select Operator
-                expressions:
-                      expr: key
-                      type: string
-                      expr: value
-                      type: string
-                      expr: ds
-                      type: string
-                      expr: hr
-                      type: string
-                File Output Operator
-                  compressed: false
-                  GlobalTableId: 0
-                  directory: /data/users/njain/hive5/hive5/ql/../build/ql/tmp/1286189180/888381841.10001.insclause-0
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      properties:
-                        columns a.key,a.value,a.ds,a.hr
-                        serialization.format 1
+              Filter Operator
+                predicate:
+                    expr: (((ds = '2008-04-08') and not ((UDFToDouble(key) > UDFToDouble(50)) or (UDFToDouble(key) < UDFToDouble(10)))) and (hr like '%2'))
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: key
+                        type: string
+                        expr: value
+                        type: string
+                        expr: ds
+                        type: string
+                        expr: hr
+                        type: string
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+                    directory: file:/data/users/pchakka/workspace/oshive2/build/ql/tmp/1606994614/10001
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns a.key,a.value,a.ds,a.hr
+                          serialization.format 1
       Needs Tagging: false
       Path -> Alias:
-        file:/data/users/njain/hive5/hive5/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 
+        file:/data/users/pchakka/workspace/oshive2/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 
       Path -> Partition:
-        file:/data/users/njain/hive5/hive5/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 
+        file:/data/users/pchakka/workspace/oshive2/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 
           Partition
             partition values:
               ds 2008-04-08
@@ -137,7 +142,7 @@
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location file:/data/users/njain/hive5/hive5/build/ql/test/data/warehouse/srcpart
+                location file:/data/users/pchakka/workspace/oshive2/build/ql/test/data/warehouse/srcpart
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: srcpart
 
@@ -148,7 +153,7 @@
 
 query: select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2'
 Input: default/srcpart/ds=2008-04-08/hr=12
-Output: /data/users/njain/hive5/hive5/ql/../build/ql/tmp/249549357/307540214.10000
+Output: file:/data/users/pchakka/workspace/oshive2/build/ql/tmp/1256518849/10000
 27	val_27	2008-04-08	12
 37	val_37	2008-04-08	12
 15	val_15	2008-04-08	12