You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/06/04 06:29:56 UTC
svn commit: r781650 - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/ppd/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: namit
Date: Thu Jun 4 04:29:55 2009
New Revision: 781650
URL: http://svn.apache.org/viewvc?rev=781650&view=rev
Log:
HIVE-532. Predicates containing rand() not pushed above
(Prasad Chakka via namit)
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
hadoop/hive/trunk/ql/src/test/queries/clientpositive/rand_partitionpruner3.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=781650&r1=781649&r2=781650&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Jun 4 04:29:55 2009
@@ -217,6 +217,9 @@
HIVE-479. Change RCFileOutputFormat's generic signature.
(Yongqiang He via zshao)
+ HIVE-532. Predicates containing rand() not pushed above
+ (Prasad Chakka via namit)
+
Release 0.3.1 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java?rev=781650&r1=781649&r2=781650&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java Thu Jun 4 04:29:55 2009
@@ -31,7 +31,6 @@
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.plan.exprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.exprNodeFuncDesc;
/**
* Context for Expression Walker for determining predicate pushdown candidates
@@ -71,6 +70,7 @@
* this is a map from the alias to predicates.
*/
private Map<exprNodeDesc,ExprInfo> exprInfoMap;
+ private boolean isDeterministic = true;
public ExprWalkerInfo() {
this.pushdownPreds = new HashMap<String, List<exprNodeDesc>>();
@@ -212,4 +212,19 @@
}
}
}
+
+ /**
+ * Sets the deterministic flag for this expression.
+ * @param b true if the expression is deterministic, false otherwise
+ */
+ public void setDeterministic(boolean b) {
+ isDeterministic = b;
+ }
+
+ /**
+ * @return true if this expression is deterministic, false otherwise
+ */
+ public boolean isDeterministic() {
+ return isDeterministic;
+ }
}
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java?rev=781650&r1=781649&r2=781650&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java Thu Jun 4 04:29:55 2009
@@ -106,6 +106,7 @@
if(note != null && !note.deterministic()) {
// this UDF can't be pushed down
ctx.setIsCandidate(expr, false);
+ ctx.setDeterministic(false);
return false;
}
@@ -157,6 +158,7 @@
if(note != null && !note.deterministic()) {
// this GenericUDF can't be pushed down
ctx.setIsCandidate(expr, false);
+ ctx.setDeterministic(false);
return false;
}
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java?rev=781650&r1=781649&r2=781650&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java Thu Jun 4 04:29:55 2009
@@ -167,6 +167,11 @@
exprNodeFuncDesc predicate = (exprNodeFuncDesc) (((FilterOperator)nd).getConf()).getPredicate();
// get pushdown predicates for this operator's predicate
ExprWalkerInfo ewi = ExprWalkerProcFactory.extractPushdownPreds(owi, op, predicate);
+ if (!ewi.isDeterministic()) {
+ /* predicate is not deterministic */
+ return null;
+ }
+
logExpr(nd, ewi);
owi.putPrunedPreds(op, ewi);
// merge it with children predicates
Modified: hadoop/hive/trunk/ql/src/test/queries/clientpositive/rand_partitionpruner3.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/rand_partitionpruner3.q?rev=781650&r1=781649&r2=781650&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/rand_partitionpruner3.q (original)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/rand_partitionpruner3.q Thu Jun 4 04:29:55 2009
@@ -1,4 +1,6 @@
+set hive.optimize.ppd=true;
-- complex predicates in the where clause
+
explain extended select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2';
select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2';
Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out?rev=781650&r1=781649&r2=781650&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/rand_partitionpruner3.q.out Thu Jun 4 04:29:55 2009
@@ -1,4 +1,5 @@
query: -- complex predicates in the where clause
+
explain extended select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2'
ABSTRACT SYNTAX TREE:
(TOK_QUERY (TOK_FROM (TOK_TABREF srcpart a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF a))) (TOK_WHERE (and (and (and (< (TOK_FUNCTION rand 1) 0.1) (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08')) (not (or (> (TOK_TABLE_OR_COL key) 50) (< (TOK_TABLE_OR_COL key) 10)))) (like (. (TOK_TABLE_OR_COL a) hr) '%2')))))
@@ -29,7 +30,7 @@
File Output Operator
compressed: false
GlobalTableId: 0
- directory: /data/users/njain/hive5/hive5/ql/../build/ql/tmp/45335645/29239923.10001.insclause-0
+ directory: file:/data/users/pchakka/workspace/oshive2/build/ql/tmp/868971644/10001
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -38,9 +39,9 @@
serialization.format 1
Needs Tagging: false
Path -> Alias:
- file:/data/users/njain/hive5/hive5/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
+ file:/data/users/pchakka/workspace/oshive2/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
Path -> Partition:
- file:/data/users/njain/hive5/hive5/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
+ file:/data/users/pchakka/workspace/oshive2/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
Partition
partition values:
ds 2008-04-08
@@ -59,7 +60,7 @@
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location file:/data/users/njain/hive5/hive5/build/ql/test/data/warehouse/srcpart
+ location file:/data/users/pchakka/workspace/oshive2/build/ql/test/data/warehouse/srcpart
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: srcpart
@@ -70,7 +71,7 @@
query: select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2'
Input: default/srcpart/ds=2008-04-08/hr=12
-Output: /data/users/njain/hive5/hive5/ql/../build/ql/tmp/47920681/27148542.10000
+Output: file:/data/users/pchakka/workspace/oshive2/build/ql/tmp/499802678/10000
42 val_42 2008-04-08 12
44 val_44 2008-04-08 12
26 val_26 2008-04-08 12
@@ -94,31 +95,35 @@
predicate:
expr: (((ds = '2008-04-08') and not ((UDFToDouble(key) > UDFToDouble(50)) or (UDFToDouble(key) < UDFToDouble(10)))) and (hr like '%2'))
type: boolean
- Select Operator
- expressions:
- expr: key
- type: string
- expr: value
- type: string
- expr: ds
- type: string
- expr: hr
- type: string
- File Output Operator
- compressed: false
- GlobalTableId: 0
- directory: /data/users/njain/hive5/hive5/ql/../build/ql/tmp/1286189180/888381841.10001.insclause-0
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- columns a.key,a.value,a.ds,a.hr
- serialization.format 1
+ Filter Operator
+ predicate:
+ expr: (((ds = '2008-04-08') and not ((UDFToDouble(key) > UDFToDouble(50)) or (UDFToDouble(key) < UDFToDouble(10)))) and (hr like '%2'))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ expr: ds
+ type: string
+ expr: hr
+ type: string
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ directory: file:/data/users/pchakka/workspace/oshive2/build/ql/tmp/1606994614/10001
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns a.key,a.value,a.ds,a.hr
+ serialization.format 1
Needs Tagging: false
Path -> Alias:
- file:/data/users/njain/hive5/hive5/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
+ file:/data/users/pchakka/workspace/oshive2/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
Path -> Partition:
- file:/data/users/njain/hive5/hive5/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
+ file:/data/users/pchakka/workspace/oshive2/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12
Partition
partition values:
ds 2008-04-08
@@ -137,7 +142,7 @@
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
file.inputformat org.apache.hadoop.mapred.TextInputFormat
file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location file:/data/users/njain/hive5/hive5/build/ql/test/data/warehouse/srcpart
+ location file:/data/users/pchakka/workspace/oshive2/build/ql/test/data/warehouse/srcpart
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: srcpart
@@ -148,7 +153,7 @@
query: select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2'
Input: default/srcpart/ds=2008-04-08/hr=12
-Output: /data/users/njain/hive5/hive5/ql/../build/ql/tmp/249549357/307540214.10000
+Output: file:/data/users/pchakka/workspace/oshive2/build/ql/tmp/1256518849/10000
27 val_27 2008-04-08 12
37 val_37 2008-04-08 12
15 val_15 2008-04-08 12