You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/08/14 17:18:10 UTC

svn commit: r1513926 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java test/queries/clientpositive/push_or.q test/results/clientpositive/push_or.q.out

Author: hashutosh
Date: Wed Aug 14 15:18:10 2013
New Revision: 1513926

URL: http://svn.apache.org/r1513926
Log:
HIVE-5047 : Hive client filters partitions incorrectly via pushdown in certain cases involving OR (Sergey Shelukhin via Ashutosh Chauhan)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/push_or.q
    hive/trunk/ql/src/test/results/clientpositive/push_or.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=1513926&r1=1513925&r2=1513926&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Wed Aug 14 15:18:10 2013
@@ -201,19 +201,21 @@ public class PartitionPruner implements 
       }
     } else if (expr instanceof ExprNodeGenericFuncDesc) {
       GenericUDF udf = ((ExprNodeGenericFuncDesc)expr).getGenericUDF();
-      if (udf instanceof GenericUDFOPAnd ||
-          udf instanceof GenericUDFOPOr) {
-        List<ExprNodeDesc> children = expr.getChildren();
+      boolean isAnd = udf instanceof GenericUDFOPAnd;
+      if (isAnd || udf instanceof GenericUDFOPOr) {
+        List<ExprNodeDesc> children = ((ExprNodeGenericFuncDesc)expr).getChildren();
         ExprNodeDesc left = children.get(0);
         children.set(0, compactExpr(left));
         ExprNodeDesc right = children.get(1);
         children.set(1, compactExpr(right));
+        // Note that one does not simply compact (not-null or null) to not-null.
+        // Only if we have an "and" is it valid to send one side to metastore.
         if (children.get(0) == null && children.get(1) == null) {
           return null;
         } else if (children.get(0) == null) {
-          return children.get(1);
+          return isAnd ? children.get(1) : null;
         } else if (children.get(1) == null) {
-          return children.get(0);
+          return isAnd ? children.get(0) : null;
         }
       }
       return expr;

Added: hive/trunk/ql/src/test/queries/clientpositive/push_or.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/push_or.q?rev=1513926&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/push_or.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/push_or.q Wed Aug 14 15:18:10 2013
@@ -0,0 +1,10 @@
+drop table push_or;
+
+create table push_or (key int, value string) partitioned by (ds string);
+
+insert overwrite table push_or partition (ds='2000-04-08') select * from src where key < 20 order by key;
+insert overwrite table push_or partition (ds='2000-04-09') select * from src where key < 20 order by key;
+
+explain extended select key, value, ds from push_or where ds='2000-04-09' or key=5 order by key, ds;
+select key, value, ds from push_or where ds='2000-04-09' or key=5 order by key, ds;
+

Added: hive/trunk/ql/src/test/results/clientpositive/push_or.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/push_or.q.out?rev=1513926&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/push_or.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/push_or.q.out Wed Aug 14 15:18:10 2013
@@ -0,0 +1,245 @@
+PREHOOK: query: drop table push_or
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table push_or
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table push_or (key int, value string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table push_or (key int, value string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@push_or
+PREHOOK: query: insert overwrite table push_or partition (ds='2000-04-08') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@push_or@ds=2000-04-08
+POSTHOOK: query: insert overwrite table push_or partition (ds='2000-04-08') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@push_or@ds=2000-04-08
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table push_or partition (ds='2000-04-09') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@push_or@ds=2000-04-09
+POSTHOOK: query: insert overwrite table push_or partition (ds='2000-04-09') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@push_or@ds=2000-04-09
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain extended select key, value, ds from push_or where ds='2000-04-09' or key=5 order by key, ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select key, value, ds from push_or where ds='2000-04-09' or key=5 order by key, ds
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME push_or))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (or (= (TOK_TABLE_OR_COL ds) '2000-04-09') (= (TOK_TABLE_OR_COL key) 5))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        push_or 
+          TableScan
+            alias: push_or
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: ((ds = '2000-04-09') or (key = 5))
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: int
+                      expr: value
+                      type: string
+                      expr: ds
+                      type: string
+                outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: int
+                        expr: _col2
+                        type: string
+                  sort order: ++
+                  tag: -1
+                  value expressions:
+                        expr: _col0
+                        type: int
+                        expr: _col1
+                        type: string
+                        expr: _col2
+                        type: string
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-08
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-08
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.push_or
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              rawDataSize 160
+              serialization.ddl struct push_or { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.push_or
+                numFiles 2
+                numPartitions 2
+                numRows 40
+                partition_columns ds
+                rawDataSize 320
+                serialization.ddl struct push_or { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 360
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.push_or
+            name: default.push_or
+#### A masked pattern was here ####
+          Partition
+            base file name: ds=2000-04-09
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 2000-04-09
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.push_or
+              numFiles 1
+              numRows 20
+              partition_columns ds
+              rawDataSize 160
+              serialization.ddl struct push_or { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 180
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.push_or
+                numFiles 2
+                numPartitions 2
+                numRows 40
+                partition_columns ds
+                rawDataSize 320
+                serialization.ddl struct push_or { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 360
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.push_or
+            name: default.push_or
+      Truncated Path -> Alias:
+        /push_or/ds=2000-04-08 [push_or]
+        /push_or/ds=2000-04-09 [push_or]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types int:string:string
+                  escape.delim \
+                  hive.serialization.extend.nesting.levels true
+                  serialization.format 1
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select key, value, ds from push_or where ds='2000-04-09' or key=5 order by key, ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@push_or
+PREHOOK: Input: default@push_or@ds=2000-04-08
+PREHOOK: Input: default@push_or@ds=2000-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, ds from push_or where ds='2000-04-09' or key=5 order by key, ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@push_or
+POSTHOOK: Input: default@push_or@ds=2000-04-08
+POSTHOOK: Input: default@push_or@ds=2000-04-09
+#### A masked pattern was here ####
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0	val_0	2000-04-09
+0	val_0	2000-04-09
+0	val_0	2000-04-09
+2	val_2	2000-04-09
+4	val_4	2000-04-09
+5	val_5	2000-04-08
+5	val_5	2000-04-08
+5	val_5	2000-04-08
+5	val_5	2000-04-09
+5	val_5	2000-04-09
+5	val_5	2000-04-09
+8	val_8	2000-04-09
+9	val_9	2000-04-09
+10	val_10	2000-04-09
+11	val_11	2000-04-09
+12	val_12	2000-04-09
+12	val_12	2000-04-09
+15	val_15	2000-04-09
+15	val_15	2000-04-09
+17	val_17	2000-04-09
+18	val_18	2000-04-09
+18	val_18	2000-04-09
+19	val_19	2000-04-09