You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/08/14 17:18:10 UTC
svn commit: r1513926 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
test/queries/clientpositive/push_or.q
test/results/clientpositive/push_or.q.out
Author: hashutosh
Date: Wed Aug 14 15:18:10 2013
New Revision: 1513926
URL: http://svn.apache.org/r1513926
Log:
HIVE-5047 : Hive client filters partitions incorrectly via pushdown in certain cases involving "or" (Sergey Shelukhin via Ashutosh Chauhan)
Added:
hive/trunk/ql/src/test/queries/clientpositive/push_or.q
hive/trunk/ql/src/test/results/clientpositive/push_or.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=1513926&r1=1513925&r2=1513926&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Wed Aug 14 15:18:10 2013
@@ -201,19 +201,21 @@ public class PartitionPruner implements
}
} else if (expr instanceof ExprNodeGenericFuncDesc) {
GenericUDF udf = ((ExprNodeGenericFuncDesc)expr).getGenericUDF();
- if (udf instanceof GenericUDFOPAnd ||
- udf instanceof GenericUDFOPOr) {
- List<ExprNodeDesc> children = expr.getChildren();
+ boolean isAnd = udf instanceof GenericUDFOPAnd;
+ if (isAnd || udf instanceof GenericUDFOPOr) {
+ List<ExprNodeDesc> children = ((ExprNodeGenericFuncDesc)expr).getChildren();
ExprNodeDesc left = children.get(0);
children.set(0, compactExpr(left));
ExprNodeDesc right = children.get(1);
children.set(1, compactExpr(right));
+ // Note that one does not simply compact (not-null or null) to not-null.
+ // Only if we have an "and" is it valid to send one side to metastore.
if (children.get(0) == null && children.get(1) == null) {
return null;
} else if (children.get(0) == null) {
- return children.get(1);
+ return isAnd ? children.get(1) : null;
} else if (children.get(1) == null) {
- return children.get(0);
+ return isAnd ? children.get(0) : null;
}
}
return expr;
Added: hive/trunk/ql/src/test/queries/clientpositive/push_or.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/push_or.q?rev=1513926&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/push_or.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/push_or.q Wed Aug 14 15:18:10 2013
@@ -0,0 +1,10 @@
+drop table push_or;
+
+create table push_or (key int, value string) partitioned by (ds string);
+
+insert overwrite table push_or partition (ds='2000-04-08') select * from src where key < 20 order by key;
+insert overwrite table push_or partition (ds='2000-04-09') select * from src where key < 20 order by key;
+
+explain extended select key, value, ds from push_or where ds='2000-04-09' or key=5 order by key, ds;
+select key, value, ds from push_or where ds='2000-04-09' or key=5 order by key, ds;
+
Added: hive/trunk/ql/src/test/results/clientpositive/push_or.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/push_or.q.out?rev=1513926&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/push_or.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/push_or.q.out Wed Aug 14 15:18:10 2013
@@ -0,0 +1,245 @@
+PREHOOK: query: drop table push_or
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table push_or
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table push_or (key int, value string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table push_or (key int, value string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@push_or
+PREHOOK: query: insert overwrite table push_or partition (ds='2000-04-08') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@push_or@ds=2000-04-08
+POSTHOOK: query: insert overwrite table push_or partition (ds='2000-04-08') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@push_or@ds=2000-04-08
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table push_or partition (ds='2000-04-09') select * from src where key < 20 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@push_or@ds=2000-04-09
+POSTHOOK: query: insert overwrite table push_or partition (ds='2000-04-09') select * from src where key < 20 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@push_or@ds=2000-04-09
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain extended select key, value, ds from push_or where ds='2000-04-09' or key=5 order by key, ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select key, value, ds from push_or where ds='2000-04-09' or key=5 order by key, ds
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME push_or))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL ds))) (TOK_WHERE (or (= (TOK_TABLE_OR_COL ds) '2000-04-09') (= (TOK_TABLE_OR_COL key) 5))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ push_or
+ TableScan
+ alias: push_or
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: ((ds = '2000-04-09') or (key = 5))
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: int
+ expr: value
+ type: string
+ expr: ds
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: int
+ expr: _col2
+ type: string
+ sort order: ++
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2000-04-08
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2000-04-08
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.push_or
+ numFiles 1
+ numRows 20
+ partition_columns ds
+ rawDataSize 160
+ serialization.ddl struct push_or { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 180
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.push_or
+ numFiles 2
+ numPartitions 2
+ numRows 40
+ partition_columns ds
+ rawDataSize 320
+ serialization.ddl struct push_or { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 360
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.push_or
+ name: default.push_or
+#### A masked pattern was here ####
+ Partition
+ base file name: ds=2000-04-09
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 2000-04-09
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.push_or
+ numFiles 1
+ numRows 20
+ partition_columns ds
+ rawDataSize 160
+ serialization.ddl struct push_or { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 180
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns key,value
+ columns.types int:string
+#### A masked pattern was here ####
+ name default.push_or
+ numFiles 2
+ numPartitions 2
+ numRows 40
+ partition_columns ds
+ rawDataSize 320
+ serialization.ddl struct push_or { i32 key, string value}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ totalSize 360
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.push_or
+ name: default.push_or
+ Truncated Path -> Alias:
+ /push_or/ds=2000-04-08 [push_or]
+ /push_or/ds=2000-04-09 [push_or]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types int:string:string
+ escape.delim \
+ hive.serialization.extend.nesting.levels true
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select key, value, ds from push_or where ds='2000-04-09' or key=5 order by key, ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@push_or
+PREHOOK: Input: default@push_or@ds=2000-04-08
+PREHOOK: Input: default@push_or@ds=2000-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, ds from push_or where ds='2000-04-09' or key=5 order by key, ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@push_or
+POSTHOOK: Input: default@push_or@ds=2000-04-08
+POSTHOOK: Input: default@push_or@ds=2000-04-09
+#### A masked pattern was here ####
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: push_or PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0 val_0 2000-04-09
+0 val_0 2000-04-09
+0 val_0 2000-04-09
+2 val_2 2000-04-09
+4 val_4 2000-04-09
+5 val_5 2000-04-08
+5 val_5 2000-04-08
+5 val_5 2000-04-08
+5 val_5 2000-04-09
+5 val_5 2000-04-09
+5 val_5 2000-04-09
+8 val_8 2000-04-09
+9 val_9 2000-04-09
+10 val_10 2000-04-09
+11 val_11 2000-04-09
+12 val_12 2000-04-09
+12 val_12 2000-04-09
+15 val_15 2000-04-09
+15 val_15 2000-04-09
+17 val_17 2000-04-09
+18 val_18 2000-04-09
+18 val_18 2000-04-09
+19 val_19 2000-04-09