You are viewing a plain-text version of this content; the hyperlink to the canonical version was not preserved in this copy.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/10/21 20:29:34 UTC
svn commit: r1633430 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
test/queries/clientpositive/partition_boolexpr.q
test/results/clientpositive/partition_boolexpr.q.out
Author: hashutosh
Date: Tue Oct 21 18:29:33 2014
New Revision: 1633430
URL: http://svn.apache.org/r1633430
Log:
HIVE-6934 : PartitionPruner doesn't handle top level constant expression correctly (Hari Sankar Sivarama via Ashutosh Chauhan)
Added:
hive/trunk/ql/src/test/queries/clientpositive/partition_boolexpr.q
hive/trunk/ql/src/test/results/clientpositive/partition_boolexpr.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=1633430&r1=1633429&r2=1633430&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Tue Oct 21 18:29:33 2014
@@ -56,7 +56,9 @@ import org.apache.hadoop.hive.ql.plan.Ex
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
/**
@@ -188,12 +190,18 @@ public class PartitionPruner implements
// Replace virtual columns with nulls. See javadoc for details.
prunerExpr = removeNonPartCols(prunerExpr, extractPartColNames(tab), partColsUsedInFilter);
// Remove all parts that are not partition columns. See javadoc for details.
- ExprNodeGenericFuncDesc compactExpr = (ExprNodeGenericFuncDesc)compactExpr(prunerExpr.clone());
+ ExprNodeDesc compactExpr = compactExpr(prunerExpr.clone());
String oldFilter = prunerExpr.getExprString();
- if (compactExpr == null) {
- // Non-strict mode, and all the predicates are on non-partition columns - get everything.
- LOG.debug("Filter " + oldFilter + " was null after compacting");
- return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap);
+ if (isBooleanExpr(compactExpr)) {
+ // For null and true values, return every partition
+ if (!isFalseExpr(compactExpr)) {
+ // Non-strict mode, and all the predicates are on non-partition columns - get everything.
+ LOG.debug("Filter " + oldFilter + " was null after compacting");
+ return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap);
+ } else {
+ return new PrunedPartitionList(tab, new LinkedHashSet<Partition>(new ArrayList<Partition>()),
+ new ArrayList<String>(), false);
+ }
}
LOG.debug("Filter w/ compacting: " + compactExpr.getExprString()
+ "; filter w/o compacting: " + oldFilter);
@@ -204,7 +212,7 @@ public class PartitionPruner implements
return ppList;
}
- ppList = getPartitionsFromServer(tab, compactExpr, conf, alias, partColsUsedInFilter, oldFilter.equals(compactExpr.getExprString()));
+ ppList = getPartitionsFromServer(tab, (ExprNodeGenericFuncDesc)compactExpr, conf, alias, partColsUsedInFilter, oldFilter.equals(compactExpr.getExprString()));
prunedPartitionsMap.put(key, ppList);
return ppList;
}
@@ -225,16 +233,22 @@ public class PartitionPruner implements
partsCache.put(key, ppList);
return ppList;
}
-
- private static ExprNodeDesc removeTruePredciates(ExprNodeDesc e) {
- if (e instanceof ExprNodeConstantDesc) {
- ExprNodeConstantDesc eC = (ExprNodeConstantDesc) e;
- if (e.getTypeInfo() == TypeInfoFactory.booleanTypeInfo
- && eC.getValue() == Boolean.TRUE) {
- return null;
- }
- }
- return e;
+
+ static private boolean isBooleanExpr(ExprNodeDesc expr) {
+ return expr != null && expr instanceof ExprNodeConstantDesc &&
+ ((ExprNodeConstantDesc)expr).getTypeInfo() instanceof PrimitiveTypeInfo &&
+ ((PrimitiveTypeInfo)(((ExprNodeConstantDesc)expr).getTypeInfo())).
+ getTypeName().equals(serdeConstants.BOOLEAN_TYPE_NAME);
+ }
+ static private boolean isTrueExpr(ExprNodeDesc expr) {
+ return isBooleanExpr(expr) &&
+ ((ExprNodeConstantDesc)expr).getValue() != null &&
+ ((ExprNodeConstantDesc)expr).getValue().equals(Boolean.TRUE);
+ }
+ static private boolean isFalseExpr(ExprNodeDesc expr) {
+ return isBooleanExpr(expr) &&
+ ((ExprNodeConstantDesc)expr).getValue() != null &&
+ ((ExprNodeConstantDesc)expr).getValue().equals(Boolean.FALSE);
}
/**
@@ -245,10 +259,13 @@ public class PartitionPruner implements
* @return partition pruning expression that only contains partition columns.
*/
static private ExprNodeDesc compactExpr(ExprNodeDesc expr) {
- if (expr instanceof ExprNodeConstantDesc) {
- expr = removeTruePredciates(expr);
- if (expr == null || ((ExprNodeConstantDesc)expr).getValue() == null) {
- return null;
+ // If this is a constant boolean expression, return the value.
+ if (expr == null) {
+ return null;
+ }
+ if (expr instanceof ExprNodeConstantDesc) {
+ if (isBooleanExpr(expr)) {
+ return expr;
} else {
throw new IllegalStateException("Unexpected non-null ExprNodeConstantDesc: "
+ expr.getExprString());
@@ -256,22 +273,29 @@ public class PartitionPruner implements
} else if (expr instanceof ExprNodeGenericFuncDesc) {
GenericUDF udf = ((ExprNodeGenericFuncDesc)expr).getGenericUDF();
boolean isAnd = udf instanceof GenericUDFOPAnd;
- if (isAnd || udf instanceof GenericUDFOPOr) {
+ boolean isOr = udf instanceof GenericUDFOPOr;
+
+ if (isAnd || isOr) {
List<ExprNodeDesc> children = expr.getChildren();
- ExprNodeDesc left = removeTruePredciates(children.get(0));
- children.set(0, left == null ? null : compactExpr(left));
- ExprNodeDesc right = removeTruePredciates(children.get(1));
- children.set(1, right == null ? null : compactExpr(right));
-
- // Note that one does not simply compact (not-null or null) to not-null.
- // Only if we have an "and" is it valid to send one side to metastore.
- if (children.get(0) == null && children.get(1) == null) {
- return null;
- } else if (children.get(0) == null) {
- return isAnd ? children.get(1) : null;
- } else if (children.get(1) == null) {
- return isAnd ? children.get(0) : null;
- }
+ ExprNodeDesc left = children.get(0);
+ children.set(0, compactExpr(left));
+ ExprNodeDesc right = children.get(1);
+ children.set(1, compactExpr(right));
+
+ if (isTrueExpr(children.get(0)) && isTrueExpr(children.get(1))) {
+ return new ExprNodeConstantDesc(Boolean.TRUE);
+ } else if (isTrueExpr(children.get(0))) {
+ return isAnd ? children.get(1) : new ExprNodeConstantDesc(Boolean.TRUE);
+ } else if (isTrueExpr(children.get(1))) {
+ return isAnd ? children.get(0) : new ExprNodeConstantDesc(Boolean.TRUE);
+ } else if (isFalseExpr(children.get(0)) && isFalseExpr(children.get(1))) {
+ return new ExprNodeConstantDesc(Boolean.FALSE);
+ } else if (isFalseExpr(children.get(0))) {
+ return isAnd ? new ExprNodeConstantDesc(Boolean.FALSE) : children.get(1);
+ } else if (isFalseExpr(children.get(1))) {
+ return isAnd ? new ExprNodeConstantDesc(Boolean.FALSE) : children.get(0);
+ }
+
}
return expr;
} else {
@@ -296,9 +320,9 @@ public class PartitionPruner implements
if (!partCols.contains(column)) {
// Column doesn't appear to be a partition column for the table.
return new ExprNodeConstantDesc(expr.getTypeInfo(), null);
- }
+ }
referred.add(column);
- }
+ }
if (expr instanceof ExprNodeGenericFuncDesc) {
List<ExprNodeDesc> children = expr.getChildren();
for (int i = 0; i < children.size(); ++i) {
Added: hive/trunk/ql/src/test/queries/clientpositive/partition_boolexpr.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/partition_boolexpr.q?rev=1633430&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/partition_boolexpr.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/partition_boolexpr.q Tue Oct 21 18:29:33 2014
@@ -0,0 +1,12 @@
+-- create testing table.
+create table part_boolexpr(key int, value string) partitioned by (dt int, ts string);
+
+-- both the below queries should return 0 rows
+select count(*) from part_boolexpr where key = 'abc';
+select * from part_boolexpr where dt = 'abc';
+explain select count(1) from srcpart where true;
+explain select count(1) from srcpart where false;
+explain select count(1) from srcpart where true and hr='11';
+explain select count(1) from srcpart where true or hr='11';
+explain select count(1) from srcpart where false or hr='11';
+explain select count(1) from srcpart where false and hr='11';
\ No newline at end of file
Added: hive/trunk/ql/src/test/results/clientpositive/partition_boolexpr.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/partition_boolexpr.q.out?rev=1633430&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/partition_boolexpr.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/partition_boolexpr.q.out Tue Oct 21 18:29:33 2014
@@ -0,0 +1,299 @@
+PREHOOK: query: -- create testing table.
+create table part_boolexpr(key int, value string) partitioned by (dt int, ts string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part_boolexpr
+POSTHOOK: query: -- create testing table.
+create table part_boolexpr(key int, value string) partitioned by (dt int, ts string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part_boolexpr
+PREHOOK: query: -- both the below queries should return 0 rows
+select count(*) from part_boolexpr where key = 'abc'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_boolexpr
+#### A masked pattern was here ####
+POSTHOOK: query: -- both the below queries should return 0 rows
+select count(*) from part_boolexpr where key = 'abc'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_boolexpr
+#### A masked pattern was here ####
+0
+PREHOOK: query: select * from part_boolexpr where dt = 'abc'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_boolexpr
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_boolexpr where dt = 'abc'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_boolexpr
+#### A masked pattern was here ####
+PREHOOK: query: explain select count(1) from srcpart where true
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from srcpart where true
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select count(1) from srcpart where false
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from srcpart where false
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select count(1) from srcpart where true and hr='11'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from srcpart where true and hr='11'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select count(1) from srcpart where true or hr='11'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from srcpart where true or hr='11'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select count(1) from srcpart where false or hr='11'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from srcpart where false or hr='11'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select count(1) from srcpart where false and hr='11'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from srcpart where false and hr='11'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+