Posted to commits@hive.apache.org by ha...@apache.org on 2014/10/21 20:29:34 UTC

svn commit: r1633430 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java test/queries/clientpositive/partition_boolexpr.q test/results/clientpositive/partition_boolexpr.q.out

Author: hashutosh
Date: Tue Oct 21 18:29:33 2014
New Revision: 1633430

URL: http://svn.apache.org/r1633430
Log:
HIVE-6934 : PartitionPruner doesn't handle top level constant expression correctly (Hari Sankar Sivarama via Ashutosh Chauhan)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/partition_boolexpr.q
    hive/trunk/ql/src/test/results/clientpositive/partition_boolexpr.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
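
In short: compactExpr() no longer collapses constant boolean predicates to null. It now
folds TRUE/FALSE constants through AND and OR, and prune() returns every partition when
the filter compacts to TRUE (or to a null boolean constant, the non-strict case) and an
empty PrunedPartitionList when it compacts to FALSE. The following is a minimal,
self-contained sketch of just the folding rules; BoolFoldSketch, Expr, Const, Pred, And
and Or are hypothetical stand-ins for illustration, not Hive's ExprNodeDesc API.

    // Sketch of the constant-folding rules added to PartitionPruner.compactExpr().
    // All types here are invented stand-ins; the real code works on ExprNodeDesc
    // trees and uses isTrueExpr()/isFalseExpr() on ExprNodeConstantDesc.
    public class BoolFoldSketch {
      interface Expr {}
      static final class Const implements Expr { final boolean value; Const(boolean v) { value = v; } }
      static final class Pred  implements Expr { final String text;  Pred(String t)  { text = t; } }
      static final class And   implements Expr { final Expr l, r; And(Expr l, Expr r) { this.l = l; this.r = r; } }
      static final class Or    implements Expr { final Expr l, r; Or(Expr l, Expr r)  { this.l = l; this.r = r; } }

      static boolean isTrue(Expr e)  { return e instanceof Const && ((Const) e).value; }
      static boolean isFalse(Expr e) { return e instanceof Const && !((Const) e).value; }

      // TRUE is the identity of AND and FALSE the identity of OR; the opposite
      // constant short-circuits the whole branch, mirroring the patch.
      static Expr fold(Expr e) {
        if (e instanceof And || e instanceof Or) {
          boolean isAnd = e instanceof And;
          Expr l = fold(isAnd ? ((And) e).l : ((Or) e).l);
          Expr r = fold(isAnd ? ((And) e).r : ((Or) e).r);
          if (isTrue(l))  return isAnd ? r : new Const(true);
          if (isTrue(r))  return isAnd ? l : new Const(true);
          if (isFalse(l)) return isAnd ? new Const(false) : r;
          if (isFalse(r)) return isAnd ? new Const(false) : l;
        }
        return e;        // non-constant predicates are left for the metastore
      }

      public static void main(String[] args) {
        Expr hr = new Pred("hr = '11'");
        System.out.println(isTrue(fold(new Or(new Const(true), hr))));    // true  -> scan all partitions
        System.out.println(isFalse(fold(new And(new Const(false), hr)))); // true  -> prune every partition
        System.out.println(fold(new And(new Const(true), hr)) == hr);     // true  -> only hr = '11' is pushed down
      }
    }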

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=1633430&r1=1633429&r2=1633430&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Tue Oct 21 18:29:33 2014
@@ -56,7 +56,9 @@ import org.apache.hadoop.hive.ql.plan.Ex
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 /**
@@ -188,12 +190,18 @@ public class PartitionPruner implements 
     // Replace virtual columns with nulls. See javadoc for details.
     prunerExpr = removeNonPartCols(prunerExpr, extractPartColNames(tab), partColsUsedInFilter);
     // Remove all parts that are not partition columns. See javadoc for details.
-    ExprNodeGenericFuncDesc compactExpr = (ExprNodeGenericFuncDesc)compactExpr(prunerExpr.clone());
+    ExprNodeDesc compactExpr = compactExpr(prunerExpr.clone());
     String oldFilter = prunerExpr.getExprString();
-    if (compactExpr == null) {
-      // Non-strict mode, and all the predicates are on non-partition columns - get everything.
-      LOG.debug("Filter " + oldFilter + " was null after compacting");
-      return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap);
+    if (isBooleanExpr(compactExpr)) {
+      // For null and true values, return every partition
+      if (!isFalseExpr(compactExpr)) {
+        // Non-strict mode, and all the predicates are on non-partition columns - get everything.
+        LOG.debug("Filter " + oldFilter + " was null after compacting");
+        return getAllPartsFromCacheOrServer(tab, key, true, prunedPartitionsMap);
+      } else {
+        return new PrunedPartitionList(tab, new LinkedHashSet<Partition>(new ArrayList<Partition>()),
+            new ArrayList<String>(), false);
+      }
     }
     LOG.debug("Filter w/ compacting: " + compactExpr.getExprString()
         + "; filter w/o compacting: " + oldFilter);
@@ -204,7 +212,7 @@ public class PartitionPruner implements 
       return ppList;
     }
 
-    ppList = getPartitionsFromServer(tab, compactExpr, conf, alias, partColsUsedInFilter, oldFilter.equals(compactExpr.getExprString()));
+    ppList = getPartitionsFromServer(tab, (ExprNodeGenericFuncDesc)compactExpr, conf, alias, partColsUsedInFilter, oldFilter.equals(compactExpr.getExprString()));
     prunedPartitionsMap.put(key, ppList);
     return ppList;
   }
@@ -225,16 +233,22 @@ public class PartitionPruner implements 
     partsCache.put(key, ppList);
     return ppList;
   }
-
-  private static ExprNodeDesc removeTruePredciates(ExprNodeDesc e) {
-    if (e instanceof ExprNodeConstantDesc) {
-      ExprNodeConstantDesc eC = (ExprNodeConstantDesc) e;
-      if (e.getTypeInfo() == TypeInfoFactory.booleanTypeInfo
-          && eC.getValue() == Boolean.TRUE) {
-        return null;
-      }
-    }
-    return e;
+
+  static private boolean isBooleanExpr(ExprNodeDesc expr) {
+    return expr != null && expr instanceof ExprNodeConstantDesc &&
+        ((ExprNodeConstantDesc) expr).getTypeInfo() instanceof PrimitiveTypeInfo &&
+        ((PrimitiveTypeInfo) ((ExprNodeConstantDesc) expr).getTypeInfo())
+            .getTypeName().equals(serdeConstants.BOOLEAN_TYPE_NAME);
+  }
+  static private boolean isTrueExpr(ExprNodeDesc expr) {
+    return isBooleanExpr(expr) &&
+        ((ExprNodeConstantDesc) expr).getValue() != null &&
+        ((ExprNodeConstantDesc) expr).getValue().equals(Boolean.TRUE);
+  }
+  static private boolean isFalseExpr(ExprNodeDesc expr) {
+    return isBooleanExpr(expr) &&
+        ((ExprNodeConstantDesc) expr).getValue() != null &&
+        ((ExprNodeConstantDesc) expr).getValue().equals(Boolean.FALSE);
   }
 
   /**
@@ -245,10 +259,13 @@ public class PartitionPruner implements 
    * @return partition pruning expression that only contains partition columns.
    */
   static private ExprNodeDesc compactExpr(ExprNodeDesc expr) {
-    if (expr instanceof ExprNodeConstantDesc) {
-      expr = removeTruePredciates(expr);
-      if (expr == null || ((ExprNodeConstantDesc)expr).getValue() == null) {
-        return null;
+    // If this is a constant boolean expression, return the value.
+    if (expr == null) {
+      return null;
+    }
+    if (expr instanceof ExprNodeConstantDesc) {
+      if (isBooleanExpr(expr)) {
+        return expr;
       } else {
         throw new IllegalStateException("Unexpected non-null ExprNodeConstantDesc: "
           + expr.getExprString());
@@ -256,22 +273,29 @@ public class PartitionPruner implements 
     } else if (expr instanceof ExprNodeGenericFuncDesc) {
       GenericUDF udf = ((ExprNodeGenericFuncDesc)expr).getGenericUDF();
       boolean isAnd = udf instanceof GenericUDFOPAnd;
-      if (isAnd || udf instanceof GenericUDFOPOr) {
+      boolean isOr = udf instanceof GenericUDFOPOr;
+
+      if (isAnd || isOr) {
         List<ExprNodeDesc> children = expr.getChildren();
-        ExprNodeDesc left = removeTruePredciates(children.get(0));
-        children.set(0, left == null ? null : compactExpr(left));
-        ExprNodeDesc right = removeTruePredciates(children.get(1));
-        children.set(1, right == null ? null : compactExpr(right));
-
-        // Note that one does not simply compact (not-null or null) to not-null.
-        // Only if we have an "and" is it valid to send one side to metastore.
-        if (children.get(0) == null && children.get(1) == null) {
-          return null;
-        } else if (children.get(0) == null) {
-          return isAnd ? children.get(1) : null;
-        } else if (children.get(1) == null) {
-          return isAnd ? children.get(0) : null;
-        }
+        ExprNodeDesc left = children.get(0);
+        children.set(0, compactExpr(left));
+        ExprNodeDesc right = children.get(1);
+        children.set(1, compactExpr(right));
+
+        if (isTrueExpr(children.get(0)) && isTrueExpr(children.get(1))) {
+          return new ExprNodeConstantDesc(Boolean.TRUE);
+        } else if (isTrueExpr(children.get(0))) {
+          return isAnd ? children.get(1) : new ExprNodeConstantDesc(Boolean.TRUE);
+        } else if (isTrueExpr(children.get(1))) {
+          return isAnd ? children.get(0) : new ExprNodeConstantDesc(Boolean.TRUE);
+        } else if (isFalseExpr(children.get(0)) && isFalseExpr(children.get(1))) {
+          return new ExprNodeConstantDesc(Boolean.FALSE);
+        } else if (isFalseExpr(children.get(0))) {
+          return isAnd ? new ExprNodeConstantDesc(Boolean.FALSE) : children.get(1);
+        } else if (isFalseExpr(children.get(1))) {
+          return isAnd ? new ExprNodeConstantDesc(Boolean.FALSE) : children.get(0);
+        }
+
       }
       return expr;
     } else {
@@ -296,9 +320,9 @@ public class PartitionPruner implements 
       if (!partCols.contains(column)) {
         // Column doesn't appear to be a partition column for the table.
         return new ExprNodeConstantDesc(expr.getTypeInfo(), null);
-      }
+      } 
       referred.add(column);
-    }
+    }	        
     if (expr instanceof ExprNodeGenericFuncDesc) {
       List<ExprNodeDesc> children = expr.getChildren();
       for (int i = 0; i < children.size(); ++i) {

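Viewed from prune(), the change boils down to a three-way branch on the compacted
filter. The outline below is a hypothetical summary only: it reuses the Expr, Const and
Pred stand-ins from the sketch above and would live inside that class, and PruneAction
is invented here, not a Hive type. The real code additionally treats a boolean constant
whose value is null like TRUE, i.e. it fetches every partition in non-strict mode.

    enum PruneAction { GET_ALL_PARTITIONS, RETURN_EMPTY_LIST, ASK_METASTORE }

    static PruneAction dispatch(Expr compacted) {
      if (compacted instanceof Const) {              // filter folded to a boolean constant
        return ((Const) compacted).value
            ? PruneAction.GET_ALL_PARTITIONS         // where true; true or hr='11'
            : PruneAction.RETURN_EMPTY_LIST;         // where false; false and hr='11'
      }
      return PruneAction.ASK_METASTORE;              // e.g. hr='11' left from "true and hr='11'"
    }

The new partition_boolexpr.q below exercises these cases, and the .q.out plans reflect
them: "where true" and "where true or hr='11'" scan all 2000 rows of srcpart,
"where true and hr='11'" and "where false or hr='11'" scan only the 1000 rows in the
hr='11' partitions, and "where false" and "where false and hr='11'" produce stages with
no map-side TableScan at all.
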
Added: hive/trunk/ql/src/test/queries/clientpositive/partition_boolexpr.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/partition_boolexpr.q?rev=1633430&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/partition_boolexpr.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/partition_boolexpr.q Tue Oct 21 18:29:33 2014
@@ -0,0 +1,12 @@
+-- create testing table.
+create table part_boolexpr(key int, value string) partitioned by (dt int, ts string);
+
+-- both the below queries should return 0 rows 
+select count(*) from part_boolexpr where key = 'abc';
+select * from part_boolexpr where dt = 'abc';
+explain select count(1) from srcpart where true;
+explain select count(1) from srcpart where false;
+explain select count(1) from srcpart where true and hr='11';
+explain select count(1) from srcpart where true or hr='11';
+explain select count(1) from srcpart where false or hr='11';
+explain select count(1) from srcpart where false and hr='11';
\ No newline at end of file

Added: hive/trunk/ql/src/test/results/clientpositive/partition_boolexpr.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/partition_boolexpr.q.out?rev=1633430&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/partition_boolexpr.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/partition_boolexpr.q.out Tue Oct 21 18:29:33 2014
@@ -0,0 +1,299 @@
+PREHOOK: query: -- create testing table.
+create table part_boolexpr(key int, value string) partitioned by (dt int, ts string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part_boolexpr
+POSTHOOK: query: -- create testing table.
+create table part_boolexpr(key int, value string) partitioned by (dt int, ts string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part_boolexpr
+PREHOOK: query: -- both the below queries should return 0 rows 
+select count(*) from part_boolexpr where key = 'abc'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_boolexpr
+#### A masked pattern was here ####
+POSTHOOK: query: -- both the below queries should return 0 rows 
+select count(*) from part_boolexpr where key = 'abc'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_boolexpr
+#### A masked pattern was here ####
+0
+PREHOOK: query: select * from part_boolexpr where dt = 'abc'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_boolexpr
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_boolexpr where dt = 'abc'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_boolexpr
+#### A masked pattern was here ####
+PREHOOK: query: explain select count(1) from srcpart where true
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from srcpart where true
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: srcpart
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select count(1) from srcpart where false
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from srcpart where false
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select count(1) from srcpart where true and hr='11'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from srcpart where true and hr='11'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: srcpart
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select count(1) from srcpart where true or hr='11'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from srcpart where true or hr='11'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: srcpart
+            Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select count(1) from srcpart where false or hr='11'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from srcpart where false or hr='11'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: srcpart
+            Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count(1)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select count(1) from srcpart where false and hr='11'
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from srcpart where false and hr='11'
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+