You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2015/10/12 19:51:10 UTC
[02/10] hive git commit: HIVE-12021: HivePreFilteringRule may introduce wrong common operands (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)

HIVE-12021: HivePreFilteringRule may introduce wrong common operands (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/be05e32e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/be05e32e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/be05e32e

Branch: refs/heads/llap
Commit: be05e32e58f54c0185551cb61d9769d2ceb5bbdd
Parents: 5201f18
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Sun Oct 4 11:40:30 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Fri Oct 9 09:04:49 2015 +0100

----------------------------------------------------------------------
 .../calcite/rules/HivePreFilteringRule.java     | 58 ++++++++------
 .../clientpositive/filter_cond_pushdown.q       |  5 ++
 .../clientpositive/filter_cond_pushdown.q.out   | 80 ++++++++++++++++++++
 3 files changed, 121 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/be05e32e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
index 349c7f8..5824127 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
@@ -47,6 +47,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.LinkedHashMultimap;
 import com.google.common.collect.Multimap;
+import com.google.common.collect.Sets;
 
 
 public class HivePreFilteringRule extends RelOptRule {
@@ -178,54 +179,67 @@ public class HivePreFilteringRule extends RelOptRule {
     assert condition.getKind() == SqlKind.OR;
     Multimap<String,RexNode> reductionCondition = LinkedHashMultimap.create();
 
+    // Data structure to control whether a certain reference is present in every operand
+    Set<String> refsInAllOperands = null;
+
     // 1. We extract the information necessary to create the predicate for the new
     //    filter; currently we support comparison functions, in and between
     ImmutableList<RexNode> operands = RexUtil.flattenOr(((RexCall) condition).getOperands());
-    for (RexNode operand: operands) {
+    for (int i = 0; i < operands.size(); i++) {
+      final RexNode operand = operands.get(i);
+
       final RexNode operandCNF = RexUtil.toCnf(rexBuilder, operand);
       final List<RexNode> conjunctions = RelOptUtil.conjunctions(operandCNF);
-      boolean addedToReductionCondition = false; // Flag to control whether we have added a new factor
-                                                 // to the reduction predicate
+
+      Set<String> refsInCurrentOperand = Sets.newHashSet();
       for (RexNode conjunction: conjunctions) {
+        // We do not know what it is, we bail out for safety
         if (!(conjunction instanceof RexCall)) {
-          continue;
+          return new ArrayList<>();
         }
         RexCall conjCall = (RexCall) conjunction;
+        RexNode ref = null;
         if(COMPARISON.contains(conjCall.getOperator().getKind())) {
           if (conjCall.operands.get(0) instanceof RexInputRef &&
                   conjCall.operands.get(1) instanceof RexLiteral) {
-            reductionCondition.put(conjCall.operands.get(0).toString(),
-                    conjCall);
-            addedToReductionCondition = true;
+            ref = conjCall.operands.get(0);
           } else if (conjCall.operands.get(1) instanceof RexInputRef &&
                   conjCall.operands.get(0) instanceof RexLiteral) {
-            reductionCondition.put(conjCall.operands.get(1).toString(),
-                    conjCall);
-            addedToReductionCondition = true;
+            ref = conjCall.operands.get(1);
+          } else {
+            // We do not know what it is, we bail out for safety
+            return new ArrayList<>();
           }
         } else if(conjCall.getOperator().getKind().equals(SqlKind.IN)) {
-          reductionCondition.put(conjCall.operands.get(0).toString(),
-                  conjCall);
-          addedToReductionCondition = true;
+          ref = conjCall.operands.get(0);
         } else if(conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) {
-          reductionCondition.put(conjCall.operands.get(1).toString(),
-                  conjCall);
-          addedToReductionCondition = true;
+          ref = conjCall.operands.get(1);
+        } else {
+          // We do not know what it is, we bail out for safety
+          return new ArrayList<>();
         }
+
+        String stringRef = ref.toString();
+        reductionCondition.put(stringRef, conjCall);
+        refsInCurrentOperand.add(stringRef);
       }
 
-      // If we did not add any factor, we can bail out
-      if (!addedToReductionCondition) {
+      // Updates the references that are present in every operand up till now
+      if (i == 0) {
+        refsInAllOperands = refsInCurrentOperand;
+      } else {
+        refsInAllOperands = Sets.intersection(refsInAllOperands, refsInCurrentOperand);
+      }
+      // If we did not add any factor or there are no common factors, we can bail out
+      if (refsInAllOperands.isEmpty()) {
         return new ArrayList<>();
       }
     }
 
     // 2. We gather the common factors and return them
     List<RexNode> commonOperands = new ArrayList<>();
-    for (Entry<String,Collection<RexNode>> pair : reductionCondition.asMap().entrySet()) {
-      if (pair.getValue().size() == operands.size()) {
-        commonOperands.add(RexUtil.composeDisjunction(rexBuilder, pair.getValue(), false));
-      }
+    for (String ref : refsInAllOperands) {
+      commonOperands.add(RexUtil.composeDisjunction(rexBuilder, reductionCondition.get(ref), false));
     }
     return commonOperands;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/be05e32e/ql/src/test/queries/clientpositive/filter_cond_pushdown.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/filter_cond_pushdown.q b/ql/src/test/queries/clientpositive/filter_cond_pushdown.q
index 5e23b71..2425706 100644
--- a/ql/src/test/queries/clientpositive/filter_cond_pushdown.q
+++ b/ql/src/test/queries/clientpositive/filter_cond_pushdown.q
@@ -17,3 +17,8 @@ JOIN (
   JOIN (SELECT * FROM cbo_t3 t3 WHERE c_int=1) t3 ON t2.key=t3.c_int
   WHERE ((t2.key=t3.key) AND (t2.c_float + t3.c_float > 2)) OR
       ((t2.key=t3.key) AND (t2.c_int + t3.c_int > 2))) t4 ON t1.key=t4.key;
+
+EXPLAIN
+SELECT f.key, f.value, m.value
+FROM src f JOIN src m ON(f.key = m.key AND m.value is not null AND m.value !='')
+WHERE (f.value IN ('2008-04-08','2008-04-10') AND f.value IN ('2008-04-08','2008-04-09') AND m.value='2008-04-10') OR (m.value='2008-04-08');

http://git-wip-us.apache.org/repos/asf/hive/blob/be05e32e/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
index af42d5c..99eb3f7 100644
--- a/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/filter_cond_pushdown.q.out
@@ -380,3 +380,83 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: EXPLAIN
+SELECT f.key, f.value, m.value
+FROM src f JOIN src m ON(f.key = m.key AND m.value is not null AND m.value !='')
+WHERE (f.value IN ('2008-04-08','2008-04-10') AND f.value IN ('2008-04-08','2008-04-09') AND m.value='2008-04-10') OR (m.value='2008-04-08')
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT f.key, f.value, m.value
+FROM src f JOIN src m ON(f.key = m.key AND m.value is not null AND m.value !='')
+WHERE (f.value IN ('2008-04-08','2008-04-10') AND f.value IN ('2008-04-08','2008-04-09') AND m.value='2008-04-10') OR (m.value='2008-04-08')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: f
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string)
+          TableScan
+            alias: f
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((((value = '2008-04-10') or (value = '2008-04-08')) and value is not null) and (value <> '')) and key is not null) (type: boolean)
+              Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col1, _col3
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (((_col1) IN ('2008-04-08', '2008-04-10') and (_col1) IN ('2008-04-08', '2008-04-09') and (_col3 = '2008-04-10')) or (_col3 = '2008-04-08')) (type: boolean)
+            Statistics: Num rows: 171 Data size: 1816 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 171 Data size: 1816 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 171 Data size: 1816 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+