You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2020/04/10 01:57:57 UTC

[impala] branch master updated: IMPALA-9620: Ensure group-by and cnf exprs are analyzed

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 293dc2e  IMPALA-9620: Ensure group-by and cnf exprs are analyzed
293dc2e is described below

commit 293dc2ec92d0cadf1d3803a22e66e762d0ff6cf1
Author: Aman Sinha <am...@cloudera.com>
AuthorDate: Wed Apr 8 17:47:18 2020 -0700

    IMPALA-9620: Ensure group-by and cnf exprs are analyzed
    
    This change initializes the SelectStmt's groupingExprs_
    with the analyzed version. It also analyzes the new predicates
    created by the Conjunctive Normal Form rewrite rule such that
    potential consumers of this rewrite don't encounter problems.
    
    Before this change, the SelectStmt.analyzeGroupingExprs() made
    a deep copy of the original grouping exprs, then analyzed the
    copy but left the original intact. This causes problems because
    a rewrite rule (invoked by SelectStmt.rewriteExprs()) may try to
    process the original grouping exprs and encounter INVALID_TYPE
    (types are only assigned after analyze). This was the root cause
    of the problem described in the JIRA. Although this was a pre-
    existing behavior, it gets exposed when enable_cnf_rewrites=true.
    Note that the deep-copied analyzed grouping exprs are supplied
    to MultiAggregateInfo and since many operations are using
    this data structure, we don't see widespread issues.
    
    This patch fixes it and as a conservative measure, does the
    analyze of new predicates in the CNF rule. (note: there are likely
    other rewrite rules where explicit analyze should be done but
    that is outside the scope for this issue).
    
    Testing:
     - Added new unit tests with predicates in SELECT and GROUP BY
     - Ran 'mvn test' for the FE
    
    Change-Id: I6da4a17c6e648f466ce118c4646520ff68f9878e
    Reviewed-on: http://gerrit.cloudera.org:8080/15693
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/analysis/SelectStmt.java     |  2 ++
 .../apache/impala/rewrite/ConvertToCNFRule.java    | 16 +++++----
 .../queries/PlannerTest/convert-to-cnf.test        | 41 ++++++++++++++++++++++
 3 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index 6f174d7..00323de 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -706,6 +706,8 @@ public class SelectStmt extends QueryStmt {
                   + groupingExprsCopy_.get(i).toSql());
         }
       }
+      // initialize groupingExprs_ with the analyzed version
+      groupingExprs_ = groupingExprsCopy_;
     }
 
     private void collectAggExprs() {
diff --git a/fe/src/main/java/org/apache/impala/rewrite/ConvertToCNFRule.java b/fe/src/main/java/org/apache/impala/rewrite/ConvertToCNFRule.java
index da8f2ac..b955a21 100644
--- a/fe/src/main/java/org/apache/impala/rewrite/ConvertToCNFRule.java
+++ b/fe/src/main/java/org/apache/impala/rewrite/ConvertToCNFRule.java
@@ -22,6 +22,7 @@ import org.apache.impala.analysis.CompoundPredicate;
 import org.apache.impala.analysis.Expr;
 import org.apache.impala.analysis.Predicate;
 import org.apache.impala.analysis.TupleId;
+import org.apache.impala.common.AnalysisException;
 
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -82,11 +83,11 @@ public class ConvertToCNFRule implements ExprRewriteRule {
   private final boolean forMultiTablesOnly_;
 
   @Override
-  public Expr apply(Expr expr, Analyzer analyzer) {
-    return convertToCNF(expr);
+  public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException {
+    return convertToCNF(expr, analyzer);
   }
 
-  private Expr convertToCNF(Expr pred) {
+  private Expr convertToCNF(Expr pred, Analyzer analyzer) throws AnalysisException {
     if (!(pred instanceof CompoundPredicate)) {
       return pred;
     }
@@ -121,13 +122,13 @@ public class ConvertToCNFRule implements ExprRewriteRule {
           // predicate: (a AND b) OR c
           // convert to (a OR c) AND (b OR c)
           return createPredAndIncrementCount(lhs.getChild(0), rhs,
-                  lhs.getChild(1), rhs);
+                  lhs.getChild(1), rhs, analyzer);
         } else if (rhs instanceof CompoundPredicate &&
             ((CompoundPredicate)rhs).getOp() == CompoundPredicate.Operator.AND) {
           // predicate: a OR (b AND c)
           // convert to (a OR b) AND (a or c)
           return createPredAndIncrementCount(lhs, rhs.getChild(0),
-                  lhs, rhs.getChild(1));
+                  lhs, rhs.getChild(1), analyzer);
         }
       } else if (cpred.getOp() == CompoundPredicate.Operator.NOT) {
         Expr child = cpred.getChild(0);
@@ -141,6 +142,7 @@ public class ConvertToCNFRule implements ExprRewriteRule {
           Expr rhs1 = new CompoundPredicate(CompoundPredicate.Operator.NOT, rhs, null);
           Predicate newPredicate =
               (CompoundPredicate) CompoundPredicate.createConjunction(lhs1, rhs1);
+          newPredicate.analyze(analyzer);
           numCnfExprs_++;
           return newPredicate;
         }
@@ -154,7 +156,8 @@ public class ConvertToCNFRule implements ExprRewriteRule {
    * the disjuncts into a top level conjunct. Increment the CNF exprs count.
    */
   private Predicate createPredAndIncrementCount(Expr first_lhs, Expr second_lhs,
-                                                Expr first_rhs, Expr second_rhs) {
+      Expr first_rhs, Expr second_rhs,
+      Analyzer analyzer) throws AnalysisException {
     List<Expr> disjuncts = Arrays.asList(first_lhs, second_lhs);
     Expr lhs1 = (CompoundPredicate)
         CompoundPredicate.createDisjunctivePredicate(disjuncts);
@@ -163,6 +166,7 @@ public class ConvertToCNFRule implements ExprRewriteRule {
         CompoundPredicate.createDisjunctivePredicate(disjuncts);
     Predicate newPredicate = (CompoundPredicate)
         CompoundPredicate.createConjunction(lhs1, rhs1);
+    newPredicate.analyze(analyzer);
     numCnfExprs_++;
     return newPredicate;
   }
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/convert-to-cnf.test b/testdata/workloads/functional-planner/queries/PlannerTest/convert-to-cnf.test
index 272a2b4..5fdc116 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/convert-to-cnf.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/convert-to-cnf.test
@@ -287,3 +287,44 @@ PLAN-ROOT SINK
    runtime filters: RF000 -> o_orderkey
    row-size=16B cardinality=1.50M
 ====
+# IMPALA-9620: query1
+# Test predicates in the SELECT and GROUP-BY
+# with enable_cnf_rewrites = true. No rewrite is expected
+# but query was failing without the patch.
+select l_quantity,
+  if(l_quantity < 5 or l_quantity > 45, 'invalid', 'valid')
+ from lineitem
+ group by l_quantity,
+  if(l_quantity < 5 or l_quantity > 45, 'invalid', 'valid')
+  limit 5
+---- QUERYOPTIONS
+ENABLE_CNF_REWRITES=true
+---- PLAN
+PLAN-ROOT SINK
+|
+01:AGGREGATE [FINALIZE]
+|  group by: l_quantity, if(l_quantity < 5 OR l_quantity > 45, 'invalid', 'valid')
+|  limit: 5
+|  row-size=20B cardinality=5
+|
+00:SCAN HDFS [tpch_parquet.lineitem]
+   HDFS partitions=1/1 files=3 size=193.99MB
+   row-size=8B cardinality=6.00M
+====
+# IMPALA-9620: query2
+select case when not (l_quantity = 5) then 0 else 1 end
+ from lineitem
+ group by case when not (l_quantity = 5) then 0 else 1 end
+---- QUERYOPTIONS
+ENABLE_CNF_REWRITES=true
+---- PLAN
+PLAN-ROOT SINK
+|
+01:AGGREGATE [FINALIZE]
+|  group by: CASE WHEN NOT (l_quantity = 5) THEN 0 ELSE 1 END
+|  row-size=1B cardinality=2
+|
+00:SCAN HDFS [tpch_parquet.lineitem]
+   HDFS partitions=1/1 files=3 size=193.99MB
+   row-size=8B cardinality=6.00M
+====