You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/06/24 22:30:20 UTC

[impala] branch master updated: IMPALA-10865: Fix initialize SelectStmt's groupingExprs_ in analyzeGroupingExprs

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new f561daa24 IMPALA-10865: Fix initialize SelectStmt's groupingExprs_ in analyzeGroupingExprs
f561daa24 is described below

commit f561daa247d6e6bd2f898e9b637830bcd51b6cc5
Author: guojingfeng <gu...@tencent.com>
AuthorDate: Tue Aug 17 11:43:06 2021 +0800

    IMPALA-10865: Fix initialize SelectStmt's groupingExprs_ in analyzeGroupingExprs
    
    This patch rollback some changes of IMPALA-9620. IMPALA-9620 re-
    initialize SelectStmt's groupingExprs_ to ensure that group-by and
    cnf exprs are analyzed. But the following patch of IMPALA-9693
    explicitly analyzes exprs which is equivalent to IMPALA-9620. So this
    rollback is safe here.
    
    In general, the analyze algorithm is that:
    1. Analyze the stmt tree and make copies of expressions
    2. Rewrite selected expressions, **rewrite rules should ensure
       rewritten exprs are analyzed**
    3. Make copied expressions analyzed
    4. ReAnalyze the tree
    
    The problem is that if we change the groupingExprs_ of SelectStmt,
    in re-analyze phase column alias will be substitude to Expr that
    duplicate with origin column which will be removed in
    `buildAggregateExprs`.
    
    Another reason why this patch is submitted is that re-initialize
    SelectStmt's groupingExprs_ will cause other problems. IMPALA-10096 is
    a typical case. See jira for detail execeptions.
    
    Beside, this patch modifies ExtractCompundVerticalBarExprRule to do a
    explicit analyze to ensure expr are rewritten.
    
    Test:
    - Add new test into aggregation.test and passed
    - Ran all fe tests and passed
    
    Change-Id: I9d1779e6c282d9fd02beacf5ddfafcc5c0baf3b0
    Reviewed-on: http://gerrit.cloudera.org:8080/17781
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/analysis/SelectStmt.java     | 15 +++-----------
 .../ExtractCompoundVerticalBarExprRule.java        |  6 ++++--
 .../queries/PlannerTest/aggregation.test           | 23 +++++++++++++++++++++-
 3 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index e8fbe4821..3b52c8173 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -461,6 +461,9 @@ public class SelectStmt extends QueryStmt {
           colLabels_.add(label);
         }
       }
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("Analyzed select clause aliasSmap={}", aliasSmap_.debugString());
+      }
     }
 
     private void verifyResultExprs() throws AnalysisException {
@@ -967,18 +970,6 @@ public class SelectStmt extends QueryStmt {
                   + groupingExprsCopy_.get(i).toSql());
         }
       }
-      // initialize groupingExprs_ with the analyzed version
-      // use the original ordinal if the analyzed expr is a INT literal
-      List<Expr> groupingExprs = new ArrayList<>();
-      for (int i = 0; i < groupingExprsCopy_.size(); ++i) {
-        Expr expr = groupingExprsCopy_.get(i);
-        if (expr instanceof NumericLiteral && Expr.IS_INT_LITERAL.apply(expr)) {
-          groupingExprs.add(groupingExprs_.get(i).clone());
-        } else {
-          groupingExprs.add(expr);
-        }
-      }
-      groupingExprs_ = groupingExprs;
 
       if (groupByClause_ != null && groupByClause_.hasGroupingSets()) {
         groupByClause_.analyzeGroupingSets(groupingExprsCopy_);
diff --git a/fe/src/main/java/org/apache/impala/rewrite/ExtractCompoundVerticalBarExprRule.java b/fe/src/main/java/org/apache/impala/rewrite/ExtractCompoundVerticalBarExprRule.java
index e51797021..8fc27b826 100644
--- a/fe/src/main/java/org/apache/impala/rewrite/ExtractCompoundVerticalBarExprRule.java
+++ b/fe/src/main/java/org/apache/impala/rewrite/ExtractCompoundVerticalBarExprRule.java
@@ -33,10 +33,12 @@ public class ExtractCompoundVerticalBarExprRule implements ExprRewriteRule {
 
   @Override
   public Expr apply(Expr expr, Analyzer analyzer) {
-    if (!expr.isAnalyzed()) return expr;
-
     if (expr instanceof CompoundVerticalBarExpr) {
       CompoundVerticalBarExpr pred = (CompoundVerticalBarExpr) expr;
+      if (!expr.isAnalyzed()) {
+        pred = (CompoundVerticalBarExpr) expr.clone();
+        pred.analyzeNoThrow(analyzer);
+      }
       return pred.getEncapsulatedExpr();
     }
     return expr;
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/aggregation.test b/testdata/workloads/functional-planner/queries/PlannerTest/aggregation.test
index 2ac3f53d4..5d10e41a2 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/aggregation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/aggregation.test
@@ -1665,10 +1665,31 @@ PLAN-ROOT SINK
 |
 01:AGGREGATE [FINALIZE]
 |  output: count(*)
-|  group by: 2 + 1, id
+|  group by: 3, id
 |  row-size=18B cardinality=10
 |
 00:SCAN HDFS [functional.dimtbl]
    HDFS partitions=1/1 files=1 size=171B
    row-size=8B cardinality=10
 ====
+# IMPALA-10865: Group by expr with column alias reference errors in
+# re-analyze phase.
+SELECT ss_item_sk ss_item_sk_group, ss_item_sk+300 ss_item_sk,
+    count(ss_ticket_number)
+FROM tpcds.store_sales a
+WHERE ss_sold_date_sk > cast('245263' AS INT)
+GROUP BY ss_item_sk_group,
+         ss_item_sk
+---- PLAN
+PLAN-ROOT SINK
+|
+01:AGGREGATE [FINALIZE]
+|  output: count(ss_ticket_number)
+|  group by: ss_item_sk, ss_item_sk + 300
+|  row-size=24B cardinality=2.75M
+|
+00:SCAN HDFS [tpcds.store_sales a]
+   partition predicates: ss_sold_date_sk > 245263
+   HDFS partitions=1823/1824 files=1823 size=195.68MB
+   row-size=16B cardinality=2.75M
+====