You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2023/09/14 19:19:16 UTC

[impala] 02/03: IMPALA-11284: Do non-optional rewrites for || and Between predicate

This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 130a55e5269ea023b43ef2c0b495989cb0759800
Author: Abhishek Rawat <ar...@cloudera.com>
AuthorDate: Wed Jun 1 17:50:20 2022 -0700

    IMPALA-11284: Do non-optional rewrites for || and Between predicate
    
    IMPALA-6590 disabled expression rewrites for ValuesStmt. However,
    CompoundVerticalBarExpr (||) cannot be executed directly without
    rewrite. This is because it could either be an OR operation with boolean
    arguments or CONCAT function call with string arguments.
    
    Backend cannot evaluate a BetweenPredicate and relies on rewriting
    BetweenPredicate into a conjunctive or disjunctive CompoundPredicate.
    
    This patch enables non-optional expression rewrites for ValuesStmt with
    CompoundVerticalBarExpr or BetweenPredicate.
    
    Testing:
    - Extended ExprRewriterTest and Planner test to have values clause
      with || and Between predicate
    
    Change-Id: I99b8b33bf6468d12b9e26f0a6e744feb7072619c
    Reviewed-on: http://gerrit.cloudera.org:8080/18581
    Reviewed-by: Michael Smith <mi...@cloudera.com>
    Reviewed-by: Daniel Becker <da...@cloudera.com>
    Tested-by: Riza Suminto <ri...@cloudera.com>
---
 .../java/org/apache/impala/analysis/ValuesStmt.java  | 18 ++++++++++++++----
 .../java/org/apache/impala/rewrite/ExprRewriter.java |  7 +++++++
 .../org/apache/impala/analysis/ExprRewriterTest.java | 16 ++++++++++++++--
 .../functional-query/queries/QueryTest/values.test   | 20 +++++++++++++++++++-
 4 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java b/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java
index 6231497dc..d6a3d7348 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java
@@ -17,13 +17,16 @@
 
 package org.apache.impala.analysis;
 
+import java.util.Arrays;
 import java.util.List;
 
 import com.google.common.base.Preconditions;
 
 import static org.apache.impala.analysis.ToSqlOptions.DEFAULT;
 import org.apache.impala.common.AnalysisException;
+import org.apache.impala.rewrite.BetweenToCompoundRule;
 import org.apache.impala.rewrite.ExprRewriter;
+import org.apache.impala.rewrite.ExtractCompoundVerticalBarExprRule;
 
 /**
  * Representation of a values() statement with a list of constant-expression lists.
@@ -84,11 +87,18 @@ public class ValuesStmt extends UnionStmt {
   @Override
   public ValuesStmt clone() { return new ValuesStmt(this); }
 
-  /**
-   * Intentionally left empty to disable expression rewrite for values clause.
-   */
   @Override
-  public void rewriteExprs(ExprRewriter rewriter) {}
+  public void rewriteExprs(ExprRewriter rewriter) throws AnalysisException {
+    // IMPALA-11284: Expression rewrites for VALUES() could result in performance
+    // regression since overhead can be huge and there is virtually no benefit of
+    // rewrite if the expression will only ever be evaluated once (IMPALA-6590).
+    // The following code only does the non-optional rewrites for || and BETWEEN
+    // operator as the backend cannot execute them directly.
+    ExprRewriter mandatoryRewriter = new ExprRewriter(Arrays.asList(
+        BetweenToCompoundRule.INSTANCE, ExtractCompoundVerticalBarExprRule.INSTANCE));
+    super.rewriteExprs(mandatoryRewriter);
+    rewriter.addNumChanges(mandatoryRewriter);
+  }
 
   @Override
   protected boolean shouldAvoidLossyCharPadding(Analyzer analyzer) {
diff --git a/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java b/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java
index 37c092733..d30531ac6 100644
--- a/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java
+++ b/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java
@@ -91,6 +91,13 @@ public class ExprRewriter {
     for (int i = 0; i < exprs.size(); ++i) exprs.set(i, rewrite(exprs.get(i), analyzer));
   }
 
+  /**
+   * Add numChanges_ of otherRewriter to this rewriter's numChanges_.
+   */
+  public void addNumChanges(ExprRewriter otherRewriter) {
+    numChanges_ += otherRewriter.numChanges_;
+  }
+
   public void reset() { numChanges_ = 0; }
   public boolean changed() { return numChanges_ > 0; }
   public int getNumChanges() { return numChanges_; }
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
index 9880db3b2..4c9457a39 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
@@ -149,8 +149,20 @@ public class ExprRewriterTest extends AnalyzerTest {
         stmt_, stmt_), 47, 23);
     // Constant select.
     RewritesOk("select 1, 2, 3, 4", 4, 4);
-    // Values stmt - expression rewrites are disabled.
-    RewritesOk("values(1, '2', 3, 4.1), (1, '2', 3, 4.1)", 0, 0);
+    // Values stmt - expression rewrites are not required in this test cases.
+    RewritesOk("values(1, '2', 3, 4.1), (1, '2', 3, 4.1),"
+            + "(CAST(true OR false AS INT), '2', 3*1+2-4, 1.1%1)",
+        0, 0);
+    RewritesOk("values(CONCAT('a', 'b'), true OR true)", 0, 0);
+    // Values stmt - expression rewrites are required for || and Between predicate.
+    RewritesOk("values(1 <= 2 || 'impala' <> 'IMPALA'), (0.5 BETWEEN 0 AND 1),"
+            + "('a' NOT BETWEEN 'b' AND 'c')",
+        3, 0);
+    // Values stmt - expression rewrites are required for || and Between predicate that
+    // is not at root Expr.
+    RewritesOk("values(1 <= 2 AND ((0.5 BETWEEN 0 AND 1) AND "
+            + "(('a' || 'b') = 'ab' AND (true || false))))",
+        3, 0);
     // Test WHERE-clause subqueries.
     RewritesOk("select id, int_col from functional.alltypes a " +
         "where exists (select 1 from functional.alltypes " +
diff --git a/testdata/workloads/functional-query/queries/QueryTest/values.test b/testdata/workloads/functional-query/queries/QueryTest/values.test
index bfda103b2..f19830dbe 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/values.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/values.test
@@ -140,4 +140,22 @@ select cast("0.43149576573887316" as double)
 0.43149576573887316
 ---- TYPES
 DOUBLE
-====
\ No newline at end of file
+====
+---- QUERY
+# IMPALA-11284: Don't skip rewrites for || and BETWEEN operator as the backend cannot
+# execute them directly.
+select * from
+(
+  values (concat("a", "b" || "c"), 1 <= 2 AND ((0.5 BETWEEN 0 AND 1) AND (true || false))),
+         ("hello" || "world", 0 <= 1 || 0.5 < 0.6),
+         ("impala", 4.0 BETWEEN 3.2 AND 4.1),
+         ("sql", 'a' NOT BETWEEN 'b' AND 'c')
+) t;
+---- RESULTS
+'abc',true
+'helloworld',true
+'impala',true
+'sql',true
+---- TYPES
+string,boolean
+====