You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/06/16 11:11:48 UTC

[doris] branch master updated: [opt](Nereids) revert convert IN with 2 options to OR expression rule (#20894)

This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5dc0f90c7f [opt](Nereids) revert convert IN with 2 options to OR expression rule (#20894)
5dc0f90c7f is described below

commit 5dc0f90c7f30823df13553fa9c6683720492981a
Author: morrySnow <10...@users.noreply.github.com>
AuthorDate: Fri Jun 16 19:11:37 2023 +0800

    [opt](Nereids) revert convert IN with 2 options to OR expression rule (#20894)
    
    Revert this rule because it has a negative effect on predicate push-down to the storage layer.
---
 .../expression/rules/InPredicateToEqualToRule.java | 27 ++++------------------
 .../rules/expression/ExpressionRewriteTest.java    | 10 ++------
 .../nereids_tpcds_shape_sf100_p0/shape/query66.out |  4 ++--
 .../nereids_tpcds_shape_sf100_p0/shape/query68.out |  2 +-
 .../nereids_tpch_shape_sf1000_p0/shape/q12.out     |  2 +-
 .../nereids_tpch_shape_sf1000_p0/shape/q19.out     |  2 +-
 .../data/nereids_tpch_shape_sf500_p0/shape/q12.out |  2 +-
 .../data/nereids_tpch_shape_sf500_p0/shape/q19.out |  2 +-
 8 files changed, 14 insertions(+), 37 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/InPredicateToEqualToRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/InPredicateToEqualToRule.java
index 6546f08646..b076cadd53 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/InPredicateToEqualToRule.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/InPredicateToEqualToRule.java
@@ -22,21 +22,13 @@ import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext;
 import org.apache.doris.nereids.trees.expressions.EqualTo;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.InPredicate;
-import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
-import org.apache.doris.nereids.util.ExpressionUtils;
-
-import com.google.common.base.Preconditions;
 
 import java.util.List;
-import java.util.stream.Collectors;
 
 /**
  * Paper: Quantifying TPC-H Choke Points and Their Optimizations
  * - Figure 14:
  * <p>
- * Rewrite InPredicate to disjunction, if there exists < 3 elements in InPredicate
- * Examples:
- * where A in (x, y) ==> where A = x or A = y
  * Examples:
  * where A in (x) ==> where A = x
  * where A not in (x) ==> where not A = x (After ExpressionTranslator, "not A = x" will be translated to "A != x")
@@ -50,20 +42,11 @@ public class InPredicateToEqualToRule extends AbstractExpressionRewriteRule {
 
     @Override
     public Expression visitInPredicate(InPredicate inPredicate, ExpressionRewriteContext context) {
-        Expression cmpExpr = inPredicate.getCompareExpr();
-        List<Expression> options = inPredicate.getOptions();
-        Preconditions.checkArgument(options.size() > 0, "InPredicate.options should not be empty");
-        if (options.size() > 2 || isOptionContainNullLiteral(options)) {
-            return new InPredicate(cmpExpr.accept(this, context), options);
+        Expression left = inPredicate.getCompareExpr();
+        List<Expression> right = inPredicate.getOptions();
+        if (right.size() != 1) {
+            return new InPredicate(left.accept(this, context), right);
         }
-        Expression newCmpExpr = cmpExpr.accept(this, context);
-        List<Expression> disjunction = options.stream()
-                .map(option -> new EqualTo(newCmpExpr, option.accept(this, context)))
-                .collect(Collectors.toList());
-        return disjunction.isEmpty() ? BooleanLiteral.FALSE : ExpressionUtils.or(disjunction);
-    }
-
-    private boolean isOptionContainNullLiteral(List<Expression> options) {
-        return options.stream().anyMatch(Expression::isNullLiteral);
+        return new EqualTo(left.accept(this, context), right.get(0).accept(this, context));
     }
 }
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java
index 65d9e8c6ac..2bdcc966fc 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java
@@ -179,21 +179,15 @@ public class ExpressionRewriteTest extends ExpressionRewriteTestHelper {
         executor = new ExpressionRuleExecutor(ImmutableList.of(InPredicateToEqualToRule.INSTANCE));
 
         assertRewrite("a in (1)", "a = 1");
-        assertRewrite("a in (1, 2)", "((a = 1) OR (a = 2))");
         assertRewrite("a not in (1)", "not a = 1");
-        assertRewrite("a not in (1, 2)", "not ((a = 1) OR (a = 2))");
         assertRewrite("a in (a in (1))", "a = (a = 1)");
-        assertRewrite("a in (a in (1, 2))", "a = ((a = 1) OR (a = 2))");
         assertRewrite("(a in (1)) in (1)", "(a = 1) = 1");
-        assertRewrite("(a in (1, 2)) in (1)", "((a = 1) OR (a = 2)) = 1");
-        assertRewrite("(a in (1)) in (1, 2)", "((a = 1) = 1) OR ((a = 1) = 2)");
+        assertRewrite("(a in (1, 2)) in (1)", "(a in (1, 2)) = 1");
+        assertRewrite("(a in (1)) in (1, 2)", "((a = 1) in (1, 2))");
         assertRewrite("case a when b in (1) then a else c end in (1)",
                 "case a when b = 1 then a else c end = 1");
         assertRewrite("case a when b not in (1) then a else c end not in (1)",
                 "not case a when not b = 1 then a else c end = 1");
-        assertRewrite("case a when b not in (1) then a else c end in (1, 2)",
-                "(CASE  WHEN (a = ( not (b = 1))) THEN a ELSE c END = 1) OR (CASE  WHEN (a = ( not (b = 1))) THEN a ELSE c END = 2)");
-
     }
 
     @Test
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query66.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query66.out
index 016fe50bd7..e80fc9bdd9 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query66.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query66.out
@@ -21,7 +21,7 @@ PhysicalTopN
 ------------------------------------PhysicalOlapScan[web_sales]
 ----------------------------------PhysicalDistribute
 ------------------------------------PhysicalProject
---------------------------------------filter(((ship_mode.sm_carrier = 'GREAT EASTERN') OR (ship_mode.sm_carrier = 'LATVIAN')))
+--------------------------------------filter(sm_carrier IN ('GREAT EASTERN', 'LATVIAN'))
 ----------------------------------------PhysicalOlapScan[ship_mode]
 --------------------------------PhysicalDistribute
 ----------------------------------PhysicalProject
@@ -48,7 +48,7 @@ PhysicalTopN
 ------------------------------------PhysicalOlapScan[catalog_sales]
 ----------------------------------PhysicalDistribute
 ------------------------------------PhysicalProject
---------------------------------------filter(((ship_mode.sm_carrier = 'GREAT EASTERN') OR (ship_mode.sm_carrier = 'LATVIAN')))
+--------------------------------------filter(sm_carrier IN ('GREAT EASTERN', 'LATVIAN'))
 ----------------------------------------PhysicalOlapScan[ship_mode]
 --------------------------------PhysicalDistribute
 ----------------------------------PhysicalProject
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query68.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query68.out
index 04780a2e2e..7bc504cd33 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query68.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query68.out
@@ -32,7 +32,7 @@ PhysicalTopN
 ------------------------------------------PhysicalOlapScan[date_dim]
 ----------------------------------PhysicalDistribute
 ------------------------------------PhysicalProject
---------------------------------------filter(((store.s_city = 'Pleasant Hill') OR (store.s_city = 'Five Points')))
+--------------------------------------filter(s_city IN ('Pleasant Hill', 'Five Points'))
 ----------------------------------------PhysicalOlapScan[store]
 --------------------------------PhysicalDistribute
 ----------------------------------PhysicalProject
diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q12.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q12.out
index 6dd416cefa..cf83a4373e 100644
--- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q12.out
+++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q12.out
@@ -11,6 +11,6 @@ PhysicalQuickSort
 ----------------PhysicalProject
 ------------------PhysicalOlapScan[orders]
 ----------------PhysicalProject
-------------------filter(((lineitem.l_shipmode = 'MAIL') OR (lineitem.l_shipmode = 'SHIP'))(lineitem.l_shipdate < lineitem.l_commitdate)(lineitem.l_receiptdate < 1995-01-01)(lineitem.l_receiptdate >= 1994-01-01)(lineitem.l_commitdate < lineitem.l_receiptdate))
+------------------filter(l_shipmode IN ('MAIL', 'SHIP')(lineitem.l_shipdate < lineitem.l_commitdate)(lineitem.l_receiptdate < 1995-01-01)(lineitem.l_receiptdate >= 1994-01-01)(lineitem.l_commitdate < lineitem.l_receiptdate))
 --------------------PhysicalOlapScan[lineitem]
 
diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q19.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q19.out
index 1b877ad328..150f569cdf 100644
--- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q19.out
+++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q19.out
@@ -6,7 +6,7 @@ hashAgg[GLOBAL]
 ------PhysicalProject
 --------hashJoin[INNER_JOIN](part.p_partkey = lineitem.l_partkey)((((((part.p_brand = 'Brand#12') AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')) AND ((lineitem.l_quantity >= 1.00) AND (lineitem.l_quantity <= 11.00))) AND (part.p_size <= 5)) OR ((((part.p_brand = 'Brand#23') AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')) AND ((lineitem.l_quantity >= 10.00) AND (lineitem.l_quantity <= 20.00))) AND (part.p_size <= 10))) OR ((((part.p_brand = 'Brand#34') AN [...]
 ----------PhysicalProject
-------------filter((lineitem.l_shipinstruct = 'DELIVER IN PERSON')((((lineitem.l_quantity >= 1.00) AND (lineitem.l_quantity <= 11.00)) OR ((lineitem.l_quantity >= 10.00) AND (lineitem.l_quantity <= 20.00))) OR ((lineitem.l_quantity >= 20.00) AND (lineitem.l_quantity <= 30.00)))((lineitem.l_shipmode = 'AIR') OR (lineitem.l_shipmode = 'AIR REG')))
+------------filter(l_shipmode IN ('AIR', 'AIR REG')(lineitem.l_shipinstruct = 'DELIVER IN PERSON')((((lineitem.l_quantity >= 1.00) AND (lineitem.l_quantity <= 11.00)) OR ((lineitem.l_quantity >= 10.00) AND (lineitem.l_quantity <= 20.00))) OR ((lineitem.l_quantity >= 20.00) AND (lineitem.l_quantity <= 30.00))))
 --------------PhysicalOlapScan[lineitem]
 ----------PhysicalDistribute
 ------------PhysicalProject
diff --git a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q12.out b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q12.out
index 6dd416cefa..cf83a4373e 100644
--- a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q12.out
+++ b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q12.out
@@ -11,6 +11,6 @@ PhysicalQuickSort
 ----------------PhysicalProject
 ------------------PhysicalOlapScan[orders]
 ----------------PhysicalProject
-------------------filter(((lineitem.l_shipmode = 'MAIL') OR (lineitem.l_shipmode = 'SHIP'))(lineitem.l_shipdate < lineitem.l_commitdate)(lineitem.l_receiptdate < 1995-01-01)(lineitem.l_receiptdate >= 1994-01-01)(lineitem.l_commitdate < lineitem.l_receiptdate))
+------------------filter(l_shipmode IN ('MAIL', 'SHIP')(lineitem.l_shipdate < lineitem.l_commitdate)(lineitem.l_receiptdate < 1995-01-01)(lineitem.l_receiptdate >= 1994-01-01)(lineitem.l_commitdate < lineitem.l_receiptdate))
 --------------------PhysicalOlapScan[lineitem]
 
diff --git a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q19.out b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q19.out
index 1b877ad328..150f569cdf 100644
--- a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q19.out
+++ b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q19.out
@@ -6,7 +6,7 @@ hashAgg[GLOBAL]
 ------PhysicalProject
 --------hashJoin[INNER_JOIN](part.p_partkey = lineitem.l_partkey)((((((part.p_brand = 'Brand#12') AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')) AND ((lineitem.l_quantity >= 1.00) AND (lineitem.l_quantity <= 11.00))) AND (part.p_size <= 5)) OR ((((part.p_brand = 'Brand#23') AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')) AND ((lineitem.l_quantity >= 10.00) AND (lineitem.l_quantity <= 20.00))) AND (part.p_size <= 10))) OR ((((part.p_brand = 'Brand#34') AN [...]
 ----------PhysicalProject
-------------filter((lineitem.l_shipinstruct = 'DELIVER IN PERSON')((((lineitem.l_quantity >= 1.00) AND (lineitem.l_quantity <= 11.00)) OR ((lineitem.l_quantity >= 10.00) AND (lineitem.l_quantity <= 20.00))) OR ((lineitem.l_quantity >= 20.00) AND (lineitem.l_quantity <= 30.00)))((lineitem.l_shipmode = 'AIR') OR (lineitem.l_shipmode = 'AIR REG')))
+------------filter(l_shipmode IN ('AIR', 'AIR REG')(lineitem.l_shipinstruct = 'DELIVER IN PERSON')((((lineitem.l_quantity >= 1.00) AND (lineitem.l_quantity <= 11.00)) OR ((lineitem.l_quantity >= 10.00) AND (lineitem.l_quantity <= 20.00))) OR ((lineitem.l_quantity >= 20.00) AND (lineitem.l_quantity <= 30.00))))
 --------------PhysicalOlapScan[lineitem]
 ----------PhysicalDistribute
 ------------PhysicalProject


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org