You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ro...@apache.org on 2023/11/29 15:34:24 UTC

(pinot) branch master updated: [multistage] don't prune project after agg during relBuilder (#12058)

This is an automated email from the ASF dual-hosted git repository.

rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new c57117a750 [multistage] don't prune project after agg during relBuilder (#12058)
c57117a750 is described below

commit c57117a7503aa2fd0b9a7d3bd2de19f92d49ad9b
Author: Rong Rong <ro...@apache.org>
AuthorDate: Wed Nov 29 07:34:18 2023 -0800

    [multistage] don't prune project after agg during relBuilder (#12058)
    
    
    
    ---------
    
    Co-authored-by: Rong Rong <ro...@startree.ai>
---
 .../org/apache/pinot/query/QueryEnvironment.java   |  3 +-
 .../src/test/resources/queries/JoinPlans.json      | 76 +++++++++++++++++++++-
 .../src/test/resources/queries/WithStatements.json | 21 ++++++
 3 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/QueryEnvironment.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/QueryEnvironment.java
index fc210ed26c..2eb8dfff95 100644
--- a/pinot-query-planner/src/main/java/org/apache/pinot/query/QueryEnvironment.java
+++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/QueryEnvironment.java
@@ -122,7 +122,8 @@ public class QueryEnvironment {
             // SUB-QUERY Threshold is useless as we are encoding all IN clause in-line anyway
             .withInSubQueryThreshold(Integer.MAX_VALUE)
             .addRelBuilderConfigTransform(c -> c.withPushJoinCondition(true))
-            .addRelBuilderConfigTransform(c -> c.withAggregateUnique(true)))
+            .addRelBuilderConfigTransform(c -> c.withAggregateUnique(true))
+            .addRelBuilderConfigTransform(c -> c.withPruneInputOfAggregate(false)))
         .build();
     _optProgram = getOptProgram();
     _traitProgram = getTraitProgram();
diff --git a/pinot-query-planner/src/test/resources/queries/JoinPlans.json b/pinot-query-planner/src/test/resources/queries/JoinPlans.json
index 9ab9edebda..7ce235acf0 100644
--- a/pinot-query-planner/src/test/resources/queries/JoinPlans.json
+++ b/pinot-query-planner/src/test/resources/queries/JoinPlans.json
@@ -424,7 +424,81 @@
         ]
       },
       {
-        "description": "nexted reused tmp table SEMI JOINs",
+        "description": "multiple SEMI JOINs on same column multiple conditions and multiple columns",
+        "sql": "EXPLAIN PLAN FOR WITH tmp1 AS ( SELECT * FROM a WHERE col2 NOT IN ('foo', 'bar') ) SELECT * FROM a WHERE col2 IN (SELECT col1 FROM tmp1) AND col2 IN (SELECT col1 FROM b WHERE col3 > 100) AND col3 IN (SELECT col3 from b WHERE col3 < 100)",
+        "output": [
+          "Execution Plan",
+          "\nLogicalJoin(condition=[=($2, $7)], joinType=[semi])",
+          "\n  LogicalJoin(condition=[=($1, $7)], joinType=[semi])",
+          "\n    LogicalJoin(condition=[=($1, $7)], joinType=[semi])",
+          "\n      LogicalTableScan(table=[[a]])",
+          "\n      PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
+          "\n        LogicalProject(col1=[$0])",
+          "\n          LogicalFilter(condition=[AND(<>($1, 'bar'), <>($1, 'foo'))])",
+          "\n            LogicalTableScan(table=[[a]])",
+          "\n    PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
+          "\n      LogicalProject(col1=[$0])",
+          "\n        LogicalFilter(condition=[>($2, 100)])",
+          "\n          LogicalTableScan(table=[[b]])",
+          "\n  PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
+          "\n    LogicalProject(col3=[$2])",
+          "\n      LogicalFilter(condition=[<($2, 100)])",
+          "\n        LogicalTableScan(table=[[b]])",
+          "\n"
+        ]
+      },
+      {
+        "description": "multiple SEMI JOINs with Agg",
+        "sql": "EXPLAIN PLAN FOR WITH tmp1 AS ( SELECT * FROM a WHERE col2 NOT IN ('foo', 'bar') ) SELECT COUNT(*) FROM a WHERE col2 IN (SELECT col1 FROM tmp1) AND col3 IN (SELECT col3 from b WHERE col3 < 100)",
+        "output": [
+          "Execution Plan",
+          "\nLogicalAggregate(group=[{}], agg#0=[COUNT($0)])",
+          "\n  PinotLogicalExchange(distribution=[hash])",
+          "\n    LogicalAggregate(group=[{}], agg#0=[COUNT()])",
+          "\n      LogicalJoin(condition=[=($0, $1)], joinType=[semi])",
+          "\n        LogicalProject(col3=[$1])",
+          "\n          LogicalJoin(condition=[=($0, $2)], joinType=[semi])",
+          "\n            LogicalProject(col2=[$1], col3=[$2])",
+          "\n              LogicalTableScan(table=[[a]])",
+          "\n            PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
+          "\n              LogicalProject(col1=[$0])",
+          "\n                LogicalFilter(condition=[AND(<>($1, 'bar'), <>($1, 'foo'))])",
+          "\n                  LogicalTableScan(table=[[a]])",
+          "\n        PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
+          "\n          LogicalProject(col3=[$2])",
+          "\n            LogicalFilter(condition=[<($2, 100)])",
+          "\n              LogicalTableScan(table=[[b]])",
+          "\n"
+        ]
+      },
+      {
+        "description": "multiple SEMI JOINs with group-by and having clause",
+        "sql": "EXPLAIN PLAN FOR WITH tmp1 AS ( SELECT * FROM a WHERE col2 NOT IN ('foo', 'bar') ) SELECT col1, SUM(col3) FROM a WHERE col2 IN (SELECT col1 FROM tmp1) AND col3 IN (SELECT col3 from b WHERE col3 < 100) GROUP BY col1 HAVING COUNT(*) > 10",
+        "output": [
+          "Execution Plan",
+          "\nLogicalProject(col1=[$0], EXPR$1=[$1])",
+          "\n  LogicalFilter(condition=[>($2, 10)])",
+          "\n    LogicalAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($2)])",
+          "\n      PinotLogicalExchange(distribution=[hash[0]])",
+          "\n        LogicalAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT()])",
+          "\n          LogicalJoin(condition=[=($1, $2)], joinType=[semi])",
+          "\n            LogicalProject(col1=[$0], col3=[$2])",
+          "\n              LogicalJoin(condition=[=($1, $3)], joinType=[semi])",
+          "\n                LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
+          "\n                  LogicalTableScan(table=[[a]])",
+          "\n                PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
+          "\n                  LogicalProject(col1=[$0])",
+          "\n                    LogicalFilter(condition=[AND(<>($1, 'bar'), <>($1, 'foo'))])",
+          "\n                      LogicalTableScan(table=[[a]])",
+          "\n            PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
+          "\n              LogicalProject(col3=[$2])",
+          "\n                LogicalFilter(condition=[<($2, 100)])",
+          "\n                  LogicalTableScan(table=[[b]])",
+          "\n"
+        ]
+      },
+      {
+        "description": "reused tmp table in SEMI JOINs",
         "sql": "EXPLAIN PLAN FOR WITH tmp1 AS ( SELECT * FROM a WHERE col2 NOT IN ('foo', 'bar') ), tmp2 AS ( SELECT * FROM b WHERE col1 IN (SELECT col1 FROM tmp1) AND col3 < 100 ) SELECT * FROM tmp2 WHERE col3 IN (SELECT col3 from tmp1)",
         "output": [
           "Execution Plan",
diff --git a/pinot-query-runtime/src/test/resources/queries/WithStatements.json b/pinot-query-runtime/src/test/resources/queries/WithStatements.json
index 0d8063e1c5..61e0940a1c 100644
--- a/pinot-query-runtime/src/test/resources/queries/WithStatements.json
+++ b/pinot-query-runtime/src/test/resources/queries/WithStatements.json
@@ -117,6 +117,27 @@
           ["b", "bob", 196883]
         ]
       },
+      {
+        "description": "multi 'with' table and semi-joins on same column multiple conditions and other column condition",
+        "sql": "WITH t1 AS ( SELECT * FROM {tbl1} WHERE intCol > 1 ) SELECT boolCol, strCol1, strCol2, intCol FROM {tbl2} WHERE strCol1 IN (SELECT strCol FROM t1) AND strCol2 IN (SELECT strCol2 FROM {tbl2} WHERE intCol > 1000) AND strCol1 IN (SELECT strCol1 FROM {tbl2} WHERE boolCol)",
+        "outputs": [
+          [true, "b", "bob", 196883]
+        ]
+      },
+      {
+        "description": "multi 'with' table and semi-joins and group-by with having",
+        "sql": "WITH t1 AS ( SELECT * FROM {tbl1} WHERE intCol > 1 ) SELECT strCol1, SUM(doubleCol) FROM {tbl2} WHERE strCol1 IN (SELECT strCol FROM t1) AND intCol IN (SELECT intCol FROM {tbl1} WHERE intCol < 100) GROUP BY strCol1 HAVING COUNT(*) < 10",
+        "outputs": [
+          ["a", 275.12]
+        ]
+      },
+      {
+        "description": "multi 'with' table and semi-joins and agg",
+        "sql": "WITH t1 AS ( SELECT * FROM {tbl1} WHERE intCol > 1 ) SELECT COUNT(*) FROM {tbl2} WHERE strCol1 IN (SELECT strCol FROM t1) AND intCol IN (SELECT intCol FROM {tbl1} WHERE intCol < 100)",
+        "outputs": [
+          [2]
+        ]
+      },
       {
         "description": "nested 'with' on agg table: (with a as ( ... ), select ... ",
         "sql": "WITH agg1 AS (SELECT strCol1, strCol2, sum(intCol) AS sumVal FROM {tbl2} GROUP BY strCol1, strCol2) SELECT strCol1, avg(sumVal) AS avgVal FROM agg1 GROUP BY strCol1",


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org