You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ro...@apache.org on 2023/11/22 15:26:26 UTC

(pinot) branch master updated: [multistage][bugfix] fix multi-semi-join issues (#12038)

This is an automated email from the ASF dual-hosted git repository.

rongr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 22403e5675 [multistage][bugfix] fix multi-semi-join issues (#12038)
22403e5675 is described below

commit 22403e567510fabf6016f01865721bea324ac21c
Author: Rong Rong <ro...@apache.org>
AuthorDate: Wed Nov 22 07:26:17 2023 -0800

    [multistage][bugfix] fix multi-semi-join issues (#12038)
    
    - fix multi-semi join nested and unable to find proper semi-hint from project
    - alter the order of AGG_PROJECT merge mechanism, now projects are much more concise
    - avoided exchanging more than required data off leaf.
    - fix another bug: dynamic broadcast rule cannot be applied to lateral semi-join (with more than 1 join key)
    ---------
    
    Co-authored-by: Rong Rong <ro...@startree.ai>
---
 .../rel/rules/PinotJoinToDynamicBroadcastRule.java |  6 ++-
 .../calcite/rel/rules/PinotQueryRuleSets.java      |  5 +--
 .../src/test/resources/queries/AggregatePlans.json | 49 ++++++++++------------
 .../resources/queries/ExplainPhysicalPlans.json    | 36 ++++++++--------
 .../src/test/resources/queries/GroupByPlans.json   | 47 ++++++++++-----------
 .../src/test/resources/queries/JoinPlans.json      | 31 +++++++-------
 .../test/resources/queries/PinotHintablePlans.json | 46 ++++++++++----------
 7 files changed, 106 insertions(+), 114 deletions(-)

diff --git a/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotJoinToDynamicBroadcastRule.java b/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotJoinToDynamicBroadcastRule.java
index 1e80585c7d..bdbaec23b5 100644
--- a/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotJoinToDynamicBroadcastRule.java
+++ b/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotJoinToDynamicBroadcastRule.java
@@ -142,9 +142,13 @@ public class PinotJoinToDynamicBroadcastRule extends RelOptRule {
     RelNode right = join.getRight() instanceof HepRelVertex ? ((HepRelVertex) join.getRight()).getCurrentRel()
         : join.getRight();
     return left instanceof Exchange && right instanceof Exchange
+        // left side can be pushed as dynamic exchange
         && PinotRuleUtils.canPushDynamicBroadcastToLeaf(left.getInput(0))
         // default enable dynamic broadcast for SEMI join unless other join strategy were specified
-        && (!explicitOtherStrategy && join.getJoinType() == JoinRelType.SEMI && joinInfo.nonEquiConditions.isEmpty());
+        && !explicitOtherStrategy
+        // condition for SEMI join
+        && join.getJoinType() == JoinRelType.SEMI && joinInfo.nonEquiConditions.isEmpty()
+        && joinInfo.leftKeys.size() == 1;
   }
 
   @Override
diff --git a/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotQueryRuleSets.java b/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotQueryRuleSets.java
index ebb4029348..b73a2b62b2 100644
--- a/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotQueryRuleSets.java
+++ b/pinot-query-planner/src/main/java/org/apache/calcite/rel/rules/PinotQueryRuleSets.java
@@ -50,14 +50,12 @@ public class PinotQueryRuleSets {
           CoreRules.PROJECT_SET_OP_TRANSPOSE,
 
           // aggregation and projection rules
-          CoreRules.AGGREGATE_PROJECT_MERGE, CoreRules.AGGREGATE_PROJECT_PULL_UP_CONSTANTS,
+          CoreRules.AGGREGATE_PROJECT_PULL_UP_CONSTANTS,
           // push a projection past a filter or vice versa
           CoreRules.PROJECT_FILTER_TRANSPOSE, CoreRules.FILTER_PROJECT_TRANSPOSE,
           // push a projection to the children of a join
           // push all expressions to handle the time indicator correctly
           CoreRules.JOIN_CONDITION_PUSH,
-          // merge projections
-          CoreRules.PROJECT_MERGE,
           // remove identity project
           CoreRules.PROJECT_REMOVE,
 
@@ -102,6 +100,7 @@ public class PinotQueryRuleSets {
 
   // The pruner rules run top-down to ensure Calcite restarts from root node after applying a transformation.
   public static final Collection<RelOptRule> PRUNE_RULES = ImmutableList.of(
+      CoreRules.AGGREGATE_PROJECT_MERGE,
       CoreRules.PROJECT_MERGE,
       CoreRules.FILTER_MERGE,
       CoreRules.AGGREGATE_REMOVE,
diff --git a/pinot-query-planner/src/test/resources/queries/AggregatePlans.json b/pinot-query-planner/src/test/resources/queries/AggregatePlans.json
index c93e8d6138..9a8f042406 100644
--- a/pinot-query-planner/src/test/resources/queries/AggregatePlans.json
+++ b/pinot-query-planner/src/test/resources/queries/AggregatePlans.json
@@ -31,10 +31,9 @@
           "\nLogicalProject(avg=[CAST(/(CASE(=($1, 0), null:DECIMAL(1000, 0), $0), $1)):DECIMAL(1000, 0)])",
           "\n  LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[COUNT($1)])",
           "\n    PinotLogicalExchange(distribution=[hash])",
-          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($2)], agg#1=[COUNT()])",
-          "\n        LogicalProject(col2=[$1], col3=[$2], col4=[$3])",
-          "\n          LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
-          "\n            LogicalTableScan(table=[[a]])",
+          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($3)], agg#1=[COUNT()])",
+          "\n        LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
+          "\n          LogicalTableScan(table=[[a]])",
           "\n"
         ]
       },
@@ -46,10 +45,9 @@
           "\nLogicalProject(avg=[CAST(/(CASE(=($1, 0), null:DECIMAL(1000, 0), $0), $1)):DECIMAL(1000, 0)], sum=[CASE(=($1, 0), null:DECIMAL(1000, 0), $0)], max=[$2])",
           "\n  LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[COUNT($1)], agg#2=[MAX($2)])",
           "\n    PinotLogicalExchange(distribution=[hash])",
-          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($2)], agg#1=[COUNT()], agg#2=[MAX($2)])",
-          "\n        LogicalProject(col2=[$1], col3=[$2], col4=[$3])",
-          "\n          LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
-          "\n            LogicalTableScan(table=[[a]])",
+          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($3)], agg#1=[COUNT()], agg#2=[MAX($3)])",
+          "\n        LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
+          "\n          LogicalTableScan(table=[[a]])",
           "\n"
         ]
       },
@@ -61,10 +59,9 @@
           "\nLogicalProject(avg=[/(CAST(CASE(=($1, 0), null:BIGINT, $0)):DOUBLE, $1)], count=[$1])",
           "\n  LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[COUNT($1)])",
           "\n    PinotLogicalExchange(distribution=[hash])",
-          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($1)], agg#1=[COUNT()])",
-          "\n        LogicalProject(col2=[$1], col3=[$2])",
-          "\n          LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
-          "\n            LogicalTableScan(table=[[a]])",
+          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($2)], agg#1=[COUNT()])",
+          "\n        LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
+          "\n          LogicalTableScan(table=[[a]])",
           "\n"
         ]
       },
@@ -89,10 +86,9 @@
           "\nLogicalProject(EXPR$0=[CASE(=($1, 0), null:BIGINT, $0)], EXPR$1=[$1])",
           "\n  LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[COUNT($1)])",
           "\n    PinotLogicalExchange(distribution=[hash])",
-          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($1)], agg#1=[COUNT()])",
-          "\n        LogicalProject(col2=[$1], col3=[$2])",
-          "\n          LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
-          "\n            LogicalTableScan(table=[[a]])",
+          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($2)], agg#1=[COUNT()])",
+          "\n        LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
+          "\n          LogicalTableScan(table=[[a]])",
           "\n"
         ]
       },
@@ -104,10 +100,9 @@
           "\nLogicalProject(sum=[CASE(=($1, 0), null:BIGINT, $0)], count=[$1])",
           "\n  LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[COUNT($1)])",
           "\n    PinotLogicalExchange(distribution=[hash])",
-          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($1)], agg#1=[COUNT()])",
-          "\n        LogicalProject(col2=[$1], col3=[$2])",
-          "\n          LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
-          "\n            LogicalTableScan(table=[[a]])",
+          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($2)], agg#1=[COUNT()])",
+          "\n        LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
+          "\n          LogicalTableScan(table=[[a]])",
           "\n"
         ]
       },
@@ -119,10 +114,9 @@
           "\nLogicalProject(avg=[/(CAST(CASE(=($1, 0), null:BIGINT, $0)):DOUBLE, $1)], count=[$1])",
           "\n  LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[COUNT($1)])",
           "\n    PinotLogicalExchange(distribution=[hash])",
-          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($1)], agg#1=[COUNT()])",
-          "\n        LogicalProject(col2=[$1], col3=[$2])",
-          "\n          LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
-          "\n            LogicalTableScan(table=[[a]])",
+          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($2)], agg#1=[COUNT()])",
+          "\n        LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
+          "\n          LogicalTableScan(table=[[a]])",
           "\n"
         ]
       },
@@ -134,10 +128,9 @@
           "\nLogicalProject(sum=[CASE(=($1, 0), null:BIGINT, $0)], count=[$1])",
           "\n  LogicalAggregate(group=[{}], agg#0=[$SUM0($0)], agg#1=[COUNT($1)])",
           "\n    PinotLogicalExchange(distribution=[hash])",
-          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($1)], agg#1=[COUNT()])",
-          "\n        LogicalProject(col2=[$1], col3=[$2])",
-          "\n          LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
-          "\n            LogicalTableScan(table=[[a]])",
+          "\n      LogicalAggregate(group=[{}], agg#0=[$SUM0($2)], agg#1=[COUNT()])",
+          "\n        LogicalFilter(condition=[AND(>=($2, 0), =($1, 'pink floyd'))])",
+          "\n          LogicalTableScan(table=[[a]])",
           "\n"
         ]
       }
diff --git a/pinot-query-planner/src/test/resources/queries/ExplainPhysicalPlans.json b/pinot-query-planner/src/test/resources/queries/ExplainPhysicalPlans.json
index 37b338da16..1e85e4ca4e 100644
--- a/pinot-query-planner/src/test/resources/queries/ExplainPhysicalPlans.json
+++ b/pinot-query-planner/src/test/resources/queries/ExplainPhysicalPlans.json
@@ -337,15 +337,15 @@
           "                        └── [2]@localhost:1|[1] MAIL_RECEIVE(BROADCAST_DISTRIBUTED)\n",
           "                            ├── [3]@localhost:1|[1] MAIL_SEND(BROADCAST_DISTRIBUTED)->{[2]@localhost:1|[0, 1],[2]@localhost:2|[2, 3]} (Subtree Omitted)\n",
           "                            └── [3]@localhost:2|[0] MAIL_SEND(BROADCAST_DISTRIBUTED)->{[2]@localhost:1|[0, 1],[2]@localhost:2|[2, 3]}\n",
-          "                                └── [3]@localhost:2|[0] FILTER\n",
-          "                                    └── [3]@localhost:2|[0] AGGREGATE_FINAL\n",
-          "                                        └── [3]@localhost:2|[0] MAIL_RECEIVE(HASH_DISTRIBUTED)\n",
-          "                                            ├── [4]@localhost:2|[2] MAIL_SEND(HASH_DISTRIBUTED)->{[3]@localhost:1|[1],[3]@localhost:2|[0]} (Subtree Omitted)\n",
-          "                                            ├── [4]@localhost:2|[3] MAIL_SEND(HASH_DISTRIBUTED)->{[3]@localhost:1|[1],[3]@localhost:2|[0]} (Subtree Omitted)\n",
-          "                                            ├── [4]@localhost:1|[0] MAIL_SEND(HASH_DISTRIBUTED)->{[3]@localhost:1|[1],[3]@localhost:2|[0]} (Subtree Omitted)\n",
-          "                                            └── [4]@localhost:1|[1] MAIL_SEND(HASH_DISTRIBUTED)->{[3]@localhost:1|[1],[3]@localhost:2|[0]}\n",
-          "                                                └── [4]@localhost:1|[1] AGGREGATE_LEAF\n",
-          "                                                    └── [4]@localhost:1|[1] PROJECT\n",
+          "                                └── [3]@localhost:2|[0] PROJECT\n",
+          "                                    └── [3]@localhost:2|[0] FILTER\n",
+          "                                        └── [3]@localhost:2|[0] AGGREGATE_FINAL\n",
+          "                                            └── [3]@localhost:2|[0] MAIL_RECEIVE(HASH_DISTRIBUTED)\n",
+          "                                                ├── [4]@localhost:2|[2] MAIL_SEND(HASH_DISTRIBUTED)->{[3]@localhost:1|[1],[3]@localhost:2|[0]} (Subtree Omitted)\n",
+          "                                                ├── [4]@localhost:2|[3] MAIL_SEND(HASH_DISTRIBUTED)->{[3]@localhost:1|[1],[3]@localhost:2|[0]} (Subtree Omitted)\n",
+          "                                                ├── [4]@localhost:1|[0] MAIL_SEND(HASH_DISTRIBUTED)->{[3]@localhost:1|[1],[3]@localhost:2|[0]} (Subtree Omitted)\n",
+          "                                                └── [4]@localhost:1|[1] MAIL_SEND(HASH_DISTRIBUTED)->{[3]@localhost:1|[1],[3]@localhost:2|[0]}\n",
+          "                                                    └── [4]@localhost:1|[1] AGGREGATE_LEAF\n",
           "                                                        └── [4]@localhost:1|[1] FILTER\n",
           "                                                            └── [4]@localhost:1|[1] TABLE SCAN (b) null\n",
           ""
@@ -373,15 +373,15 @@
           "                            ├── [3]@localhost:2|[3] MAIL_SEND(BROADCAST_DISTRIBUTED)->{[2]@localhost:1|[0, 1],[2]@localhost:2|[2, 3]} (Subtree Omitted)\n",
           "                            ├── [3]@localhost:1|[0] MAIL_SEND(BROADCAST_DISTRIBUTED)->{[2]@localhost:1|[0, 1],[2]@localhost:2|[2, 3]} (Subtree Omitted)\n",
           "                            └── [3]@localhost:1|[1] MAIL_SEND(BROADCAST_DISTRIBUTED)->{[2]@localhost:1|[0, 1],[2]@localhost:2|[2, 3]}\n",
-          "                                └── [3]@localhost:1|[1] FILTER\n",
-          "                                    └── [3]@localhost:1|[1] AGGREGATE_FINAL\n",
-          "                                        └── [3]@localhost:1|[1] MAIL_RECEIVE(HASH_DISTRIBUTED)\n",
-          "                                            ├── [4]@localhost:2|[2] MAIL_SEND(HASH_DISTRIBUTED)[PARTITIONED]->{[3]@localhost:2|[2]} (Subtree Omitted)\n",
-          "                                            ├── [4]@localhost:2|[3] MAIL_SEND(HASH_DISTRIBUTED)[PARTITIONED]->{[3]@localhost:2|[3]} (Subtree Omitted)\n",
-          "                                            ├── [4]@localhost:1|[0] MAIL_SEND(HASH_DISTRIBUTED)[PARTITIONED]->{[3]@localhost:1|[0]} (Subtree Omitted)\n",
-          "                                            └── [4]@localhost:1|[1] MAIL_SEND(HASH_DISTRIBUTED)[PARTITIONED]->{[3]@localhost:1|[1]}\n",
-          "                                                └── [4]@localhost:1|[1] AGGREGATE_LEAF\n",
-          "                                                    └── [4]@localhost:1|[1] PROJECT\n",
+          "                                └── [3]@localhost:1|[1] PROJECT\n",
+          "                                    └── [3]@localhost:1|[1] FILTER\n",
+          "                                        └── [3]@localhost:1|[1] AGGREGATE_FINAL\n",
+          "                                            └── [3]@localhost:1|[1] MAIL_RECEIVE(HASH_DISTRIBUTED)\n",
+          "                                                ├── [4]@localhost:2|[2] MAIL_SEND(HASH_DISTRIBUTED)[PARTITIONED]->{[3]@localhost:2|[2]} (Subtree Omitted)\n",
+          "                                                ├── [4]@localhost:2|[3] MAIL_SEND(HASH_DISTRIBUTED)[PARTITIONED]->{[3]@localhost:2|[3]} (Subtree Omitted)\n",
+          "                                                ├── [4]@localhost:1|[0] MAIL_SEND(HASH_DISTRIBUTED)[PARTITIONED]->{[3]@localhost:1|[0]} (Subtree Omitted)\n",
+          "                                                └── [4]@localhost:1|[1] MAIL_SEND(HASH_DISTRIBUTED)[PARTITIONED]->{[3]@localhost:1|[1]}\n",
+          "                                                    └── [4]@localhost:1|[1] AGGREGATE_LEAF\n",
           "                                                        └── [4]@localhost:1|[1] FILTER\n",
           "                                                            └── [4]@localhost:1|[1] TABLE SCAN (b) null\n",
           ""
diff --git a/pinot-query-planner/src/test/resources/queries/GroupByPlans.json b/pinot-query-planner/src/test/resources/queries/GroupByPlans.json
index 0b19599217..690acbd01f 100644
--- a/pinot-query-planner/src/test/resources/queries/GroupByPlans.json
+++ b/pinot-query-planner/src/test/resources/queries/GroupByPlans.json
@@ -34,9 +34,8 @@
           "\nLogicalAggregate(group=[{0}], agg#0=[$SUM0($1)])",
           "\n  PinotLogicalExchange(distribution=[hash[0]])",
           "\n    LogicalAggregate(group=[{0}], agg#0=[$SUM0($2)])",
-          "\n      LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
-          "\n        LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
-          "\n          LogicalTableScan(table=[[a]])",
+          "\n      LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
+          "\n        LogicalTableScan(table=[[a]])",
           "\n"
         ]
       },
@@ -49,9 +48,8 @@
           "\nLogicalAggregate(group=[{0}], agg#0=[COUNT($1)])",
           "\n  PinotLogicalExchange(distribution=[hash[0]])",
           "\n    LogicalAggregate(group=[{0}], agg#0=[COUNT()])",
-          "\n      LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
-          "\n        LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
-          "\n          LogicalTableScan(table=[[a]])",
+          "\n      LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
+          "\n        LogicalTableScan(table=[[a]])",
           "\n"
         ]
       },
@@ -64,9 +62,8 @@
           "\n  LogicalAggregate(group=[{0, 1}], agg#0=[$SUM0($2)])",
           "\n    PinotLogicalExchange(distribution=[hash[0, 1]])",
           "\n      LogicalAggregate(group=[{0, 1}], agg#0=[$SUM0($2)])",
-          "\n        LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
-          "\n          LogicalFilter(condition=[AND(>=($2, 0), =($0, 'a'))])",
-          "\n            LogicalTableScan(table=[[a]])",
+          "\n        LogicalFilter(condition=[AND(>=($2, 0), =($0, 'a'))])",
+          "\n          LogicalTableScan(table=[[a]])",
           "\n"
         ]
       },
@@ -80,9 +77,8 @@
           "\n    LogicalAggregate(group=[{0}], agg#0=[COUNT($1)], agg#1=[$SUM0($2)], agg#2=[MAX($3)], agg#3=[MIN($4)])",
           "\n      PinotLogicalExchange(distribution=[hash[0]])",
           "\n        LogicalAggregate(group=[{0}], agg#0=[COUNT()], agg#1=[$SUM0($2)], agg#2=[MAX($2)], agg#3=[MIN($2)])",
-          "\n          LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
-          "\n            LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
-          "\n              LogicalTableScan(table=[[a]])",
+          "\n          LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
+          "\n            LogicalTableScan(table=[[a]])",
           "\n"
         ]
       },
@@ -96,9 +92,8 @@
           "\n    LogicalAggregate(group=[{0}], agg#0=[COUNT($1)], agg#1=[$SUM0($2)], agg#2=[MAX($3)], agg#3=[MIN($4)])",
           "\n      PinotLogicalExchange(distribution=[hash[0]])",
           "\n        LogicalAggregate(group=[{0}], agg#0=[COUNT()], agg#1=[$SUM0($2)], agg#2=[MAX($2)], agg#3=[MIN($2)])",
-          "\n          LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
-          "\n            LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
-          "\n              LogicalTableScan(table=[[a]])",
+          "\n          LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
+          "\n            LogicalTableScan(table=[[a]])",
           "\n"
         ]
       },
@@ -145,9 +140,9 @@
         "sql": "EXPLAIN PLAN FOR SELECT /*+ aggOptions(is_skip_leaf_stage_group_by='true') */ a.col1, SUM(a.col3) FROM a WHERE a.col3 >= 0 AND a.col2 = 'a' GROUP BY a.col1",
         "output": [
           "Execution Plan",
-          "\nLogicalAggregate(group=[{0}], EXPR$1=[$SUM0($2)])",
+          "\nLogicalAggregate(group=[{0}], EXPR$1=[$SUM0($1)])",
           "\n  PinotLogicalExchange(distribution=[hash[0]])",
-          "\n    LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
+          "\n    LogicalProject(col1=[$0], col3=[$2])",
           "\n      LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
           "\n        LogicalTableScan(table=[[a]])",
           "\n"
@@ -158,9 +153,9 @@
         "sql": "EXPLAIN PLAN FOR SELECT /*+ aggOptions(is_skip_leaf_stage_group_by='true') */ a.col1, SUM(a.col3), MAX(a.col3) FROM a WHERE a.col3 >= 0 AND a.col2 = 'a' GROUP BY a.col1",
         "output": [
           "Execution Plan",
-          "\nLogicalAggregate(group=[{0}], EXPR$1=[$SUM0($2)], EXPR$2=[MAX($2)])",
+          "\nLogicalAggregate(group=[{0}], EXPR$1=[$SUM0($1)], EXPR$2=[MAX($1)])",
           "\n  PinotLogicalExchange(distribution=[hash[0]])",
-          "\n    LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
+          "\n    LogicalProject(col1=[$0], col3=[$2])",
           "\n      LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
           "\n        LogicalTableScan(table=[[a]])",
           "\n"
@@ -174,7 +169,7 @@
           "Execution Plan",
           "\nLogicalAggregate(group=[{0}], EXPR$1=[COUNT()])",
           "\n  PinotLogicalExchange(distribution=[hash[0]])",
-          "\n    LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
+          "\n    LogicalProject(col1=[$0])",
           "\n      LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
           "\n        LogicalTableScan(table=[[a]])",
           "\n"
@@ -201,9 +196,9 @@
           "Execution Plan",
           "\nLogicalProject(col1=[$0], EXPR$1=[$1], EXPR$2=[$2])",
           "\n  LogicalFilter(condition=[AND(>($1, 10), >=($3, 0), <($4, 20), <=($2, 10), =(/(CAST($2):DOUBLE NOT NULL, $1), 5))])",
-          "\n    LogicalAggregate(group=[{0}], EXPR$1=[COUNT()], EXPR$2=[$SUM0($2)], agg#2=[MAX($2)], agg#3=[MIN($2)])",
+          "\n    LogicalAggregate(group=[{0}], EXPR$1=[COUNT()], EXPR$2=[$SUM0($1)], agg#2=[MAX($1)], agg#3=[MIN($1)])",
           "\n      PinotLogicalExchange(distribution=[hash[0]])",
-          "\n        LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
+          "\n        LogicalProject(col1=[$0], col3=[$2])",
           "\n          LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
           "\n            LogicalTableScan(table=[[a]])",
           "\n"
@@ -216,9 +211,9 @@
           "Execution Plan",
           "\nLogicalProject(col1=[$0], EXPR$1=[$1])",
           "\n  LogicalFilter(condition=[AND(>=($2, 0), <($3, 20), <=($1, 10), =(/(CAST($1):DOUBLE NOT NULL, $4), 5))])",
-          "\n    LogicalAggregate(group=[{0}], EXPR$1=[$SUM0($2)], agg#1=[MAX($2)], agg#2=[MIN($2)], agg#3=[COUNT()])",
+          "\n    LogicalAggregate(group=[{0}], EXPR$1=[$SUM0($1)], agg#1=[MAX($1)], agg#2=[MIN($1)], agg#3=[COUNT()])",
           "\n      PinotLogicalExchange(distribution=[hash[0]])",
-          "\n        LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
+          "\n        LogicalProject(col1=[$0], col3=[$2])",
           "\n          LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
           "\n            LogicalTableScan(table=[[a]])",
           "\n"
@@ -231,9 +226,9 @@
           "Execution Plan",
           "\nLogicalProject(value1=[$0], count=[$1], SUM=[$2])",
           "\n  LogicalFilter(condition=[AND(>($1, 10), >=($3, 0), <($4, 20), <=($2, 10), =(/(CAST($2):DOUBLE NOT NULL, $1), 5))])",
-          "\n    LogicalAggregate(group=[{0}], count=[COUNT()], SUM=[$SUM0($2)], agg#2=[MAX($2)], agg#3=[MIN($2)])",
+          "\n    LogicalAggregate(group=[{0}], count=[COUNT()], SUM=[$SUM0($1)], agg#2=[MAX($1)], agg#3=[MIN($1)])",
           "\n      PinotLogicalExchange(distribution=[hash[0]])",
-          "\n        LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
+          "\n        LogicalProject(col1=[$0], col3=[$2])",
           "\n          LogicalFilter(condition=[AND(>=($2, 0), =($1, 'a'))])",
           "\n            LogicalTableScan(table=[[a]])",
           "\n"
diff --git a/pinot-query-planner/src/test/resources/queries/JoinPlans.json b/pinot-query-planner/src/test/resources/queries/JoinPlans.json
index 46489d8737..9ab9edebda 100644
--- a/pinot-query-planner/src/test/resources/queries/JoinPlans.json
+++ b/pinot-query-planner/src/test/resources/queries/JoinPlans.json
@@ -228,12 +228,13 @@
         "output": [
           "Execution Plan",
           "\nLogicalProject(col1=[$0], col2=[$1])",
-          "\n  LogicalJoin(condition=[=($2, $5)], joinType=[semi])",
+          "\n  LogicalJoin(condition=[=($2, $3)], joinType=[semi])",
           "\n    PinotLogicalExchange(distribution=[hash[2]])",
           "\n      LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
           "\n        LogicalTableScan(table=[[a]])",
-          "\n    PinotLogicalExchange(distribution=[hash[2]])",
-          "\n      LogicalTableScan(table=[[b]])",
+          "\n    PinotLogicalExchange(distribution=[hash[0]])",
+          "\n      LogicalProject(col3=[$2])",
+          "\n        LogicalTableScan(table=[[b]])",
           "\n"
         ]
       },
@@ -243,25 +244,25 @@
         "output": [
           "Execution Plan",
           "\nLogicalProject(col1=[$0], col2=[$1])",
-          "\n  LogicalJoin(condition=[=($2, $4)], joinType=[semi])",
+          "\n  LogicalJoin(condition=[=($2, $3)], joinType=[semi])",
           "\n    PinotLogicalExchange(distribution=[hash[2]])",
-          "\n      LogicalJoin(condition=[=($2, $4)], joinType=[semi])",
+          "\n      LogicalJoin(condition=[=($2, $3)], joinType=[semi])",
           "\n        PinotLogicalExchange(distribution=[hash[2]])",
-          "\n          LogicalJoin(condition=[=($2, $4)], joinType=[semi])",
+          "\n          LogicalJoin(condition=[=($2, $3)], joinType=[semi])",
           "\n            PinotLogicalExchange(distribution=[hash[2]])",
           "\n              LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
           "\n                LogicalFilter(condition=[=($1, 'test')])",
           "\n                  LogicalTableScan(table=[[a]])",
-          "\n            PinotLogicalExchange(distribution=[hash[1]])",
-          "\n              LogicalProject(col1=[$0], col3=[$2])",
+          "\n            PinotLogicalExchange(distribution=[hash[0]])",
+          "\n              LogicalProject(col3=[$2])",
           "\n                LogicalFilter(condition=[=($0, 'foo')])",
           "\n                  LogicalTableScan(table=[[b]])",
-          "\n        PinotLogicalExchange(distribution=[hash[1]])",
-          "\n          LogicalProject(col1=[$0], col3=[$2])",
+          "\n        PinotLogicalExchange(distribution=[hash[0]])",
+          "\n          LogicalProject(col3=[$2])",
           "\n            LogicalFilter(condition=[=($0, 'bar')])",
           "\n              LogicalTableScan(table=[[b]])",
-          "\n    PinotLogicalExchange(distribution=[hash[1]])",
-          "\n      LogicalProject(col1=[$0], col3=[$2])",
+          "\n    PinotLogicalExchange(distribution=[hash[0]])",
+          "\n      LogicalProject(col3=[$2])",
           "\n        LogicalFilter(condition=[=($0, 'foobar')])",
           "\n          LogicalTableScan(table=[[b]])",
           "\n"
@@ -427,16 +428,16 @@
         "sql": "EXPLAIN PLAN FOR WITH tmp1 AS ( SELECT * FROM a WHERE col2 NOT IN ('foo', 'bar') ), tmp2 AS ( SELECT * FROM b WHERE col1 IN (SELECT col1 FROM tmp1) AND col3 < 100 ) SELECT * FROM tmp2 WHERE col3 IN (SELECT col3 from tmp1)",
         "output": [
           "Execution Plan",
-          "\nLogicalJoin(condition=[=($2, $8)], joinType=[semi])",
+          "\nLogicalJoin(condition=[=($2, $7)], joinType=[semi])",
           "\n  LogicalJoin(condition=[=($0, $7)], joinType=[semi])",
           "\n    LogicalFilter(condition=[<($2, 100)])",
           "\n      LogicalTableScan(table=[[b]])",
           "\n    PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n      LogicalProject(col1=[$0], col2=[$1])",
+          "\n      LogicalProject(col1=[$0])",
           "\n        LogicalFilter(condition=[AND(<>($1, 'bar'), <>($1, 'foo'))])",
           "\n          LogicalTableScan(table=[[a]])",
           "\n  PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n    LogicalProject(col2=[$1], col3=[$2])",
+          "\n    LogicalProject(col3=[$2])",
           "\n      LogicalFilter(condition=[AND(<>($1, 'bar'), <>($1, 'foo'))])",
           "\n        LogicalTableScan(table=[[a]])",
           "\n"
diff --git a/pinot-query-planner/src/test/resources/queries/PinotHintablePlans.json b/pinot-query-planner/src/test/resources/queries/PinotHintablePlans.json
index 6b07bdf9ee..e5f435fccf 100644
--- a/pinot-query-planner/src/test/resources/queries/PinotHintablePlans.json
+++ b/pinot-query-planner/src/test/resources/queries/PinotHintablePlans.json
@@ -29,7 +29,7 @@
           "\n  LogicalProject(col1=[$0], col2=[$1])",
           "\n    LogicalTableScan(table=[[a]])",
           "\n  PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n    LogicalProject(col2=[$1], col3=[$2])",
+          "\n    LogicalProject(col2=[$1])",
           "\n      LogicalFilter(condition=[>($2, 0)])",
           "\n        LogicalTableScan(table=[[b]])",
           "\n"
@@ -45,11 +45,11 @@
           "\n    LogicalProject(col1=[$0], col2=[$1])",
           "\n      LogicalTableScan(table=[[a]])",
           "\n    PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n      LogicalProject(col2=[$1], col3=[$2])",
+          "\n      LogicalProject(col2=[$1])",
           "\n        LogicalFilter(condition=[>($2, 0)])",
           "\n          LogicalTableScan(table=[[b]])",
           "\n  PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n    LogicalProject(col1=[$0], col3=[$2])",
+          "\n    LogicalProject(col1=[$0])",
           "\n      LogicalFilter(condition=[>($2, 0)])",
           "\n        LogicalTableScan(table=[[c]])",
           "\n"
@@ -68,11 +68,11 @@
           "\n          LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
           "\n            LogicalTableScan(table=[[a]])",
           "\n          PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n            LogicalProject(col2=[$1], col3=[$2])",
+          "\n            LogicalProject(col2=[$1])",
           "\n              LogicalFilter(condition=[>($2, 0)])",
           "\n                LogicalTableScan(table=[[b]])",
           "\n        PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n          LogicalProject(col1=[$0], col3=[$2])",
+          "\n          LogicalProject(col1=[$0])",
           "\n            LogicalFilter(condition=[>($2, 0)])",
           "\n              LogicalTableScan(table=[[c]])",
           "\n"
@@ -88,7 +88,7 @@
           "\n    LogicalProject(col1=[$0], col3=[$2])",
           "\n      LogicalTableScan(table=[[a]])",
           "\n    PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n      LogicalProject(col2=[$1], col3=[$2])",
+          "\n      LogicalProject(col2=[$1])",
           "\n        LogicalFilter(condition=[>($2, 0)])",
           "\n          LogicalTableScan(table=[[b]])",
           "\n"
@@ -106,7 +106,7 @@
           "\n        LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
           "\n          LogicalTableScan(table=[[a]])",
           "\n        PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n          LogicalProject(col2=[$1], col3=[$2])",
+          "\n          LogicalProject(col2=[$1])",
           "\n            LogicalFilter(condition=[>($2, 0)])",
           "\n              LogicalTableScan(table=[[b]])",
           "\n"
@@ -280,7 +280,7 @@
           "\n        LogicalProject(col2=[$1], col3=[$2])",
           "\n          LogicalTableScan(table=[[a]])",
           "\n        PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n          LogicalProject(col1=[$0], col3=[$2])",
+          "\n          LogicalProject(col1=[$0])",
           "\n            LogicalFilter(condition=[>($2, 0)])",
           "\n              LogicalTableScan(table=[[b]])",
           "\n"
@@ -298,7 +298,7 @@
           "\n        LogicalProject(col2=[$1], col3=[$2])",
           "\n          LogicalTableScan(table=[[a]])",
           "\n        PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n          LogicalProject(col1=[$0], col3=[$2])",
+          "\n          LogicalProject(col1=[$0])",
           "\n            LogicalFilter(condition=[>($2, 0)])",
           "\n              LogicalTableScan(table=[[b]])",
           "\n"
@@ -316,7 +316,7 @@
           "\n        LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
           "\n          LogicalTableScan(table=[[a]])",
           "\n        PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n          LogicalProject(col1=[$0], col3=[$2])",
+          "\n          LogicalProject(col1=[$0])",
           "\n            LogicalFilter(condition=[>($2, 0)])",
           "\n              LogicalTableScan(table=[[b]])",
           "\n"
@@ -334,11 +334,11 @@
           "\n        LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
           "\n          LogicalTableScan(table=[[a]])",
           "\n        PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n          LogicalFilter(condition=[>($1, 1)])",
-          "\n            LogicalAggregate(group=[{0}], agg#0=[COUNT($1)])",
-          "\n              PinotLogicalExchange(distribution=[hash[0]])",
-          "\n                LogicalAggregate(group=[{0}], agg#0=[COUNT()])",
-          "\n                  LogicalProject(col1=[$0], col3=[$2])",
+          "\n          LogicalProject(col1=[$0])",
+          "\n            LogicalFilter(condition=[>($1, 1)])",
+          "\n              LogicalAggregate(group=[{0}], agg#0=[COUNT($1)])",
+          "\n                PinotLogicalExchange(distribution=[hash[0]])",
+          "\n                  LogicalAggregate(group=[{0}], agg#0=[COUNT()])",
           "\n                    LogicalFilter(condition=[>($2, 0)])",
           "\n                      LogicalTableScan(table=[[b]])",
           "\n"
@@ -358,7 +358,7 @@
           "\n            LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
           "\n              LogicalTableScan(table=[[a]])",
           "\n            PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n              LogicalProject(col1=[$0], col3=[$2])",
+          "\n              LogicalProject(col1=[$0])",
           "\n                LogicalFilter(condition=[>($2, 0)])",
           "\n                  LogicalTableScan(table=[[b]])",
           "\n"
@@ -379,7 +379,7 @@
           "\n              LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
           "\n                LogicalTableScan(table=[[a]])",
           "\n              PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n                LogicalProject(col1=[$0], col3=[$2])",
+          "\n                LogicalProject(col1=[$0])",
           "\n                  LogicalFilter(condition=[>($2, 0)])",
           "\n                    LogicalTableScan(table=[[b]])",
           "\n"
@@ -395,7 +395,7 @@
           "\n    LogicalProject(col2=[$1], col3=[$2])",
           "\n      LogicalTableScan(table=[[a]])",
           "\n    PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n      LogicalProject(col1=[$0], col3=[$2])",
+          "\n      LogicalProject(col1=[$0])",
           "\n        LogicalFilter(condition=[>($2, 0)])",
           "\n          LogicalTableScan(table=[[b]])",
           "\n"
@@ -411,7 +411,7 @@
           "\n    LogicalProject(col2=[$1], col3=[$2])",
           "\n      LogicalTableScan(table=[[a]])",
           "\n    PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n      LogicalProject(col1=[$0], col3=[$2])",
+          "\n      LogicalProject(col1=[$0])",
           "\n        LogicalFilter(condition=[>($2, 0)])",
           "\n          LogicalTableScan(table=[[b]])",
           "\n"
@@ -429,7 +429,7 @@
           "\n        LogicalProject(col2=[$1], col3=[$2])",
           "\n          LogicalTableScan(table=[[a]])",
           "\n        PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n          LogicalProject(col1=[$0], col3=[$2])",
+          "\n          LogicalProject(col1=[$0])",
           "\n            LogicalFilter(condition=[>($2, 0)])",
           "\n              LogicalTableScan(table=[[b]])",
           "\n"
@@ -448,7 +448,7 @@
           "\n          LogicalProject(col2=[$1], col3=[$2])",
           "\n            LogicalTableScan(table=[[a]])",
           "\n          PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n            LogicalProject(col1=[$0], col3=[$2])",
+          "\n            LogicalProject(col1=[$0])",
           "\n              LogicalFilter(condition=[>($2, 0)])",
           "\n                LogicalTableScan(table=[[b]])",
           "\n"
@@ -466,7 +466,7 @@
           "\n        LogicalProject(col2=[$1])",
           "\n          LogicalTableScan(table=[[a]])",
           "\n        PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n          LogicalProject(col1=[$0], col3=[$2])",
+          "\n          LogicalProject(col1=[$0])",
           "\n            LogicalFilter(condition=[>($2, 0)])",
           "\n              LogicalTableScan(table=[[b]])",
           "\n"
@@ -485,7 +485,7 @@
           "\n          LogicalProject(col1=[$0], col2=[$1], col3=[$2])",
           "\n            LogicalTableScan(table=[[a]])",
           "\n          PinotLogicalExchange(distribution=[broadcast], relExchangeType=[PIPELINE_BREAKER])",
-          "\n            LogicalProject(col1=[$0], col3=[$2])",
+          "\n            LogicalProject(col1=[$0])",
           "\n              LogicalFilter(condition=[>($2, 0)])",
           "\n                LogicalTableScan(table=[[b]])",
           "\n"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org