You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by GitBox <gi...@apache.org> on 2021/03/15 07:44:42 UTC

[GitHub] [calcite] vlsi commented on a change in pull request #2370: [CALCITE-3366] RelDecorrelator supports Union (Xu Zhaohui & Jin Xing)

vlsi commented on a change in pull request #2370:
URL: https://github.com/apache/calcite/pull/2370#discussion_r594108497



##########
File path: core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
##########
@@ -11699,6 +11699,249 @@ LogicalProject(JOB=[$1])
 ]]>
         </Resource>
     </TestCase>
+  <TestCase name="testDecorrelateUnion0">
+    <Resource name="sql">
+      <![CDATA[SELECT deptno FROM dept where exists
+(SELECT 1 FROM emp where sal < 100 and emp.deptno=dept.deptno
+UNION ALL
+SELECT 1 FROM bonus where bonus.comm=dept.deptno)"]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0])
+  LogicalFilter(condition=[IS NOT NULL($2)])
+    LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{0}])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+      LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+        LogicalProject($f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[AND(<($5, 100), =($7, $cor0.DEPTNO))])
+                LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=($3, $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, BONUS]])
+]]>
+    </Resource>
+    <Resource name="planMid">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0])
+  LogicalFilter(condition=[IS NOT NULL($2)])
+    LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{0}])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+      LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+        LogicalProject($f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[AND(<($5, 100), =($7, $cor0.DEPTNO))])
+                LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=($3, $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, BONUS]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0])
+  LogicalProject(DEPTNO=[$0], NAME=[$1], DEPTNO0=[CAST($2):INTEGER], $f1=[CAST($3):BOOLEAN])
+    LogicalJoin(condition=[=($0, $2)], joinType=[inner])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+      LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
+        LogicalProject(DEPTNO=[$1], $f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1], DEPTNO=[$7])
+              LogicalFilter(condition=[<($5, 100)])
+                LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+            LogicalProject(EXPR$0=[1], COMM=[$3])
+              LogicalTableScan(table=[[CATALOG, SALES, BONUS]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testDecorrelateUnion1">
+    <Resource name="sql">
+      <![CDATA[SELECT * FROM emp where exists
+(SELECT 1 FROM dept where deptno = emp.deptno
+UNION ALL
+SELECT 1 FROM dept where deptno + 1 = emp.deptno)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalFilter(condition=[IS NOT NULL($9)])
+    LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{7}])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+        LogicalProject($f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=($0, $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=(+($0, 1), $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planMid">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalFilter(condition=[IS NOT NULL($9)])
+    LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{7}])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+        LogicalProject($f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=($0, $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=(+($0, 1), $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[CAST($9):INTEGER], $f1=[CAST($10):BOOLEAN])
+    LogicalJoin(condition=[=($7, $9)], joinType=[inner])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
+        LogicalProject(DEPTNO=[$1], $f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1], DEPTNO=[$0])
+              LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+            LogicalProject(EXPR$0=[1], $f2=[$2])
+              LogicalFilter(condition=[=(+($0, 1), $2)])
+                LogicalProject(DEPTNO=[$0], NAME=[$1], $f2=[+($0, 1)])
+                  LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testDecorrelateUnion2">
+    <Resource name="sql">
+      <![CDATA[SELECT * FROM emp where exists
+(SELECT 1 FROM dept where deptno = emp.deptno
+UNION ALL
+SELECT 1 FROM dept where name = emp.ename)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalFilter(condition=[IS NOT NULL($9)])
+    LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{1, 7}])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+        LogicalProject($f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=($0, $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=(CAST($1):VARCHAR(20) NOT NULL, $cor0.ENAME)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planMid">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalFilter(condition=[IS NOT NULL($9)])
+    LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{1, 7}])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+        LogicalProject($f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=($0, $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=(CAST($1):VARCHAR(20) NOT NULL, $cor0.ENAME)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], ENAME0=[CAST($9):VARCHAR(20)], DEPTNO0=[CAST($10):INTEGER], $f2=[CAST($11):BOOLEAN])
+    LogicalJoin(condition=[AND(=($1, $9), =($7, $10))], joinType=[inner])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalAggregate(group=[{0, 1}], agg#0=[MIN($2)])
+        LogicalProject(ENAME=[$1], DEPTNO=[$2], $f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[$0], ENAME=[$2], DEPTNO=[$1])
+              LogicalJoin(condition=[true], joinType=[inner])
+                LogicalProject(EXPR$0=[1], DEPTNO=[$0])
+                  LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+                LogicalAggregate(group=[{0}])
+                  LogicalProject(ENAME=[$1])
+                    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+            LogicalJoin(condition=[true], joinType=[inner])
+              LogicalProject(EXPR$0=[1], NAME0=[$2])
+                LogicalFilter(condition=[=(CAST($1):VARCHAR(20) NOT NULL, $2)])
+                  LogicalProject(DEPTNO=[$0], NAME=[$1], NAME0=[CAST($1):VARCHAR(20) NOT NULL])
+                    LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+              LogicalAggregate(group=[{0}])
+                LogicalProject(DEPTNO=[$7])
+                  LogicalTableScan(table=[[CATALOG, SALES, EMP]])

Review comment:
       This looks sad :(
   
   Have you considered decorellating as follows?
   In other words, make distinct "core variables" produce outer joins with overall where condition that selects at least one of them.
   
   ```sql
   SELECT *
     FROM emp e
     LEFT JOIN (select deptno from dept group by deptno) d1 on (d.deptno = e.deptno)
     LEFT JOIN (select name from dept group by name) d2 on (d.name = e.ename)
   WHERE d1.deptno is not null
      or d2.name is not null
   ```
   




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org