You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by GitBox <gi...@apache.org> on 2021/05/26 08:34:57 UTC

[GitHub] [calcite] zabetak commented on a change in pull request #2370: [CALCITE-3366] RelDecorrelator supports Union (Xu Zhaohui & Jin Xing)

zabetak commented on a change in pull request #2370:
URL: https://github.com/apache/calcite/pull/2370#discussion_r639516358



##########
File path: core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
##########
@@ -11699,6 +11699,249 @@ LogicalProject(JOB=[$1])
 ]]>
         </Resource>
     </TestCase>
+  <TestCase name="testDecorrelateUnion0">
+    <Resource name="sql">
+      <![CDATA[SELECT deptno FROM dept where exists
+(SELECT 1 FROM emp where sal < 100 and emp.deptno=dept.deptno
+UNION ALL
+SELECT 1 FROM bonus where bonus.comm=dept.deptno)"]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0])
+  LogicalFilter(condition=[IS NOT NULL($2)])
+    LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{0}])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+      LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+        LogicalProject($f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[AND(<($5, 100), =($7, $cor0.DEPTNO))])
+                LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=($3, $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, BONUS]])
+]]>
+    </Resource>
+    <Resource name="planMid">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0])
+  LogicalFilter(condition=[IS NOT NULL($2)])
+    LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{0}])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+      LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+        LogicalProject($f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[AND(<($5, 100), =($7, $cor0.DEPTNO))])
+                LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=($3, $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, BONUS]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(DEPTNO=[$0])
+  LogicalProject(DEPTNO=[$0], NAME=[$1], DEPTNO0=[CAST($2):INTEGER], $f1=[CAST($3):BOOLEAN])
+    LogicalJoin(condition=[=($0, $2)], joinType=[inner])
+      LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+      LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
+        LogicalProject(DEPTNO=[$1], $f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1], DEPTNO=[$7])
+              LogicalFilter(condition=[<($5, 100)])
+                LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+            LogicalProject(EXPR$0=[1], COMM=[$3])
+              LogicalTableScan(table=[[CATALOG, SALES, BONUS]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testDecorrelateUnion1">
+    <Resource name="sql">
+      <![CDATA[SELECT * FROM emp where exists
+(SELECT 1 FROM dept where deptno = emp.deptno
+UNION ALL
+SELECT 1 FROM dept where deptno + 1 = emp.deptno)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalFilter(condition=[IS NOT NULL($9)])
+    LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{7}])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+        LogicalProject($f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=($0, $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=(+($0, 1), $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planMid">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalFilter(condition=[IS NOT NULL($9)])
+    LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{7}])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+        LogicalProject($f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=($0, $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=(+($0, 1), $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], DEPTNO0=[CAST($9):INTEGER], $f1=[CAST($10):BOOLEAN])
+    LogicalJoin(condition=[=($7, $9)], joinType=[inner])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalAggregate(group=[{0}], agg#0=[MIN($1)])
+        LogicalProject(DEPTNO=[$1], $f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1], DEPTNO=[$0])
+              LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+            LogicalProject(EXPR$0=[1], $f2=[$2])
+              LogicalFilter(condition=[=(+($0, 1), $2)])
+                LogicalProject(DEPTNO=[$0], NAME=[$1], $f2=[+($0, 1)])
+                  LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testDecorrelateUnion2">
+    <Resource name="sql">
+      <![CDATA[SELECT * FROM emp where exists
+(SELECT 1 FROM dept where deptno = emp.deptno
+UNION ALL
+SELECT 1 FROM dept where name = emp.ename)]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalFilter(condition=[IS NOT NULL($9)])
+    LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{1, 7}])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+        LogicalProject($f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=($0, $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=(CAST($1):VARCHAR(20) NOT NULL, $cor0.ENAME)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planMid">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalFilter(condition=[IS NOT NULL($9)])
+    LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{1, 7}])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalAggregate(group=[{}], agg#0=[MIN($0)])
+        LogicalProject($f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=($0, $cor0.DEPTNO)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+            LogicalProject(EXPR$0=[1])
+              LogicalFilter(condition=[=(CAST($1):VARCHAR(20) NOT NULL, $cor0.ENAME)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8], ENAME0=[CAST($9):VARCHAR(20)], DEPTNO0=[CAST($10):INTEGER], $f2=[CAST($11):BOOLEAN])
+    LogicalJoin(condition=[AND(=($1, $9), =($7, $10))], joinType=[inner])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalAggregate(group=[{0, 1}], agg#0=[MIN($2)])
+        LogicalProject(ENAME=[$1], DEPTNO=[$2], $f0=[true])
+          LogicalUnion(all=[true])
+            LogicalProject(EXPR$0=[$0], ENAME=[$2], DEPTNO=[$1])
+              LogicalJoin(condition=[true], joinType=[inner])
+                LogicalProject(EXPR$0=[1], DEPTNO=[$0])
+                  LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+                LogicalAggregate(group=[{0}])
+                  LogicalProject(ENAME=[$1])
+                    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+            LogicalJoin(condition=[true], joinType=[inner])
+              LogicalProject(EXPR$0=[1], NAME0=[$2])
+                LogicalFilter(condition=[=(CAST($1):VARCHAR(20) NOT NULL, $2)])
+                  LogicalProject(DEPTNO=[$0], NAME=[$1], NAME0=[CAST($1):VARCHAR(20) NOT NULL])
+                    LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+              LogicalAggregate(group=[{0}])
+                LogicalProject(DEPTNO=[$7])
+                  LogicalTableScan(table=[[CATALOG, SALES, EMP]])

Review comment:
       Hey @vlsi, it seems that you have a performance concern regarding this PR. I didn't start reviewing yet but I was wondering if it makes sense to merge it even if the decorrelated plan is not the most efficient one. Some systems do not have a way to execute correlated queries so even if the decorrelated plan is not the best it is still some progress.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org