You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by hy...@apache.org on 2019/07/29 12:25:18 UTC

[calcite] branch master updated: [CALCITE-3031] Support for correlated ANY/SOME/ALL sub-query (Vineet Garg)

This is an automated email from the ASF dual-hosted git repository.

hyuan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/master by this push:
     new a1851b6  [CALCITE-3031] Support for correlated ANY/SOME/ALL sub-query (Vineet Garg)
a1851b6 is described below

commit a1851b67d56f98e14fbe54a96972104585ae8a61
Author: Vineet Garg <vg...@apache.org>
AuthorDate: Mon Apr 29 16:06:40 2019 -0700

    [CALCITE-3031] Support for correlated ANY/SOME/ALL sub-query (Vineet Garg)
---
 .../calcite/rel/rules/SubQueryRemoveRule.java      | 114 ++++++++++++++++-----
 .../org/apache/calcite/test/RelOptRulesTest.java   |  14 +++
 .../org/apache/calcite/test/RelOptRulesTest.xml    |  85 +++++++++++++++
 core/src/test/resources/sql/sub-query.iq           |  66 ++++++++++++
 4 files changed, 252 insertions(+), 27 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
index 78de78f..9a0f32d 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
@@ -39,6 +39,7 @@ import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.fun.SqlQuantifyOperator;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.calcite.sql2rel.RelDecorrelator;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.tools.RelBuilderFactory;
@@ -93,7 +94,7 @@ public abstract class SubQueryRemoveRule extends RelOptRule {
     case SCALAR_QUERY:
       return rewriteScalarQuery(e, variablesSet, builder, inputCount, offset);
     case SOME:
-      return rewriteSome(e, builder);
+      return rewriteSome(e, variablesSet, builder);
     case IN:
       return rewriteIn(e, variablesSet, logic, builder, offset);
     case EXISTS:
@@ -138,7 +139,8 @@ public abstract class SubQueryRemoveRule extends RelOptRule {
    *
    * @return Expression that may be used to replace the RexSubQuery
    */
-  private RexNode rewriteSome(RexSubQuery e, RelBuilder builder) {
+  private RexNode rewriteSome(RexSubQuery e, Set<CorrelationId> variablesSet,
+      RelBuilder builder) {
     // Most general case, where the left and right keys might have nulls, and
     // caller requires 3-valued logic return.
     //
@@ -161,31 +163,89 @@ public abstract class SubQueryRemoveRule extends RelOptRule {
     //
     final SqlQuantifyOperator op = (SqlQuantifyOperator) e.op;
 
-    // SOME_EQ (=SOME) should have been rewritten into IN
-    assert op != SqlStdOperatorTable.SOME_EQ;
-    builder.push(e.rel)
-        .aggregate(builder.groupKey(),
-            op.comparisonKind == SqlKind.GREATER_THAN
-                || op.comparisonKind == SqlKind.GREATER_THAN_OR_EQUAL
-                ? builder.min("m", builder.field(0))
-                : builder.max("m", builder.field(0)),
-            builder.count(false, "c"),
-            builder.count(false, "d", builder.field(0)))
-        .as("q")
-        .join(JoinRelType.INNER);
-    final RexNode caseRexNode = builder.call(SqlStdOperatorTable.CASE,
-        builder.call(SqlStdOperatorTable.EQUALS,
-            builder.field("q", "c"), builder.literal(0)),
-        builder.literal(false),
-        builder.call(SqlStdOperatorTable.IS_TRUE,
-            builder.call(RelOptUtil.op(op.comparisonKind, null),
-                e.operands.get(0), builder.field("q", "m"))),
-        builder.literal(true),
-        builder.call(SqlStdOperatorTable.GREATER_THAN,
-            builder.field("q", "c"), builder.field("q", "d")),
-        builder.literal(null),
-        builder.call(RelOptUtil.op(op.comparisonKind, null),
-            e.operands.get(0), builder.field("q", "m")));
+    // SOME_EQ & SOME_NE should have been rewritten into IN/ NOT IN
+    assert op == SqlStdOperatorTable.SOME_GE || op == SqlStdOperatorTable.SOME_LE
+        || op == SqlStdOperatorTable.SOME_LT || op == SqlStdOperatorTable.SOME_GT;
+
+    RexNode caseRexNode = null;
+    if (variablesSet.isEmpty()) {
+      // for non-correlated case queries such as
+      // select e.deptno, e.deptno < some (select deptno from emp) as v
+      // from emp as e
+      //
+      // becomes
+      //
+      // select e.deptno,
+      //   case
+      //   when q.c = 0 then false // sub-query is empty
+      //   when (e.deptno < q.m) is true then true
+      //   when q.c > q.d then unknown // sub-query has at least one null
+      //   else e.deptno < q.m
+      //   end as v
+      // from emp as e
+      // cross join (
+      //   select max(deptno) as m, count(*) as c, count(deptno) as d
+      //   from emp) as q
+      builder.push(e.rel).aggregate(builder.groupKey(), op.comparisonKind == SqlKind.GREATER_THAN
+              || op.comparisonKind == SqlKind.GREATER_THAN_OR_EQUAL ? builder
+              .min("m", builder.field(0)) : builder.max("m", builder.field(0)),
+          builder.count(false, "c"), builder.count(false, "d", builder.field(0))).as("q")
+          .join(JoinRelType.INNER);
+      caseRexNode = builder.call(SqlStdOperatorTable.CASE,
+          builder.call(SqlStdOperatorTable.EQUALS, builder.field("q", "c"), builder.literal(0)),
+          builder.literal(false), builder.call(SqlStdOperatorTable.IS_TRUE, builder
+              .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0),
+                  builder.field("q", "m"))), builder.literal(true), builder
+              .call(SqlStdOperatorTable.GREATER_THAN, builder.field("q", "c"),
+                  builder.field("q", "d")),
+          e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN), builder
+              .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0),
+                  builder.field("q", "m")));
+    } else {
+      // for correlated case queries such as
+      // select e.deptno, e.deptno < some (select deptno from emp where emp.name = e.name) as v
+      // from emp as e
+      //
+      // becomes
+      //
+      // select e.deptno,
+      //   case
+      //   when indicator is null then false // sub-query is empty for corresponding corr value
+      //   when q.c = 0 then false // sub-query is empty
+      //   when (e.deptno < q.m) is true then true
+      //   when q.c > q.d then unknown // sub-query has at least one null
+      //   else e.deptno < q.m
+      //   end as v
+      // from emp as e
+      // left outer join (
+      //   select max(deptno) as m, count(*) as c, count(deptno) as d, "alwaysTrue" as indicator
+      //   group by name from emp) as q on e.name = q.name
+      builder.push(e.rel);
+      builder.aggregate(builder.groupKey(), op.comparisonKind == SqlKind.GREATER_THAN
+              || op.comparisonKind == SqlKind.GREATER_THAN_OR_EQUAL ? builder
+              .min("m", builder.field(0)) : builder.max("m", builder.field(0)),
+          builder.count(false, "c"), builder.count(false, "d", builder.field(0)));
+
+      final List<RexNode> parentQueryFields = new ArrayList<>();
+      parentQueryFields.addAll(builder.fields());
+      String indicator = "trueLiteral";
+      parentQueryFields.add(builder.alias(builder.literal(true), indicator));
+      builder.project(parentQueryFields).as("q");
+      builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
+      caseRexNode = builder.call(SqlStdOperatorTable.CASE,
+          builder.call(SqlStdOperatorTable.IS_NULL, builder.field("q", indicator)),
+          builder.literal(false),
+          builder.call(SqlStdOperatorTable.EQUALS, builder.field("q", "c"), builder.literal(0)),
+          builder.literal(false), builder.call(SqlStdOperatorTable.IS_TRUE, builder
+              .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0),
+                  builder.field("q", "m"))), builder.literal(true), builder
+              .call(SqlStdOperatorTable.GREATER_THAN, builder.field("q", "c"),
+                  builder.field("q", "d")),
+          e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN), builder
+              .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0),
+                  builder.field("q", "m")));
+    }
+
     // CASE statement above is created with nullable boolean type, but it might
     // not be correct.  If the original sub-query node's type is not nullable it
     // is guranteed for case statement to not produce NULLs. Therefore to avoid
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index dcf23dc..507d7a3 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -5133,6 +5133,20 @@ public class RelOptRulesTest extends RelOptTestBase {
     checkSubQuery(sql).withLateDecorrelation(true).check();
   }
 
+  @Test public void testSelectAnyCorrelated() {
+    final String sql = "select empno > ANY (\n"
+        + " select deptno from dept where emp.job = dept.name) \n"
+        + "from emp\n";
+    checkSubQuery(sql).withLateDecorrelation(true).check();
+  }
+
+  @Test public void testWhereAnyCorrelatedInSelect() {
+    final String sql =
+        "select * from emp where empno > ANY (\n"
+            + " select deptno from dept where emp.job = dept.name) \n";
+    checkSubQuery(sql).withLateDecorrelation(true).check();
+  }
+
   @Test public void testSomeWithEquality() {
     final String sql = "select * from emp e1\n"
         + "  where e1.deptno = SOME (select deptno from dept)";
diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 2ca593c..7127189 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -9599,6 +9599,47 @@ LogicalProject(EMPNO=[$0])
 ]]>
         </Resource>
     </TestCase>
+    <TestCase name="testSelectAnyCorrelated">
+        <Resource name="sql">
+            <![CDATA[select empno > ANY (
+ select deptno from dept where emp.job = dept.name) 
+from emp
+]]>
+        </Resource>
+        <Resource name="planBefore">
+            <![CDATA[
+LogicalProject(EXPR$0=[> SOME($0, {
+LogicalProject(DEPTNO=[$0])
+  LogicalFilter(condition=[=($cor0.JOB, $1)])
+    LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+})])
+  LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+        </Resource>
+        <Resource name="planMid">
+            <![CDATA[
+LogicalProject(EXPR$0=[CAST(OR(AND(IS TRUE(>($0, $9)), IS NOT TRUE(OR(IS NULL($12), =($10, 0)))), AND(IS TRUE(>($10, $11)), null, IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9))), AND(>($0, $9), IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9)), IS NOT TRUE(>($10, $11))))):BOOLEAN NOT NULL])
+  LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{2}])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+    LogicalProject(m=[$0], c=[$1], d=[$2], trueLiteral=[true])
+      LogicalAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)])
+        LogicalProject(DEPTNO=[$0])
+          LogicalFilter(condition=[=($cor0.JOB, $1)])
+            LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+        </Resource>
+        <Resource name="planAfter">
+            <![CDATA[
+LogicalProject(EXPR$0=[CAST(OR(AND(IS TRUE(>($0, $9)), IS NOT TRUE(OR(IS NULL($12), =($10, 0)))), AND(IS TRUE(>($10, $11)), null, IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9))), AND(>($0, $9), IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9)), IS NOT TRUE(>($10, $11))))):BOOLEAN NOT NULL])
+  LogicalJoin(condition=[=($2, $13)], joinType=[left])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+    LogicalProject(m=[$1], c=[$2], d=[$3], trueLiteral=[true], NAME=[$0])
+      LogicalAggregate(group=[{0}], m=[MIN($1)], c=[COUNT()], d=[COUNT($1)])
+        LogicalProject(NAME=[$1], DEPTNO=[$0])
+          LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+        </Resource>
+    </TestCase>
     <TestCase name="testSubQueryWithDynamicStarHang">
         <Resource name="sql">
             <![CDATA[select n.n_regionkey from (select * from
@@ -11166,6 +11207,50 @@ MultiJoin(joinFilter=[true], isFullOuterJoin=[false], joinTypes=[[INNER, LEFT]],
 ]]>
         </Resource>
     </TestCase>
+    <TestCase name="testWhereAnyCorrelatedInSelect">
+        <Resource name="sql">
+            <![CDATA[select * from emp where empno > ANY (
+ select deptno from dept where emp.job = dept.name) 
+]]>
+        </Resource>
+        <Resource name="planBefore">
+            <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalFilter(condition=[> SOME($0, {
+LogicalProject(DEPTNO=[$0])
+  LogicalFilter(condition=[=($cor0.JOB, $1)])
+    LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+})], variablesSet=[[$cor0]])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+        </Resource>
+        <Resource name="planMid">
+            <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+    LogicalFilter(condition=[OR(AND(IS TRUE(>($0, $9)), IS NOT TRUE(OR(IS NULL($12), =($10, 0)))), AND(IS TRUE(>($10, $11)), null, IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9))), AND(>($0, $9), IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9)), IS NOT TRUE(>($10, $11))))])
+      LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{2}])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+        LogicalProject(m=[$0], c=[$1], d=[$2], trueLiteral=[true])
+          LogicalAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)])
+            LogicalProject(DEPTNO=[$0])
+              LogicalFilter(condition=[=($cor0.JOB, $1)])
+                LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+        </Resource>
+        <Resource name="planAfter">
+            <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+  LogicalFilter(condition=[OR(AND(>($0, $9), IS NOT TRUE(OR(IS NULL($12), =($10, 0)))), AND(>($0, $9), IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9)), IS NOT TRUE(>($10, $11))))])
+    LogicalJoin(condition=[=($2, $13)], joinType=[left])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+      LogicalProject(m=[$1], c=[$2], d=[$3], trueLiteral=[true], NAME=[$0])
+        LogicalAggregate(group=[{0}], m=[MIN($1)], c=[COUNT()], d=[COUNT($1)])
+          LogicalProject(NAME=[$1], DEPTNO=[$0])
+            LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+        </Resource>
+    </TestCase>
     <TestCase name="testPushFiltertWithIsNotDistinctFromPastJoin">
         <Resource name="sql">
             <![CDATA[SELECT * FROM emp t1 INNER JOIN emp t2 ON t1.deptno = t2.deptno WHERE t1.ename is not distinct from t2.ename]]>
diff --git a/core/src/test/resources/sql/sub-query.iq b/core/src/test/resources/sql/sub-query.iq
index 9de621c..432f604 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -2063,4 +2063,70 @@ EnumerableAggregate(group=[{}], C=[COUNT()])
             EnumerableTableScan(table=[[scott, DEPT]])
 !plan
 
+# Correlated ANY sub-query 
+select empno from "scott".emp as e 
+where e.empno > ANY(
+  select 2 from "scott".dept e2 where e2.deptno = e.deptno) ;
+EnumerableCalc(expr#0..6=[{inputs}], expr#7=[>($t0, $t2)], expr#8=[IS NULL($t5)], expr#9=[0], expr#10=[=($t3, $t9)], expr#11=[OR($t8, $t10)], expr#12=[IS NOT TRUE($t11)], expr#13=[AND($t7, $t12)], expr#14=[IS NOT TRUE($t7)], expr#15=[>($t3, $t4)], expr#16=[IS NOT TRUE($t15)], expr#17=[AND($t7, $t12, $t14, $t16)], expr#18=[OR($t13, $t17)], EMPNO=[$t0], $condition=[$t18])
+  EnumerableHashJoin(condition=[=($1, $6)], joinType=[left])
+    EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
+      EnumerableTableScan(table=[[scott, EMP]])
+    EnumerableCalc(expr#0..2=[{inputs}], expr#3=[2], expr#4=[1:BIGINT], expr#5=[true], m=[$t3], c=[$t4], d=[$t4], trueLiteral=[$t5], DEPTNO=[$t0])
+      EnumerableTableScan(table=[[scott, DEPT]])
+!plan
+ EMPNO
+-------
+  7369
+  7499
+  7521
+  7566
+  7654
+  7698
+  7782
+  7788
+  7839
+  7844
+  7876
+  7900
+  7902
+  7934
+(14 rows)
+
+!ok
+
+# # inner query produces empty result therefore ANY should produce 'false'
+select empno,
+e.deptno > ANY(
+  select 2 from "scott".dept e2 where e2.deptno = e.empno) from "scott".emp as e;
+
+EnumerableCalc(expr#0..6=[{inputs}], expr#7=[>($t1, $t2)], expr#8=[IS TRUE($t7)], expr#9=[IS NULL($t5)], expr#10=[0], expr#11=[=($t3, $t10)], expr#12=[OR($t9, $t11)], expr#13=[IS NOT TRUE($t12)], expr#14=[AND($t8, $t13)], expr#15=[>($t3, $t4)], expr#16=[IS TRUE($t15)], expr#17=[null:BOOLEAN], expr#18=[IS NOT TRUE($t7)], expr#19=[AND($t16, $t17, $t13, $t18)], expr#20=[IS NOT TRUE($t15)], expr#21=[AND($t7, $t13, $t18, $t20)], expr#22=[OR($t14, $t19, $t21)], EMPNO=[$t0], EXPR$1=[$t22])
+  EnumerableHashJoin(condition=[=($0, $6)], joinType=[left])
+    EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
+      EnumerableTableScan(table=[[scott, EMP]])
+    EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], m=[$t1], c=[$t2], d=[$t3], trueLiteral=[$t4], DEPTNO0=[$t0])
+      EnumerableAggregate(group=[{0}], m=[MIN($1)], c=[COUNT()], d=[COUNT($1)])
+        EnumerableCalc(expr#0..2=[{inputs}], expr#3=[CAST($t0):SMALLINT NOT NULL], expr#4=[2], DEPTNO0=[$t3], EXPR$0=[$t4])
+          EnumerableTableScan(table=[[scott, DEPT]])
+!plan
+ EMPNO | EXPR$1
+-------+--------
+  7369 | false
+  7499 | false
+  7521 | false
+  7566 | false
+  7654 | false
+  7698 | false
+  7782 | false
+  7788 | false
+  7839 | false
+  7844 | false
+  7876 | false
+  7900 | false
+  7902 | false
+  7934 | false
+(14 rows)
+
+!ok
+
+
 # End sub-query.iq