You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by hy...@apache.org on 2019/07/29 12:25:18 UTC
[calcite] branch master updated: [CALCITE-3031] Support for
correlated ANY/SOME/ALL sub-query (Vineet Garg)
This is an automated email from the ASF dual-hosted git repository.
hyuan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/master by this push:
new a1851b6 [CALCITE-3031] Support for correlated ANY/SOME/ALL sub-query (Vineet Garg)
a1851b6 is described below
commit a1851b67d56f98e14fbe54a96972104585ae8a61
Author: Vineet Garg <vg...@apache.org>
AuthorDate: Mon Apr 29 16:06:40 2019 -0700
[CALCITE-3031] Support for correlated ANY/SOME/ALL sub-query (Vineet Garg)
---
.../calcite/rel/rules/SubQueryRemoveRule.java | 114 ++++++++++++++++-----
.../org/apache/calcite/test/RelOptRulesTest.java | 14 +++
.../org/apache/calcite/test/RelOptRulesTest.xml | 85 +++++++++++++++
core/src/test/resources/sql/sub-query.iq | 66 ++++++++++++
4 files changed, 252 insertions(+), 27 deletions(-)
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
index 78de78f..9a0f32d 100644
--- a/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
+++ b/core/src/main/java/org/apache/calcite/rel/rules/SubQueryRemoveRule.java
@@ -39,6 +39,7 @@ import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.fun.SqlQuantifyOperator;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.sql2rel.RelDecorrelator;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.tools.RelBuilderFactory;
@@ -93,7 +94,7 @@ public abstract class SubQueryRemoveRule extends RelOptRule {
case SCALAR_QUERY:
return rewriteScalarQuery(e, variablesSet, builder, inputCount, offset);
case SOME:
- return rewriteSome(e, builder);
+ return rewriteSome(e, variablesSet, builder);
case IN:
return rewriteIn(e, variablesSet, logic, builder, offset);
case EXISTS:
@@ -138,7 +139,8 @@ public abstract class SubQueryRemoveRule extends RelOptRule {
*
* @return Expression that may be used to replace the RexSubQuery
*/
- private RexNode rewriteSome(RexSubQuery e, RelBuilder builder) {
+ private RexNode rewriteSome(RexSubQuery e, Set<CorrelationId> variablesSet,
+ RelBuilder builder) {
// Most general case, where the left and right keys might have nulls, and
// caller requires 3-valued logic return.
//
@@ -161,31 +163,89 @@ public abstract class SubQueryRemoveRule extends RelOptRule {
//
final SqlQuantifyOperator op = (SqlQuantifyOperator) e.op;
- // SOME_EQ (=SOME) should have been rewritten into IN
- assert op != SqlStdOperatorTable.SOME_EQ;
- builder.push(e.rel)
- .aggregate(builder.groupKey(),
- op.comparisonKind == SqlKind.GREATER_THAN
- || op.comparisonKind == SqlKind.GREATER_THAN_OR_EQUAL
- ? builder.min("m", builder.field(0))
- : builder.max("m", builder.field(0)),
- builder.count(false, "c"),
- builder.count(false, "d", builder.field(0)))
- .as("q")
- .join(JoinRelType.INNER);
- final RexNode caseRexNode = builder.call(SqlStdOperatorTable.CASE,
- builder.call(SqlStdOperatorTable.EQUALS,
- builder.field("q", "c"), builder.literal(0)),
- builder.literal(false),
- builder.call(SqlStdOperatorTable.IS_TRUE,
- builder.call(RelOptUtil.op(op.comparisonKind, null),
- e.operands.get(0), builder.field("q", "m"))),
- builder.literal(true),
- builder.call(SqlStdOperatorTable.GREATER_THAN,
- builder.field("q", "c"), builder.field("q", "d")),
- builder.literal(null),
- builder.call(RelOptUtil.op(op.comparisonKind, null),
- e.operands.get(0), builder.field("q", "m")));
+ // SOME_EQ & SOME_NE should have been rewritten into IN/ NOT IN
+ assert op == SqlStdOperatorTable.SOME_GE || op == SqlStdOperatorTable.SOME_LE
+ || op == SqlStdOperatorTable.SOME_LT || op == SqlStdOperatorTable.SOME_GT;
+
+ RexNode caseRexNode = null;
+ if (variablesSet.isEmpty()) {
+ // for non-correlated case queries such as
+ // select e.deptno, e.deptno < some (select deptno from emp) as v
+ // from emp as e
+ //
+ // becomes
+ //
+ // select e.deptno,
+ // case
+ // when q.c = 0 then false // sub-query is empty
+ // when (e.deptno < q.m) is true then true
+ // when q.c > q.d then unknown // sub-query has at least one null
+ // else e.deptno < q.m
+ // end as v
+ // from emp as e
+ // cross join (
+ // select max(deptno) as m, count(*) as c, count(deptno) as d
+ // from emp) as q
+ builder.push(e.rel).aggregate(builder.groupKey(), op.comparisonKind == SqlKind.GREATER_THAN
+ || op.comparisonKind == SqlKind.GREATER_THAN_OR_EQUAL ? builder
+ .min("m", builder.field(0)) : builder.max("m", builder.field(0)),
+ builder.count(false, "c"), builder.count(false, "d", builder.field(0))).as("q")
+ .join(JoinRelType.INNER);
+ caseRexNode = builder.call(SqlStdOperatorTable.CASE,
+ builder.call(SqlStdOperatorTable.EQUALS, builder.field("q", "c"), builder.literal(0)),
+ builder.literal(false), builder.call(SqlStdOperatorTable.IS_TRUE, builder
+ .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0),
+ builder.field("q", "m"))), builder.literal(true), builder
+ .call(SqlStdOperatorTable.GREATER_THAN, builder.field("q", "c"),
+ builder.field("q", "d")),
+ e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN), builder
+ .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0),
+ builder.field("q", "m")));
+ } else {
+ // for correlated case queries such as
+ // select e.deptno, e.deptno < some (select deptno from emp where emp.name = e.name) as v
+ // from emp as e
+ //
+ // becomes
+ //
+ // select e.deptno,
+ // case
+ // when indicator is null then false // sub-query is empty for corresponding corr value
+ // when q.c = 0 then false // sub-query is empty
+ // when (e.deptno < q.m) is true then true
+ // when q.c > q.d then unknown // sub-query has at least one null
+ // else e.deptno < q.m
+ // end as v
+ // from emp as e
+ // left outer join (
+ // select max(deptno) as m, count(*) as c, count(deptno) as d, "alwaysTrue" as indicator
+ // group by name from emp) as q on e.name = q.name
+ builder.push(e.rel);
+ builder.aggregate(builder.groupKey(), op.comparisonKind == SqlKind.GREATER_THAN
+ || op.comparisonKind == SqlKind.GREATER_THAN_OR_EQUAL ? builder
+ .min("m", builder.field(0)) : builder.max("m", builder.field(0)),
+ builder.count(false, "c"), builder.count(false, "d", builder.field(0)));
+
+ final List<RexNode> parentQueryFields = new ArrayList<>();
+ parentQueryFields.addAll(builder.fields());
+ String indicator = "trueLiteral";
+ parentQueryFields.add(builder.alias(builder.literal(true), indicator));
+ builder.project(parentQueryFields).as("q");
+ builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
+ caseRexNode = builder.call(SqlStdOperatorTable.CASE,
+ builder.call(SqlStdOperatorTable.IS_NULL, builder.field("q", indicator)),
+ builder.literal(false),
+ builder.call(SqlStdOperatorTable.EQUALS, builder.field("q", "c"), builder.literal(0)),
+ builder.literal(false), builder.call(SqlStdOperatorTable.IS_TRUE, builder
+ .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0),
+ builder.field("q", "m"))), builder.literal(true), builder
+ .call(SqlStdOperatorTable.GREATER_THAN, builder.field("q", "c"),
+ builder.field("q", "d")),
+ e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN), builder
+ .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0),
+ builder.field("q", "m")));
+ }
+
// CASE statement above is created with nullable boolean type, but it might
// not be correct. If the original sub-query node's type is not nullable it
// is guranteed for case statement to not produce NULLs. Therefore to avoid
diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
index dcf23dc..507d7a3 100644
--- a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -5133,6 +5133,20 @@ public class RelOptRulesTest extends RelOptTestBase {
checkSubQuery(sql).withLateDecorrelation(true).check();
}
+ @Test public void testSelectAnyCorrelated() {
+ final String sql = "select empno > ANY (\n"
+ + " select deptno from dept where emp.job = dept.name) \n"
+ + "from emp\n";
+ checkSubQuery(sql).withLateDecorrelation(true).check();
+ }
+
+ @Test public void testWhereAnyCorrelatedInSelect() {
+ final String sql =
+ "select * from emp where empno > ANY (\n"
+ + " select deptno from dept where emp.job = dept.name) \n";
+ checkSubQuery(sql).withLateDecorrelation(true).check();
+ }
+
@Test public void testSomeWithEquality() {
final String sql = "select * from emp e1\n"
+ " where e1.deptno = SOME (select deptno from dept)";
diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
index 2ca593c..7127189 100644
--- a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -9599,6 +9599,47 @@ LogicalProject(EMPNO=[$0])
]]>
</Resource>
</TestCase>
+ <TestCase name="testSelectAnyCorrelated">
+ <Resource name="sql">
+ <![CDATA[select empno > ANY (
+ select deptno from dept where emp.job = dept.name)
+from emp
+]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(EXPR$0=[> SOME($0, {
+LogicalProject(DEPTNO=[$0])
+ LogicalFilter(condition=[=($cor0.JOB, $1)])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+})])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planMid">
+ <![CDATA[
+LogicalProject(EXPR$0=[CAST(OR(AND(IS TRUE(>($0, $9)), IS NOT TRUE(OR(IS NULL($12), =($10, 0)))), AND(IS TRUE(>($10, $11)), null, IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9))), AND(>($0, $9), IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9)), IS NOT TRUE(>($10, $11))))):BOOLEAN NOT NULL])
+ LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{2}])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(m=[$0], c=[$1], d=[$2], trueLiteral=[true])
+ LogicalAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)])
+ LogicalProject(DEPTNO=[$0])
+ LogicalFilter(condition=[=($cor0.JOB, $1)])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(EXPR$0=[CAST(OR(AND(IS TRUE(>($0, $9)), IS NOT TRUE(OR(IS NULL($12), =($10, 0)))), AND(IS TRUE(>($10, $11)), null, IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9))), AND(>($0, $9), IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9)), IS NOT TRUE(>($10, $11))))):BOOLEAN NOT NULL])
+ LogicalJoin(condition=[=($2, $13)], joinType=[left])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(m=[$1], c=[$2], d=[$3], trueLiteral=[true], NAME=[$0])
+ LogicalAggregate(group=[{0}], m=[MIN($1)], c=[COUNT()], d=[COUNT($1)])
+ LogicalProject(NAME=[$1], DEPTNO=[$0])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ </TestCase>
<TestCase name="testSubQueryWithDynamicStarHang">
<Resource name="sql">
<![CDATA[select n.n_regionkey from (select * from
@@ -11166,6 +11207,50 @@ MultiJoin(joinFilter=[true], isFullOuterJoin=[false], joinTypes=[[INNER, LEFT]],
]]>
</Resource>
</TestCase>
+ <TestCase name="testWhereAnyCorrelatedInSelect">
+ <Resource name="sql">
+ <![CDATA[select * from emp where empno > ANY (
+ select deptno from dept where emp.job = dept.name)
+]]>
+ </Resource>
+ <Resource name="planBefore">
+ <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+ LogicalFilter(condition=[> SOME($0, {
+LogicalProject(DEPTNO=[$0])
+ LogicalFilter(condition=[=($cor0.JOB, $1)])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+})], variablesSet=[[$cor0]])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+ </Resource>
+ <Resource name="planMid">
+ <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+ LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+ LogicalFilter(condition=[OR(AND(IS TRUE(>($0, $9)), IS NOT TRUE(OR(IS NULL($12), =($10, 0)))), AND(IS TRUE(>($10, $11)), null, IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9))), AND(>($0, $9), IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9)), IS NOT TRUE(>($10, $11))))])
+ LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{2}])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(m=[$0], c=[$1], d=[$2], trueLiteral=[true])
+ LogicalAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)])
+ LogicalProject(DEPTNO=[$0])
+ LogicalFilter(condition=[=($cor0.JOB, $1)])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ <Resource name="planAfter">
+ <![CDATA[
+LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+ LogicalFilter(condition=[OR(AND(>($0, $9), IS NOT TRUE(OR(IS NULL($12), =($10, 0)))), AND(>($0, $9), IS NOT TRUE(OR(IS NULL($12), =($10, 0))), IS NOT TRUE(>($0, $9)), IS NOT TRUE(>($10, $11))))])
+ LogicalJoin(condition=[=($2, $13)], joinType=[left])
+ LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+ LogicalProject(m=[$1], c=[$2], d=[$3], trueLiteral=[true], NAME=[$0])
+ LogicalAggregate(group=[{0}], m=[MIN($1)], c=[COUNT()], d=[COUNT($1)])
+ LogicalProject(NAME=[$1], DEPTNO=[$0])
+ LogicalTableScan(table=[[CATALOG, SALES, DEPT]])
+]]>
+ </Resource>
+ </TestCase>
<TestCase name="testPushFiltertWithIsNotDistinctFromPastJoin">
<Resource name="sql">
<![CDATA[SELECT * FROM emp t1 INNER JOIN emp t2 ON t1.deptno = t2.deptno WHERE t1.ename is not distinct from t2.ename]]>
diff --git a/core/src/test/resources/sql/sub-query.iq b/core/src/test/resources/sql/sub-query.iq
index 9de621c..432f604 100644
--- a/core/src/test/resources/sql/sub-query.iq
+++ b/core/src/test/resources/sql/sub-query.iq
@@ -2063,4 +2063,70 @@ EnumerableAggregate(group=[{}], C=[COUNT()])
EnumerableTableScan(table=[[scott, DEPT]])
!plan
+# Correlated ANY sub-query
+select empno from "scott".emp as e
+where e.empno > ANY(
+ select 2 from "scott".dept e2 where e2.deptno = e.deptno) ;
+EnumerableCalc(expr#0..6=[{inputs}], expr#7=[>($t0, $t2)], expr#8=[IS NULL($t5)], expr#9=[0], expr#10=[=($t3, $t9)], expr#11=[OR($t8, $t10)], expr#12=[IS NOT TRUE($t11)], expr#13=[AND($t7, $t12)], expr#14=[IS NOT TRUE($t7)], expr#15=[>($t3, $t4)], expr#16=[IS NOT TRUE($t15)], expr#17=[AND($t7, $t12, $t14, $t16)], expr#18=[OR($t13, $t17)], EMPNO=[$t0], $condition=[$t18])
+ EnumerableHashJoin(condition=[=($1, $6)], joinType=[left])
+ EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
+ EnumerableTableScan(table=[[scott, EMP]])
+ EnumerableCalc(expr#0..2=[{inputs}], expr#3=[2], expr#4=[1:BIGINT], expr#5=[true], m=[$t3], c=[$t4], d=[$t4], trueLiteral=[$t5], DEPTNO=[$t0])
+ EnumerableTableScan(table=[[scott, DEPT]])
+!plan
+ EMPNO
+-------
+ 7369
+ 7499
+ 7521
+ 7566
+ 7654
+ 7698
+ 7782
+ 7788
+ 7839
+ 7844
+ 7876
+ 7900
+ 7902
+ 7934
+(14 rows)
+
+!ok
+
+# # inner query produces empty result therefore ANY should produce 'false'
+select empno,
+e.deptno > ANY(
+ select 2 from "scott".dept e2 where e2.deptno = e.empno) from "scott".emp as e;
+
+EnumerableCalc(expr#0..6=[{inputs}], expr#7=[>($t1, $t2)], expr#8=[IS TRUE($t7)], expr#9=[IS NULL($t5)], expr#10=[0], expr#11=[=($t3, $t10)], expr#12=[OR($t9, $t11)], expr#13=[IS NOT TRUE($t12)], expr#14=[AND($t8, $t13)], expr#15=[>($t3, $t4)], expr#16=[IS TRUE($t15)], expr#17=[null:BOOLEAN], expr#18=[IS NOT TRUE($t7)], expr#19=[AND($t16, $t17, $t13, $t18)], expr#20=[IS NOT TRUE($t15)], expr#21=[AND($t7, $t13, $t18, $t20)], expr#22=[OR($t14, $t19, $t21)], EMPNO=[$t0], EXPR$1=[$t22])
+ EnumerableHashJoin(condition=[=($0, $6)], joinType=[left])
+ EnumerableCalc(expr#0..7=[{inputs}], EMPNO=[$t0], DEPTNO=[$t7])
+ EnumerableTableScan(table=[[scott, EMP]])
+ EnumerableCalc(expr#0..3=[{inputs}], expr#4=[true], m=[$t1], c=[$t2], d=[$t3], trueLiteral=[$t4], DEPTNO0=[$t0])
+ EnumerableAggregate(group=[{0}], m=[MIN($1)], c=[COUNT()], d=[COUNT($1)])
+ EnumerableCalc(expr#0..2=[{inputs}], expr#3=[CAST($t0):SMALLINT NOT NULL], expr#4=[2], DEPTNO0=[$t3], EXPR$0=[$t4])
+ EnumerableTableScan(table=[[scott, DEPT]])
+!plan
+ EMPNO | EXPR$1
+-------+--------
+ 7369 | false
+ 7499 | false
+ 7521 | false
+ 7566 | false
+ 7654 | false
+ 7698 | false
+ 7782 | false
+ 7788 | false
+ 7839 | false
+ 7844 | false
+ 7876 | false
+ 7900 | false
+ 7902 | false
+ 7934 | false
+(14 rows)
+
+!ok
+
+
# End sub-query.iq