You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by sa...@apache.org on 2017/12/04 04:54:05 UTC
[2/3] impala git commit: IMPALA-1422: support a constant on LHS of IN
predicates.
IMPALA-1422: support a constant on LHS of IN predicates.
Currently, constant expressions for the LHS of the IN predicate
are not supported. This patch adds this support as a rewrite in
StmtRewriter (where subqueries are rewritten to joins). Since
there is a nested-loop variant of left semijoin, support for IN
is handled by not erring out. NOT IN is handled by a rewrite to
corresponding NOT EXISTS predicate. Support for NOT IN with a
correlated subquery is not included in this change.
Re-organized the frontend subquery analysis tests to expand coverage.
Testing:
- added frontend subquery analysis tests
- added e2e tests
Change-Id: I0d69889a3c72e90be9d4ccf47d2816819ae32acb
Reviewed-on: http://gerrit.cloudera.org:8080/8322
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/633dbff7
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/633dbff7
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/633dbff7
Branch: refs/heads/master
Commit: 633dbff71dd005858ba4cf6907376d944235d404
Parents: 575b5a2
Author: Vuk Ercegovac <ve...@cloudera.com>
Authored: Mon Sep 11 22:45:25 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Dec 2 04:09:05 2017 +0000
----------------------------------------------------------------------
.../org/apache/impala/analysis/InPredicate.java | 2 +-
.../org/apache/impala/analysis/SlotRef.java | 3 +
.../apache/impala/analysis/StmtRewriter.java | 153 +++++-
.../impala/analysis/AnalyzeSubqueriesTest.java | 464 +++++++++++--------
.../queries/PlannerTest/subquery-rewrite.test | 220 ++++++++-
.../QueryTest/subquery-in-constant-lhs.test | 296 ++++++++++++
tests/query_test/test_queries.py | 3 +
7 files changed, 935 insertions(+), 206 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/633dbff7/fe/src/main/java/org/apache/impala/analysis/InPredicate.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/InPredicate.java b/fe/src/main/java/org/apache/impala/analysis/InPredicate.java
index e34752f..ba0772b 100644
--- a/fe/src/main/java/org/apache/impala/analysis/InPredicate.java
+++ b/fe/src/main/java/org/apache/impala/analysis/InPredicate.java
@@ -40,7 +40,7 @@ import com.google.common.collect.Lists;
public class InPredicate extends Predicate {
private static final String IN_SET_LOOKUP = "in_set_lookup";
private static final String NOT_IN_SET_LOOKUP = "not_in_set_lookup";
- private static final String IN_ITERATE= "in_iterate";
+ private static final String IN_ITERATE = "in_iterate";
private static final String NOT_IN_ITERATE = "not_in_iterate";
private final boolean isNotIn_;
http://git-wip-us.apache.org/repos/asf/impala/blob/633dbff7/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
index b4505ba..23f2d88 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
@@ -89,6 +89,9 @@ public class SlotRef extends Expr {
@Override
protected void analyzeImpl(Analyzer analyzer) throws AnalysisException {
+ // TODO: derived slot refs (e.g., star-expanded) will not have rawPath set.
+ // Change construction to properly handle such cases.
+ Preconditions.checkState(rawPath_ != null);
Path resolvedPath = null;
try {
resolvedPath = analyzer.resolvePath(rawPath_, PathType.SLOT_REF);
http://git-wip-us.apache.org/repos/asf/impala/blob/633dbff7/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
index 740b794..a0eb757 100644
--- a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
+++ b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java
@@ -24,6 +24,8 @@ import java.util.List;
import org.apache.impala.analysis.AnalysisContext.AnalysisResult;
import org.apache.impala.analysis.UnionStmt.UnionOperand;
import org.apache.impala.common.AnalysisException;
+import org.apache.impala.common.ColumnAliasGenerator;
+import org.apache.impala.common.TableAliasGenerator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -220,11 +222,21 @@ public class StmtRewriter {
"expression: " + conjunct.toSql());
}
- if (conjunct instanceof ExistsPredicate) {
+ Expr rewrittenConjunct = conjunct;
+ if (conjunct instanceof InPredicate && conjunct.getChild(0).isConstant()) {
+ Expr newConjunct = rewriteInConstant(stmt, (InPredicate)conjunct);
+ if (newConjunct != null) {
+ newConjunct.analyze(analyzer);
+ rewrittenConjunct = newConjunct;
+ }
+ }
+
+ if (rewrittenConjunct instanceof ExistsPredicate) {
// Check if we can determine the result of an ExistsPredicate during analysis.
// If so, replace the predicate with a BoolLiteral predicate and remove it from
// the list of predicates to be rewritten.
- BoolLiteral boolLiteral = replaceExistsPredicate((ExistsPredicate) conjunct);
+ BoolLiteral boolLiteral =
+ replaceExistsPredicate((ExistsPredicate) rewrittenConjunct);
if (boolLiteral != null) {
boolLiteral.analyze(analyzer);
smap.put(conjunct, boolLiteral);
@@ -237,7 +249,7 @@ public class StmtRewriter {
BoolLiteral boolLiteral = new BoolLiteral(true);
boolLiteral.analyze(analyzer);
smap.put(conjunct, boolLiteral);
- exprsWithSubqueries.add(conjunct);
+ exprsWithSubqueries.add(rewrittenConjunct);
}
stmt.whereClause_ = stmt.whereClause_.substitute(smap, analyzer, false);
@@ -273,6 +285,130 @@ public class StmtRewriter {
}
/**
+ * Rewrites [NOT] IN predicate when the LHS is a constant and RHS is a subquery.
+ * If 'inPred' is not rewritten, null is returned. If 'inPred' is rewritten, the
+ * resulting expression is not analyzed (caller must analyze). 'outerBlock' is the
+ * parent block of 'inPred'.
+ *
+ * Example: SELECT * FROM t WHERE 1 IN (SELECT id FROM s)
+ *
+ * The rewrite transforms 'inPred' using the following cases. C refers to the LHS
+ * constant and RHS is the subquery. All cases apply to both correlated and
+ * uncorrelated subqueries.
+ *
+ * 1) Predicate is IN: No rewrite since it can be evaluated using the existing
+ * NestedLoop based Left Semijoin.
+ *
+ * 2) Predicate is NOT IN and RHS returns a single row.
+ *
+ * Example: 10 NOT IN (SELECT 1)
+ * Example: 10 NOT IN (SELECT MAX(b) FROM t)
+ * Example: 10 NOT IN (SELECT x FROM t LIMIT 1)
+ *
+ * REWRITE: C NOT IN RHS: => C != (RHS)
+ *
+ * 3) Predicate is NOT IN and RHS returns multiple rows.
+ *
+ * Example: SELECT * FROM t WHERE 1 NOT IN (SELECT id FROM s)
+ *
+ * Assume RHS is of the form SELECT expr FROM T WHERE ...
+ *
+ * REWRITE:
+ * C NOT IN (RHS)
+ * Rewrites to:
+ * NOT EXISTS (SELECT x FROM (SELECT x FROM RHS) tmp
+ * WHERE C IS NULL OR tmp.x IS NULL OR tmp.x = C)
+ *
+ * Example:
+ * ... 10 NOT IN (SELECT x FROM t WHERE t.y > 3)
+ * Rewrites to:
+ * ... NOT EXISTS (SELECT x (SELECT x FROM t WHERE t.y > 3) tmp
+ * WHERE 10 IS NULL OR tmp.x IS NULL OR tmp.x = 10)
+ *
+ * The rewrite wraps the RHS subquery in an inline view and filters it with a
+ * condition using the LHS constant. The inline view ensures that the filter is
+ * logically evaluated over the RHS result. Alternatively, injecting the filter into
+ * the RHS is generally incorrect so requires push-down analysis to preserve
+ * correctness (consider cases such as limit, aggregation, and analytic functions).
+ * Such special cases are avoided here by using the inline view.
+ * TODO: Correlated NOT IN subqueries require that column resolution be extended to
+ * handle references to an outer block that is more than one nesting level away.
+ *
+ * The filter constructed from the LHS constant is subtle, so warrants further
+ * explanation. Consider the cases where the LHS is NULL vs. NOT NULL and the RHS
+ * is empty vs. not-empty. When RHS subquery evaluates to the empty result set, the
+ * NOT EXISTS passes for all LHS values. When the RHS subquery is not-empty, it is
+ * useful to think of C NOT IN (RHS) as the boolean expansion:
+ * C != x_1 & C != x_2 & C != x_3 & ... where each x_i is bound to a result
+ * from the RHS subquery.
+ *
+ * So, if C is equal to any x_i, the expression is false. Similarly, if any
+ * x_i is null or if C is null, then the overall expression also is false.
+ */
+ private static Expr rewriteInConstant(SelectStmt outerBlock,
+ InPredicate inPred) throws AnalysisException {
+ Expr lhs = inPred.getChild(0);
+ Preconditions.checkArgument(lhs.isConstant());
+
+ Expr rhs = inPred.getChild(1);
+ QueryStmt subquery = inPred.getSubquery().getStatement();
+ Preconditions.checkState(subquery instanceof SelectStmt);
+ SelectStmt rhsQuery = (SelectStmt) subquery;
+
+ // CASE 1, IN:
+ if (!inPred.isNotIn()) return null;
+
+ // CASE 2, NOT IN and RHS returns a single row:
+ if (rhsQuery.returnsSingleRow()) {
+ return new BinaryPredicate(BinaryPredicate.Operator.NE, lhs, rhs);
+ }
+
+ // CASE 3, NOT IN, RHS returns multiple rows.
+ Preconditions.checkState(rhsQuery.getResultExprs().size() == 1);
+ // Do not rewrite NOT IN when the RHS is correlated.
+ if (isCorrelated(rhsQuery)) return null;
+
+ // Wrap RHS in an inline view: (select wrapperColumnAlias from RHS) wrapperTableAlias.
+ // Use outerBlock (parent block of subquery) to generate aliases. Doing so guarantees
+ // that the wrapper view does not produce the same alias if further rewritten.
+ String wrapperTableAlias = outerBlock.getTableAliasGenerator().getNextAlias();
+ String wrapperColumnAlias = outerBlock.getColumnAliasGenerator().getNextAlias();
+ InlineViewRef wrapperView = new InlineViewRef(wrapperTableAlias, rhsQuery,
+ Lists.newArrayList(wrapperColumnAlias));
+ SlotRef wrapperResult = new SlotRef(
+ Lists.newArrayList(wrapperTableAlias, wrapperColumnAlias));
+
+ // Build: lhs IS NULL OR rhsResultExpr IS NULL OR lhs = rhs
+ Expr rewritePredicate = new CompoundPredicate(CompoundPredicate.Operator.OR,
+ new IsNullPredicate(lhs, false),
+ new CompoundPredicate(CompoundPredicate.Operator.OR,
+ new IsNullPredicate(wrapperResult, false),
+ new BinaryPredicate(BinaryPredicate.Operator.EQ, wrapperResult, lhs)));
+
+ List<TableRef> fromList = Lists.newArrayList();
+ fromList.add(wrapperView);
+ SelectStmt rewriteQuery = new SelectStmt(
+ new SelectList(Lists.newArrayList(new SelectListItem(wrapperResult, null))),
+ new FromClause(fromList),
+ rewritePredicate,
+ null, null, null, null);
+ Subquery newSubquery = new Subquery(rewriteQuery);
+ rhsQuery.reset();
+
+ // Build: NOT EXISTS(newSubquery)
+ return new ExistsPredicate(newSubquery, true);
+ }
+
+ /**
+ * Tests if a subquery is correlated to its outer block.
+ */
+ private static boolean isCorrelated(SelectStmt subqueryStmt) {
+ if (!subqueryStmt.hasWhereClause()) return false;
+ return containsCorrelatedPredicate(subqueryStmt.getWhereClause(),
+ subqueryStmt.getTableRefIds());
+ }
+
+ /**
* Modifies in place an expr that contains a subquery by rewriting its
* subquery stmt. The modified analyzed expr is returned.
*/
@@ -372,6 +508,7 @@ public class StmtRewriter {
SelectListItem firstItem =
((SelectStmt) inlineView.getViewStmt()).getSelectList().getItems().get(0);
if (!onClauseConjuncts.isEmpty() &&
+ firstItem.getExpr() != null &&
firstItem.getExpr().contains(Expr.NON_NULL_EMPTY_AGG)) {
// Correlated subqueries with an aggregate function that returns non-null on
// an empty input are rewritten using a LEFT OUTER JOIN because we
@@ -453,7 +590,8 @@ public class StmtRewriter {
if (!operator.isEquivalence()) continue;
List<TupleId> lhsTupleIds = Lists.newArrayList();
conjunct.getChild(0).getIds(lhsTupleIds, null);
- if (lhsTupleIds.isEmpty()) continue;
+ // Allows for constants to be a join predicate.
+ if (lhsTupleIds.isEmpty() && !conjunct.getChild(0).isConstant()) continue;
List<TupleId> rhsTupleIds = Lists.newArrayList();
conjunct.getChild(1).getIds(rhsTupleIds, null);
if (rhsTupleIds.isEmpty()) continue;
@@ -504,6 +642,7 @@ public class StmtRewriter {
joinOp = JoinOperator.NULL_AWARE_LEFT_ANTI_JOIN;
List<TupleId> tIds = Lists.newArrayList();
joinConjunct.getIds(tIds, null);
+
if (tIds.size() <= 1 || !tIds.contains(inlineView.getDesc().getId())) {
throw new AnalysisException("Unsupported NOT IN predicate with subquery: " +
expr.toSql());
@@ -531,7 +670,7 @@ public class StmtRewriter {
/**
* Replace all unqualified star exprs ('*') from stmt's select list with qualified
* ones, i.e. tbl_1.*,...,tbl_n.*, where tbl_1,...,tbl_n are the visible tablerefs
- * in stmt. 'tableIndx' indicates the maximum tableRef ordinal to consider when
+ * in stmt. 'tableIdx' indicates the maximum tableRef ordinal to consider when
* replacing an unqualified star item.
*/
private static void replaceUnqualifiedStarItems(SelectStmt stmt, int tableIdx) {
@@ -640,7 +779,7 @@ public class StmtRewriter {
/**
* Checks if an expr containing a correlated subquery is eligible for rewrite by
- * tranforming into a join. Throws an AnalysisException if 'expr' is not eligible for
+ * transforming into a join. Throws an AnalysisException if 'expr' is not eligible for
* rewrite.
* TODO: Merge all the rewrite eligibility tests into a single function.
*/
@@ -673,7 +812,7 @@ public class StmtRewriter {
/**
* Checks if all the 'correlatedPredicates' extracted from the subquery of 'expr' can be
* added to the ON-clause of the join that results from the subquery rewrite. It throws
- * an AnalysisException is this is not the case. 'inlineView' is the generated inline
+ * an AnalysisException if this is not the case. 'inlineView' is the generated inline
* view that will replace the subquery in the rewritten statement.
*/
private static void validateCorrelatedPredicates(Expr expr, InlineViewRef inlineView,
http://git-wip-us.apache.org/repos/asf/impala/blob/633dbff7/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
index dd710cd..e49e289 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java
@@ -24,6 +24,7 @@ import org.apache.impala.common.AnalysisException;
public class AnalyzeSubqueriesTest extends AnalyzerTest {
private static String cmpOperators[] = {"=", "!=", "<=", ">=", ">", "<"};
+ private static String nonEquiCmpOperators[] = {"!=", "<=", ">=", ">", "<"};
@Test
public void TestInSubqueries() throws AnalysisException {
@@ -34,10 +35,11 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
"left semi join", "left anti join"};
// [NOT] IN subquery predicates
- String operators[] = {"in", "not in"};
+ String operators[] = {"IN", "NOT IN"};
for (String op: operators) {
AnalyzesOk(String.format("select * from functional.alltypes where id %s " +
"(select id from functional.alltypestiny)", op));
+
// Using column and table aliases similar to the ones produced by the
// column/table alias generators during a rewrite.
AnalyzesOk(String.format("select id `$c$1` from functional.alltypestiny `$a$1` " +
@@ -133,14 +135,14 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
"s on a.int_col = s.int_col where a.bool_col = false)", op, joinOp));
}
- // Subquery with relative table references
+ // Complex type: subquery with relative table references
AnalyzesOk(String.format(
"select id from functional.allcomplextypes t where id %s " +
"(select f1 from t.struct_array_col a, t.int_array_col b " +
"where f2 = 'xyz' and b.item < 3 group by f1 having count(*) > 2 limit 5)",
op));
- // Correlated predicates in the subquery's ON clause
+ // Correlated predicates in the subquery's ON clause. Vary join column type.
AnalyzesOk(String.format("select * from functional.alltypes t where t.id %s " +
"(select a.id from functional.alltypesagg a left outer join " +
"functional.alltypessmall s on s.int_col = t.int_col)", op));
@@ -220,6 +222,7 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
AnalyzesOk(String.format(
"select id from functional.allcomplextypes t where id %s " +
"(select f1 from t.struct_array_col a where t.int_struct_col.f1 = a.f1)", op));
+
// Test correlated BETWEEN predicates.
AnalyzesOk(String.format("select 1 from functional.alltypes t where id %s " +
"(select id from functional.alltypesagg a where " +
@@ -270,208 +273,291 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
"functional.alltypestiny t1 where int_col %s (select min(bigint_col) " +
"over (partition by bool_col) from functional.alltypessmall t2 where " +
"int_col < 10)", op));
- }
- // Constant on the left hand side
- AnalyzesOk("select * from functional.alltypes a where 1 in " +
- "(select id from functional.alltypesagg s where s.int_col = a.int_col)");
- AnalysisError("select * from functional.alltypes a where 1 not in " +
- "(select id from functional.alltypesagg s where s.int_col = a.int_col)",
- "Unsupported NOT IN predicate with subquery: 1 NOT IN (SELECT id FROM " +
- "functional.alltypesagg s WHERE s.int_col = a.int_col)");
-
- // IN subquery that is equivalent to an uncorrelated EXISTS subquery
- AnalysisError("select * from functional.alltypes t where 1 in " +
- "(select int_col from functional.alltypesagg)", "Unsupported " +
- "predicate with subquery: 1 IN (SELECT int_col FROM functional.alltypesagg)");
- // Different non-equi comparison operators in the correlated predicate
- String nonEquiCmpOperators[] = {"!=", "<=", ">=", ">", "<"};
- for (String cmpOp: nonEquiCmpOperators) {
- // Allowed because the subquery only has relative table refs.
- AnalyzesOk(String.format("select 1 from functional.allcomplextypes t where id in" +
- "(select f1 from t.struct_array_col a where t.int_struct_col.f1 %s a.f1)",
- cmpOp));
- // Not allowed because the subquery has absolute table refs.
- AnalysisError(String.format("select 1 from functional.alltypes t where 1 in " +
- "(select int_col from functional.alltypesagg g where g.id %s t.id)",
- cmpOp), String.format("Unsupported predicate with subquery: 1 " +
- "IN (SELECT int_col FROM functional.alltypesagg g WHERE g.id %s t.id)",
- cmpOp));
- }
+ // [NOT] IN subquery with a correlated non-equi predicate is ok if the subquery only
+ // has relative table refs
+ AnalyzesOk(String.format("select 1 from functional.allcomplextypes t where id %s " +
+ "(select f1 from t.struct_array_col a where t.int_struct_col.f1 < a.f1)", op));
+
+ // Statement with a GROUP BY and a correlated IN subquery that has a non-equi
+ // correlated predicate and only relative table refs
+ AnalyzesOk(String.format("select id, count(*) from functional.allcomplextypes t " +
+ "where id %s" +
+ "(select f1 from t.struct_array_col a where t.int_struct_col.f1 < a.f1) " +
+ "group by id", op));
+
+ // Complex type: Correlated, non-equijoins.
+ for (String cmpOp: nonEquiCmpOperators) {
+ // Complex type: allowed because the subquery only has relative table refs.
+ AnalyzesOk(String.format("select 1 from functional.allcomplextypes t " +
+ "where id %s " +
+ "(select f1 from t.struct_array_col a where t.int_struct_col.f1 %s a.f1)",
+ op, cmpOp));
+ }
- // NOT IN subquery with a correlated non-equi predicate is ok if the subquery only
- // has relative table refs
- AnalyzesOk("select 1 from functional.allcomplextypes t where id not in " +
- "(select f1 from t.struct_array_col a where t.int_struct_col.f1 < a.f1)");
- // NOT IN subquery with a correlated non-equi predicate is not ok if the subquery
- // has absolute table refs
- AnalysisError("select 1 from functional.alltypes t where 1 not in " +
- "(select id from functional.alltypestiny g where g.id < t.id)",
- "Unsupported predicate with subquery: 1 NOT IN (SELECT id FROM " +
- "functional.alltypestiny g WHERE g.id < t.id)");
-
- // Statement with a GROUP BY and a correlated IN subquery that has a non-equi
- // correlated predicate and only relative table refs
- AnalyzesOk("select id, count(*) from functional.allcomplextypes t where id in" +
- "(select f1 from t.struct_array_col a where t.int_struct_col.f1 < a.f1) " +
- "group by id");
- // Statement with a GROUP BY and a correlated IN subquery that has a non-equi
- // correlated predicate and absolute table refs
- AnalysisError("select id, count(*) from functional.alltypes t " +
- "where 1 IN (select id from functional.alltypesagg g where t.int_col < " +
- "g.int_col) group by id", "Unsupported predicate with subquery: 1 IN " +
- "(SELECT id FROM functional.alltypesagg g WHERE t.int_col < g.int_col)");
-
- // Reference a non-existing table in the subquery
- AnalysisError("select * from functional.alltypestiny t where id in " +
- "(select id from functional.alltypessmall s left outer join p on " +
- "(s.int_col = p.int_col))",
- "Could not resolve table reference: 'p'");
- // Reference a non-existing column from a table in the outer scope
- AnalysisError("select * from functional.alltypestiny t where id in " +
- "(select id from functional.alltypessmall s where s.int_col = t.bad_col)",
- "Could not resolve column/field reference: 't.bad_col'");
-
- // Referencing the same table in the inner and the outer query block
- // No explicit alias
- AnalyzesOk("select id from functional.alltypestiny where int_col in " +
- "(select int_col from functional.alltypestiny)");
- // Different alias between inner and outer block referencing the same table
- AnalyzesOk("select id from functional.alltypestiny t where int_col in " +
- "(select int_col from functional.alltypestiny p)");
- // Alias only in the outer block
- AnalyzesOk("select id from functional.alltypestiny t where int_col in " +
- "(select int_col from functional.alltypestiny)");
- // Same alias in both inner and outer block
- AnalyzesOk("select id from functional.alltypestiny t where int_col in " +
- "(select int_col from functional.alltypestiny t)");
- // Binary predicate with non-comparable operands
- AnalysisError("select * from functional.alltypes t where " +
- "(id in (select id from functional.alltypestiny)) = 'string_val'",
- "operands of type BOOLEAN and STRING are not comparable: " +
- "(id IN (SELECT id FROM functional.alltypestiny)) = 'string_val'");
+ // Reference a non-existing table in the subquery
+ AnalysisError(String.format("select * from functional.alltypestiny t where id %s " +
+ "(select id from functional.alltypessmall s left outer join p on " +
+ "(s.int_col = p.int_col))", op),
+ "Could not resolve table reference: 'p'");
+ // Reference a non-existing column from a table in the outer scope
+ AnalysisError(String.format("select * from functional.alltypestiny t where id %s " +
+ "(select id from functional.alltypessmall s where s.int_col = t.bad_col)", op),
+ "Could not resolve column/field reference: 't.bad_col'");
+
+ // Referencing the same table in the inner and the outer query block
+ // No explicit alias
+ AnalyzesOk(String.format("select id from functional.alltypestiny where " +
+ "int_col %s (select int_col from functional.alltypestiny)", op));
+ // Different alias between inner and outer block referencing the same table
+ AnalyzesOk(String.format("select id from functional.alltypestiny t where " +
+ "int_col %s (select int_col from functional.alltypestiny p)", op));
+ // Alias only in the outer block
+ AnalyzesOk(String.format("select id from functional.alltypestiny t where " +
+ "int_col %s (select int_col from functional.alltypestiny)", op));
+ // Same alias in both inner and outer block
+ AnalyzesOk(String.format("select id from functional.alltypestiny t where " +
+ "int_col %s (select int_col from functional.alltypestiny t)", op));
+
+ // OR with subquery predicates
+ AnalysisError(String.format("select * from functional.alltypes t where t.id %s " +
+ "(select id from functional.alltypesagg) or t.bool_col = false", op),
+ String.format("Subqueries in OR predicates are not supported: t.id %s " +
+ "(SELECT id FROM functional.alltypesagg) OR t.bool_col = FALSE", op));
+ AnalysisError(String.format("select id from functional.allcomplextypes t where " +
+ "id %s " +
+ "(select f1 from t.struct_array_col a where t.int_struct_col.f1 < a.f1) " +
+ "or id < 10", op),
+ String.format("Subqueries in OR predicates are not supported: id %s " +
+ "(SELECT f1 FROM t.struct_array_col a WHERE t.int_struct_col.f1 < a.f1) " +
+ "OR id < 10", op));
+
+ // Binary predicate with non-comparable operands
+ AnalysisError(String.format("select * from functional.alltypes t where " +
+ "(id %s (select id from functional.alltypestiny)) = 'string_val'", op),
+ String.format("operands of type BOOLEAN and STRING are not comparable: " +
+ "(id %s (SELECT id FROM functional.alltypestiny)) = 'string_val'", op));
+
+ // TODO: Modify the StmtRewriter to allow this case with relative refs.
+ // Correlated subquery with relative table refs and OR predicate is not allowed
+ AnalysisError(String.format("select id from functional.allcomplextypes t where " +
+ "id %s (select f1 from t.struct_array_col a where t.int_struct_col.f1 < a.f1 " +
+ "or id < 10)", op),
+ "Disjunctions with correlated predicates are not supported: " +
+ "t.int_struct_col.f1 < a.f1 OR id < 10");
+ // Correlated subquery with absolute table refs and OR predicate is not allowed
+ AnalysisError(String.format("select * from functional.alltypes t where id %s " +
+ "(select id from functional.alltypesagg a where " +
+ "a.int_col = t.int_col or a.bool_col = false)", op), "Disjunctions " +
+ "with correlated predicates are not supported: a.int_col = " +
+ "t.int_col OR a.bool_col = FALSE");
- // OR with subquery predicates
- AnalysisError("select * from functional.alltypes t where t.id in " +
- "(select id from functional.alltypesagg) or t.bool_col = false",
- "Subqueries in OR predicates are not supported: t.id IN " +
- "(SELECT id FROM functional.alltypesagg) OR t.bool_col = FALSE");
+ AnalyzesOk(String.format("select * from functional.alltypes t where id %s " +
+ "(select id from functional.alltypestiny) and (bool_col = false or " +
+ "int_col = 10)", op));
+
+ // Correlated subqueries with GROUP BY, AGG functions or DISTINCT are not allowed
+ // with relative table refs in the subquery
+ // TODO: Modify the StmtRewriter to allow this case with relative refs
+ AnalysisError(String.format("select id from functional.allcomplextypes t where " +
+ "id %s" +
+ "(select count(f1) from t.struct_array_col a where t.int_struct_col.f1 < a.f1)",
+ op),
+ "Unsupported correlated subquery with grouping and/or aggregation: " +
+ "SELECT count(f1) FROM t.struct_array_col a WHERE t.int_struct_col.f1 < a.f1");
+ // Correlated subqueries with GROUP BY, AGG functions or DISTINCT are not allowed
+ // with absolute table refs in the subquery
+ AnalysisError(String.format("select * from functional.alltypes t where t.id %s " +
+ "(select max(a.id) from functional.alltypesagg a where " +
+ "t.int_col = a.int_col)", op),
+ "Unsupported correlated subquery with grouping and/or aggregation: " +
+ "SELECT max(a.id) FROM functional.alltypesagg a " +
+ "WHERE t.int_col = a.int_col");
+ AnalysisError(String.format("select * from functional.alltypes t where t.id %s " +
+ "(select a.id from functional.alltypesagg a where " +
+ "t.int_col = a.int_col group by a.id)", op), "Unsupported correlated " +
+ "subquery with grouping and/or aggregation: SELECT a.id FROM " +
+ "functional.alltypesagg a WHERE t.int_col = a.int_col GROUP BY a.id");
+ AnalysisError(String.format("select * from functional.alltypes t where t.id %s " +
+ "(select distinct a.id from functional.alltypesagg a where " +
+ "a.bigint_col = t.bigint_col)", op), "Unsupported correlated subquery with " +
+ "grouping and/or aggregation: SELECT DISTINCT a.id FROM " +
+ "functional.alltypesagg a WHERE a.bigint_col = t.bigint_col");
+
+ // Multiple subquery predicates
+ AnalyzesOk(String.format("select * from functional.alltypes t where id %s " +
+ "(select id from functional.alltypestiny where int_col = 10) and int_col %s " +
+ "(select int_col from functional.alltypessmall where bigint_col = 1000) and " +
+ "string_col not in (select string_col from functional.alltypesagg where " +
+ "tinyint_col > 10) and bool_col = false", op, op));
+ AnalyzesOk(String.format("select id, year, month from " +
+ "functional.allcomplextypes t where id %s " +
+ "(select item from t.int_array_col where item < 10) and id not in " +
+ "(select f1 from t.struct_array_col where f2 = 'test')", op));
+
+ // Correlated subquery with a LIMIT clause
+ AnalysisError(String.format("select * from functional.alltypes t where id %s " +
+ "(select s.id from functional.alltypesagg s where s.int_col = t.int_col " +
+ "limit 1)", op), "Unsupported correlated subquery with a LIMIT clause: " +
+ "SELECT s.id FROM functional.alltypesagg s WHERE s.int_col = t.int_col " +
+ "LIMIT 1");
+
+ // Correlated IN with an analytic function
+ AnalysisError(String.format("select id, int_col, bool_col from " +
+ "functional.alltypestiny t1 " +
+ "where int_col %s (select min(bigint_col) over (partition by bool_col) " +
+ "from functional.alltypessmall t2 where t1.id < t2.id)", op), "Unsupported " +
+ "correlated subquery with grouping and/or aggregation: SELECT " +
+ "min(bigint_col) OVER (PARTITION BY bool_col) FROM " +
+ "functional.alltypessmall t2 WHERE t1.id < t2.id");
+
+ // IN subquery in binary predicate
+ AnalysisError(String.format("select * from functional.alltypestiny where " +
+ "(tinyint_col %s (1,2)) = (bool_col %s (select bool_col from " +
+ "functional.alltypes))", op, op),
+ String.format("IN subquery predicates are not supported " +
+ "in binary predicates: (tinyint_col %s (1, 2)) = (bool_col %s (SELECT " +
+ "bool_col FROM functional.alltypes))", op, op));
+
+ // Column labels may conflict after the rewrite as an inline view
+ AnalyzesOk(String.format("select int_col from functional.alltypestiny where " +
+ "int_col %s (select 1 as int_col from functional.alltypesagg)", op));
+ AnalyzesOk(String.format("select int_col from functional.alltypestiny a where " +
+ "int_col %s (select 1 as int_col from functional.alltypesagg b " +
+ "where a.int_col = b.int_col)", op));
+
+ // NOT compound predicates with OR
+ AnalyzesOk(String.format("select * from functional.alltypes t where not (" +
+ "id %s (select id from functional.alltypesagg) or int_col < 10)", op));
+ AnalyzesOk(String.format("select * from functional.alltypes t where not (" +
+ "t.id < 10 or not (t.int_col %s (select int_col from " +
+ "functional.alltypesagg) and t.bool_col = false))", op));
+ }
+
+ // Negated [NOT] IN subquery with disjunction.
AnalysisError("select * from functional.alltypes t where not (t.id in " +
"(select id from functional.alltypesagg) and t.int_col = 10)",
"Subqueries in OR predicates are not supported: t.id NOT IN " +
"(SELECT id FROM functional.alltypesagg) OR t.int_col != 10");
+ AnalysisError("select * from functional.alltypes t where not (t.id not in " +
+ "(select id from functional.alltypesagg) and t.int_col = 10)",
+ "Subqueries in OR predicates are not supported: t.id IN " +
+ "(SELECT id FROM functional.alltypesagg) OR t.int_col != 10");
+
+ // Exists subquery with disjunction.
AnalysisError("select * from functional.alltypes t where exists " +
"(select * from functional.alltypesagg g where g.bool_col = false) " +
"or t.bool_col = true", "Subqueries in OR predicates are not " +
"supported: EXISTS (SELECT * FROM functional.alltypesagg g WHERE " +
"g.bool_col = FALSE) OR t.bool_col = TRUE");
+
+ // Comparator-based subquery with disjunction.
AnalysisError("select * from functional.alltypes t where t.id = " +
"(select min(id) from functional.alltypesagg g) or t.id = 10",
"Subqueries in OR predicates are not supported: t.id = " +
"(SELECT min(id) FROM functional.alltypesagg g) OR t.id = 10");
- AnalysisError("select id from functional.allcomplextypes t where id in" +
- "(select f1 from t.struct_array_col a where t.int_struct_col.f1 < a.f1) " +
- "or id < 10", "Subqueries in OR predicates are not supported: " +
- "id IN (SELECT f1 FROM t.struct_array_col a WHERE t.int_struct_col.f1 < a.f1) " +
- "OR id < 10");
-
- // TODO for 2.3: Modify the StmtRewriter to allow this case with relative refs.
- // Correlated subquery with relative table refs and OR predicate is not allowed
- AnalysisError("select id from functional.allcomplextypes t where id in" +
- "(select f1 from t.struct_array_col a where t.int_struct_col.f1 < a.f1 " +
- "or id < 10)", "Disjunctions with correlated predicates are not supported: " +
- "t.int_struct_col.f1 < a.f1 OR id < 10");
- // Correlated subquery with absolute table refs and OR predicate is not allowed
- AnalysisError("select * from functional.alltypes t where id in " +
- "(select id from functional.alltypesagg a where " +
- "a.int_col = t.int_col or a.bool_col = false)", "Disjunctions " +
- "with correlated predicates are not supported: a.int_col = " +
- "t.int_col OR a.bool_col = FALSE");
-
- AnalyzesOk("select * from functional.alltypes t where id in " +
- "(select id from functional.alltypestiny) and (bool_col = false or " +
- "int_col = 10)");
-
- // Correlated subqueries with GROUP BY, AGG functions or DISTINCT are not allowed
- // with relative table refs in the subquery
- // TODO for 2.3: Modify the StmtRewriter to allow this case with relative refs
- AnalysisError("select id from functional.allcomplextypes t where id in" +
- "(select count(f1) from t.struct_array_col a where t.int_struct_col.f1 < a.f1)",
- "Unsupported correlated subquery with grouping and/or aggregation: " +
- "SELECT count(f1) FROM t.struct_array_col a WHERE t.int_struct_col.f1 < a.f1");
- // Correlated subqueries with GROUP BY, AGG functions or DISTINCT are not allowed
- // with absolute table refs in the subquery
- AnalysisError("select * from functional.alltypes t where t.id in " +
- "(select max(a.id) from functional.alltypesagg a where " +
- "t.int_col = a.int_col)", "Unsupported correlated subquery with grouping " +
- "and/or aggregation: SELECT max(a.id) FROM functional.alltypesagg a " +
- "WHERE t.int_col = a.int_col");
- AnalysisError("select * from functional.alltypes t where t.id in " +
- "(select a.id from functional.alltypesagg a where " +
- "t.int_col = a.int_col group by a.id)", "Unsupported correlated " +
- "subquery with grouping and/or aggregation: SELECT a.id FROM " +
- "functional.alltypesagg a WHERE t.int_col = a.int_col GROUP BY a.id");
- AnalysisError("select * from functional.alltypes t where t.id in " +
- "(select distinct a.id from functional.alltypesagg a where " +
- "a.bigint_col = t.bigint_col)", "Unsupported correlated subquery with " +
- "grouping and/or aggregation: SELECT DISTINCT a.id FROM " +
- "functional.alltypesagg a WHERE a.bigint_col = t.bigint_col");
-
- // NOT compound predicates with OR
- AnalyzesOk("select * from functional.alltypes t where not (" +
- "id in (select id from functional.alltypesagg) or int_col < 10)");
- AnalyzesOk("select * from functional.alltypes t where not (" +
- "t.id < 10 or not (t.int_col in (select int_col from " +
- "functional.alltypesagg) and t.bool_col = false))");
-
- // Multiple subquery predicates
- AnalyzesOk("select * from functional.alltypes t where id in " +
- "(select id from functional.alltypestiny where int_col = 10) and int_col in " +
- "(select int_col from functional.alltypessmall where bigint_col = 1000) and " +
- "string_col not in (select string_col from functional.alltypesagg where " +
- "tinyint_col > 10) and bool_col = false");
- AnalyzesOk("select id, year, month from functional.allcomplextypes t where id in " +
- "(select item from t.int_array_col where item < 10) and id not in " +
- "(select f1 from t.struct_array_col where f2 = 'test')");
+ }
- // Correlated subquery with a LIMIT clause
- AnalysisError("select * from functional.alltypes t where id in " +
- "(select s.id from functional.alltypesagg s where s.int_col = t.int_col " +
- "limit 1)", "Unsupported correlated subquery with a LIMIT clause: " +
- "SELECT s.id FROM functional.alltypesagg s WHERE s.int_col = t.int_col " +
- "LIMIT 1");
-
- // Correlated IN with an analytic function
- AnalysisError("select id, int_col, bool_col from functional.alltypestiny t1 " +
- "where int_col in (select min(bigint_col) over (partition by bool_col) " +
- "from functional.alltypessmall t2 where t1.id < t2.id)", "Unsupported " +
- "correlated subquery with grouping and/or aggregation: SELECT " +
- "min(bigint_col) OVER (PARTITION BY bool_col) FROM " +
- "functional.alltypessmall t2 WHERE t1.id < t2.id");
-
- // IN subquery in binary predicate
- AnalysisError("select * from functional.alltypestiny where " +
- "(tinyint_col in (1,2)) = (bool_col in (select bool_col from " +
- "functional.alltypes))", "IN subquery predicates are not supported " +
- "in binary predicates: (tinyint_col IN (1, 2)) = (bool_col IN (SELECT " +
- "bool_col FROM functional.alltypes))");
+ @Test
+ public void TestInConstantLHSSubqueries() throws AnalysisException {
+ // [NOT] IN subquery predicates
+ String operators[] = {"IN", "NOT IN"};
- // Column labels may conflict after the rewrite as an inline view
- AnalyzesOk("select int_col from functional.alltypestiny where " +
- "int_col in (select 1 as int_col from functional.alltypesagg)");
- AnalyzesOk("select int_col from functional.alltypestiny a where " +
- "int_col not in (select 1 as int_col from functional.alltypesagg b " +
- "where a.int_col = b.int_col)");
+ for (String op: operators) {
+ // Uncorrelated subquery.
+ AnalyzesOk(String.format("select * from functional.alltypes t where 1 %s " +
+ "(select int_col from functional.alltypesagg)", op));
+ // Uncorrelated and limited subquery.
+ AnalyzesOk(String.format("select * from functional.alltypes t where 1 %s " +
+ "(select int_col from functional.alltypesagg limit 1)", op));
+ // Select * in uncorrelated subquery.
+ AnalyzesOk(String.format("select * from functional.alltypes t where 1 %s " +
+ "(select * from functional.tinyinttable)", op));
+ // Select * in subquery, uncorrelated inline view.
+ AnalyzesOk(String.format("select * from functional.alltypes t where 1 %s " +
+ "(select * from (select f2 from functional.emptytable) s)", op));
+ // Uncorrelated aggregate subquery.
+ AnalyzesOk(String.format("select * from functional.alltypestiny t1 where " +
+ "10 %s (select max(int_col) from functional.alltypestiny)", op));
+ AnalyzesOk(String.format("select * from functional.alltypestiny t1 where " +
+ "(10 - 2) %s (select count(*) from functional.alltypestiny)", op));
+ // Uncorrelated analytic function.
+ AnalyzesOk(String.format("select id, int_col, bool_col from " +
+ "functional.alltypestiny t1 where 1 %s (select min(bigint_col) " +
+ "over (partition by bool_col) from functional.alltypessmall t2 where " +
+ "int_col < 10)", op));
+ // Uncorrelated group by with no aggregate function.
+ AnalyzesOk(String.format("select id, int_col, bool_col from " +
+ "functional.alltypestiny t1 where int_col %s (select int_col " +
+ "from functional.alltypessmall t2 where " +
+ "int_col < 10 group by int_col)", op));
+ // Uncorrelated group by with only aggregate function.
+ AnalyzesOk(String.format("select id, int_col, bool_col from " +
+ "functional.alltypestiny t1 where int_col %s (select max(id) " +
+ "from functional.alltypessmall t2 where " +
+ "int_col < 10 group by int_col)", op));
+ // Nested subqueries.
+ AnalyzesOk(String.format("select * from functional.alltypes t where 1 %s " +
+ "(select int_col from functional.tinyinttable where " +
+ "1 %s (select int_col from functional.alltypestiny))", op, op));
+ // Nested subqueries with *.
+ AnalyzesOk(String.format("select * from functional.alltypes t where 1 %s " +
+ "(select * from functional.tinyinttable where " +
+ "1 %s (select * from functional.tinyinttable))", op, op));
+ // Limit + correlation
+ AnalysisError(String.format("select id from functional.alltypessmall a where 1 " +
+ "%s (select int_col from functional.alltypestiny b where b.id = a.id limit 5)",
+ op), "Unsupported correlated subquery with a LIMIT clause: ");
+ // Order + limit + correlation.
+ AnalysisError(String.format("select id from functional.alltypessmall a where 1 %s " +
+ "(select int_col from functional.alltypestiny b where b.id = a.id " +
+ "order by int_col limit 5)", op),
+ "Unsupported correlated subquery with a LIMIT clause");
+ }
- // NOT IN uncorrelated aggregate subquery with a constant
- AnalysisError("select * from functional.alltypestiny t1 where " +
- "10 not in (select max(int_col) from functional.alltypestiny)",
- "Unsupported NOT IN predicate with subquery: 10 NOT IN (SELECT " +
- "max(int_col) FROM functional.alltypestiny)");
- AnalysisError("select * from functional.alltypestiny t1 where " +
- "(10 - 2) not in (select count(*) from functional.alltypestiny)",
- "Unsupported NOT IN predicate with subquery: (10 - 2) NOT IN " +
- "(SELECT count(*) FROM functional.alltypestiny)");
+ // Non-equijoins are not supported for NOT IN (not supported for EXISTS) when there
+ // is a constant on the left-hand side and a correlation.
+ // TODO: move into above, main test loop when NOT IN is supported.
+ for (String cmpOp: nonEquiCmpOperators) {
+ // Constant on the left hand side: correlated, non-equijoins
+ AnalyzesOk(String.format("select 1 from functional.alltypes t where 1 IN " +
+ "(select int_col from functional.alltypesagg g where g.id %s t.id)", cmpOp));
+ AnalysisError(String.format("select 1 from functional.alltypes t where 1 NOT IN " +
+ "(select int_col from functional.alltypesagg g where g.id %s t.id)", cmpOp),
+ String.format("Unsupported NOT IN predicate with subquery: 1 NOT IN", cmpOp));
+
+ // Constant on the left hand side: correlated, non-equijoin with a group by.
+ AnalyzesOk(String.format("select id, count(*) from functional.alltypes t " +
+ "where 1 IN (select id from functional.alltypesagg g where t.int_col %s " +
+ "g.int_col) group by id", cmpOp));
+ AnalysisError(String.format("select id, count(*) from functional.alltypes t " +
+ "where 1 NOT IN (select id from functional.alltypesagg g where t.int_col %s " +
+ "g.int_col) group by id", cmpOp),
+ String.format("Unsupported NOT IN predicate with subquery: 1 NOT IN", cmpOp));
+ }
+
+ // Correlated subquery.
+ AnalyzesOk("select * from functional.alltypes a where 1 in " +
+ "(select id from functional.alltypesagg s where s.int_col = a.int_col)");
+ AnalysisError("select * from functional.alltypes a where 1 not in " +
+ "(select id from functional.alltypesagg s where s.int_col = a.int_col)",
+ "Unsupported NOT IN predicate with subquery: 1 NOT IN");
+
+ // Select * in correlated subquery.
+ AnalyzesOk("select * from functional.alltypes t where 1 in "
+ + "(select * from functional.tinyinttable x where t.id = x.int_col)");
+ AnalysisError("select * from functional.alltypes t where 1 not in "
+ + "(select * from functional.tinyinttable x where t.id = x.int_col)",
+ "Unsupported NOT IN predicate with subquery: 1 NOT IN");
+
+ // Select * in subquery, correlated inline view.
+ AnalyzesOk("select * from functional.alltypes t where 1 in "
+ + "(select * from (select f2 from functional.emptytable) s "
+ + "where s.f2 = t.int_col)");
+ AnalysisError("select * from functional.alltypes t where 1 not in "
+ + "(select * from (select f2 from functional.emptytable) s "
+ + "where s.f2 = t.int_col)",
+ "Unsupported NOT IN predicate with subquery: 1 NOT IN");
}
@Test
@@ -604,8 +690,6 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
"where EXISTS (select id, min(int_col) over (partition by bool_col) " +
"from functional.alltypesagg a where bigint_col < 10)");
- // Different non-equi comparison operators in the correlated predicate
- String nonEquiCmpOperators[] = {"!=", "<=", ">=", ">", "<"};
for (String cmpOp: nonEquiCmpOperators) {
// Allowed because the subquery only has relative table refs.
AnalyzesOk(String.format(
@@ -980,7 +1064,7 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
}
// Correlated aggregate subquery with a GROUP BY and a relative table ref
- // TODO for 2.3: Modify the StmtRewriter to allow this query with only relative refs.
+ // TODO: Modify the StmtRewriter to allow this query with only relative refs.
AnalysisError("select min(t.year) from functional.allcomplextypes t " +
"where t.id < (select max(f1) from t.struct_array_col a " +
"where a.f1 = t.id group by a.f2 order by 1 limit 1)",
@@ -1008,7 +1092,7 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
"functional.alltypessmall t2 WHERE int_col < 10)");
// Aggregate subquery with analytic function + limit 1 and a relative table ref
- // TODO for 2.3: Modify the StmtRewriter to allow this query with only relative refs.
+ // TODO: Modify the StmtRewriter to allow this query with only relative refs.
AnalysisError("select id from functional.allcomplextypes t where year = " +
"(select min(f1) over (partition by f2) from t.struct_array_col a where " +
"t.id = f1 limit 1)",
http://git-wip-us.apache.org/repos/asf/impala/blob/633dbff7/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
index 790bcc3..43dd8d0 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
@@ -1831,7 +1831,7 @@ PLAN-ROOT SINK
select * from functional.alltypesagg t1
where t1.id is not distinct from
(select min(id) from functional.alltypes t2
-where t1.int_col is not distinct from t2.int_col);
+where t1.int_col is not distinct from t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
@@ -1853,7 +1853,7 @@ PLAN-ROOT SINK
select * from functional.alltypesagg t1
where t1.id is distinct from
(select min(id) from functional.alltypes t2
-where t1.int_col is not distinct from t2.int_col);
+where t1.int_col is not distinct from t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
@@ -1876,7 +1876,7 @@ PLAN-ROOT SINK
select * from functional.alltypesagg t1
where t1.id =
(select min(id) from functional.alltypes t2
-where t1.int_col is not distinct from t2.int_col);
+where t1.int_col is not distinct from t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
@@ -1898,7 +1898,7 @@ PLAN-ROOT SINK
select * from functional.alltypesagg t1
where t1.id !=
(select min(id) from functional.alltypes t2
-where t1.int_col is not distinct from t2.int_col);
+where t1.int_col is not distinct from t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
@@ -1921,7 +1921,7 @@ PLAN-ROOT SINK
select * from functional.alltypesagg t1
where t1.id is not distinct from
(select min(id) from functional.alltypes t2
-where t1.int_col = t2.int_col);
+where t1.int_col = t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
@@ -1943,7 +1943,7 @@ PLAN-ROOT SINK
select * from functional.alltypesagg t1
where t1.id is distinct from
(select min(id) from functional.alltypes t2
-where t1.int_col = t2.int_col);
+where t1.int_col = t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
@@ -1966,7 +1966,7 @@ PLAN-ROOT SINK
select * from functional.alltypesagg t1
where t1.id =
(select min(id) from functional.alltypes t2
-where t1.int_col = t2.int_col);
+where t1.int_col = t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
@@ -1988,7 +1988,7 @@ PLAN-ROOT SINK
select * from functional.alltypesagg t1
where t1.id !=
(select min(id) from functional.alltypes t2
-where t1.int_col = t2.int_col);
+where t1.int_col = t2.int_col)
---- PLAN
PLAN-ROOT SINK
|
@@ -2114,3 +2114,207 @@ PLAN-ROOT SINK
01:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
====
+# Constant on LHS of IN, uncorrelated subquery
+select * from functional.alltypessmall where
+1 in (select int_col from functional.alltypestiny)
+---- PLAN
+PLAN-ROOT SINK
+|
+02:NESTED LOOP JOIN [LEFT SEMI JOIN]
+|
+|--01:SCAN HDFS [functional.alltypestiny]
+| partitions=4/4 files=4 size=460B
+| predicates: 1 = functional.alltypestiny.int_col
+|
+00:SCAN HDFS [functional.alltypessmall]
+ partitions=4/4 files=4 size=6.32KB
+====
+# Constant on LHS of NOT IN, uncorrelated subquery
+select * from functional.alltypessmall where
+1 not in (select int_col from functional.alltypestiny)
+---- PLAN
+PLAN-ROOT SINK
+|
+02:NESTED LOOP JOIN [LEFT ANTI JOIN]
+|
+|--01:SCAN HDFS [functional.alltypestiny]
+| partitions=4/4 files=4 size=460B
+| predicates: 1 IS NULL OR functional.alltypestiny.int_col IS NULL OR functional.alltypestiny.int_col = 1
+| limit: 1
+|
+00:SCAN HDFS [functional.alltypessmall]
+ partitions=4/4 files=4 size=6.32KB
+====
+# Constant on LHS of IN, correlated subquery
+select * from functional.alltypessmall a where
+1 in (select int_col from functional.alltypestiny b where b.id = a.id)
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [LEFT SEMI JOIN]
+| hash predicates: a.id = b.id
+| runtime filters: RF000 <- b.id
+|
+|--01:SCAN HDFS [functional.alltypestiny b]
+| partitions=4/4 files=4 size=460B
+| predicates: 1 = b.int_col
+|
+00:SCAN HDFS [functional.alltypessmall a]
+ partitions=4/4 files=4 size=6.32KB
+ runtime filters: RF000 -> a.id
+====
+# Constant on LHS of IN, subquery with group by
+select * from functional.alltypessmall where
+1 in (select int_col from functional.alltypestiny group by int_col)
+---- PLAN
+PLAN-ROOT SINK
+|
+03:NESTED LOOP JOIN [LEFT SEMI JOIN]
+|
+|--02:AGGREGATE [FINALIZE]
+| | group by: int_col
+| |
+| 01:SCAN HDFS [functional.alltypestiny]
+| partitions=4/4 files=4 size=460B
+| predicates: 1 = functional.alltypestiny.int_col
+|
+00:SCAN HDFS [functional.alltypessmall]
+ partitions=4/4 files=4 size=6.32KB
+====
+# Constant on LHS of NOT IN, subquery with group by
+select * from functional.alltypessmall where
+1 not in (select int_col from functional.alltypestiny group by int_col)
+---- PLAN
+PLAN-ROOT SINK
+|
+03:NESTED LOOP JOIN [LEFT ANTI JOIN]
+|
+|--02:AGGREGATE [FINALIZE]
+| | group by: int_col
+| | limit: 1
+| |
+| 01:SCAN HDFS [functional.alltypestiny]
+| partitions=4/4 files=4 size=460B
+| predicates: 1 IS NULL OR functional.alltypestiny.int_col IS NULL OR functional.alltypestiny.int_col = 1
+|
+00:SCAN HDFS [functional.alltypessmall]
+ partitions=4/4 files=4 size=6.32KB
+====
+# Constant on LHS of IN, subquery with aggregate
+select * from functional.alltypessmall where
+1 in (select max(int_col) from functional.alltypestiny)
+---- PLAN
+PLAN-ROOT SINK
+|
+03:NESTED LOOP JOIN [LEFT SEMI JOIN]
+|
+|--02:AGGREGATE [FINALIZE]
+| | output: max(int_col)
+| | having: 1 = max(int_col)
+| |
+| 01:SCAN HDFS [functional.alltypestiny]
+| partitions=4/4 files=4 size=460B
+|
+00:SCAN HDFS [functional.alltypessmall]
+ partitions=4/4 files=4 size=6.32KB
+====
+# Constant on LHS of NOT IN, subquery with aggregate
+select * from functional.alltypessmall where
+1 not in (select max(int_col) from functional.alltypestiny)
+---- PLAN
+PLAN-ROOT SINK
+|
+03:NESTED LOOP JOIN [CROSS JOIN]
+|
+|--02:AGGREGATE [FINALIZE]
+| | output: max(int_col)
+| | having: 1 != max(int_col)
+| |
+| 01:SCAN HDFS [functional.alltypestiny]
+| partitions=4/4 files=4 size=460B
+|
+00:SCAN HDFS [functional.alltypessmall]
+ partitions=4/4 files=4 size=6.32KB
+====
+# Constant on LHS of IN, subquery with limit
+select * from functional.alltypessmall where
+1 in (select int_col from functional.alltypestiny limit 1)
+---- PLAN
+PLAN-ROOT SINK
+|
+03:NESTED LOOP JOIN [LEFT SEMI JOIN]
+|
+|--02:SELECT
+| | predicates: 1 = int_col
+| |
+| 01:SCAN HDFS [functional.alltypestiny]
+| partitions=4/4 files=4 size=460B
+| limit: 1
+|
+00:SCAN HDFS [functional.alltypessmall]
+ partitions=4/4 files=4 size=6.32KB
+====
+# Constant on LHS of NOT IN, subquery with limit
+select * from functional.alltypessmall where
+1 not in (select int_col from functional.alltypestiny limit 1)
+---- PLAN
+PLAN-ROOT SINK
+|
+03:NESTED LOOP JOIN [CROSS JOIN]
+|
+|--02:SELECT
+| | predicates: 1 != int_col
+| |
+| 01:SCAN HDFS [functional.alltypestiny]
+| partitions=4/4 files=4 size=460B
+| limit: 1
+|
+00:SCAN HDFS [functional.alltypessmall]
+ partitions=4/4 files=4 size=6.32KB
+====
+# Constant on LHS of IN for nested subqueries (no correlation)
+select * from functional.alltypes t where 1 in
+(select int_col from functional.tinyinttable where
+ 1 in (select int_col from functional.alltypestiny))
+---- PLAN
+PLAN-ROOT SINK
+|
+04:NESTED LOOP JOIN [LEFT SEMI JOIN]
+|
+|--03:NESTED LOOP JOIN [LEFT SEMI JOIN]
+| |
+| |--02:SCAN HDFS [functional.alltypestiny]
+| | partitions=4/4 files=4 size=460B
+| | predicates: 1 = functional.alltypestiny.int_col
+| |
+| 01:SCAN HDFS [functional.tinyinttable]
+| partitions=1/1 files=1 size=20B
+| predicates: 1 = functional.tinyinttable.int_col
+|
+00:SCAN HDFS [functional.alltypes t]
+ partitions=24/24 files=24 size=478.45KB
+====
+# Constant on LHS of IN for nested subqueries (correlation)
+select * from functional.alltypes t where 1 in
+(select int_col from functional.alltypessmall t where
+ bigint_col in (select bigint_col from functional.alltypestiny where id = t.id))
+---- PLAN
+PLAN-ROOT SINK
+|
+04:NESTED LOOP JOIN [LEFT SEMI JOIN]
+|
+|--03:HASH JOIN [LEFT SEMI JOIN]
+| | hash predicates: bigint_col = bigint_col, t.id = id
+| | runtime filters: RF000 <- bigint_col, RF001 <- id
+| |
+| |--02:SCAN HDFS [functional.alltypestiny]
+| | partitions=4/4 files=4 size=460B
+| |
+| 01:SCAN HDFS [functional.alltypessmall t]
+| partitions=4/4 files=4 size=6.32KB
+| predicates: 1 = t.int_col
+| runtime filters: RF000 -> bigint_col, RF001 -> t.id
+|
+00:SCAN HDFS [functional.alltypes t]
+ partitions=24/24 files=24 size=478.45KB
+====
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/impala/blob/633dbff7/testdata/workloads/functional-query/queries/QueryTest/subquery-in-constant-lhs.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/subquery-in-constant-lhs.test b/testdata/workloads/functional-query/queries/QueryTest/subquery-in-constant-lhs.test
new file mode 100644
index 0000000..df9303b
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/subquery-in-constant-lhs.test
@@ -0,0 +1,296 @@
+====
+---- QUERY
+# Constant on LHS of [NOT] IN. Basics to test:
+# LHS constant: [not-null, null]
+# Predicate : [IN, NOT IN]
+# RHS : [NULL, Empty, Non-empty: has match, has no match/no null,
+# has no match/null, (un)correlated]
+# LHS not-null, Predicate IN, RHS NULL. Expect no results.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE 1 IN (SELECT null);
+---- RESULTS
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# LHS null, Predicate IN, RHS NULL. Expect no results.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE NULL IN (SELECT null);
+---- RESULTS
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# LHS not-null, Predicate NOT IN, RHS NULL. Expect no results.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE NULL NOT IN (SELECT null);
+---- RESULTS
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# LHS not-null, Predicate IN, RHS NULL. Expect no results.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE 1 IN (SELECT null);
+---- RESULTS
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# LHS not-null, Predicate IN, RHS empty. Expect no results.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE 1 IN (SELECT int_col from alltypessmall where int_col < 0);
+---- RESULTS
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# LHS null, Predicate IN, RHS empty. Expect no results.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE NULL IN (SELECT int_col from alltypessmall where int_col < 0);
+---- RESULTS
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# LHS not-null, Predicate NOT IN, RHS has no results. Expect no results.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE 1 NOT IN (SELECT id FROM alltypestiny WHERE bool_col = false) and a.id < 5;
+---- RESULTS
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# LHS not-null, Predicate NOT IN, RHS empty. Expect all results.
+SELECT count(*) FROM alltypessmall a
+WHERE 1 NOT IN (SELECT int_col from alltypessmall where int_col < 0);
+---- RESULTS
+100
+---- TYPES
+BIGINT
+====
+---- QUERY
+# LHS null, Predicate NOT IN, RHS empty. Expect all results.
+SELECT count(*) FROM alltypessmall a
+WHERE NULL NOT IN (SELECT int_col from alltypessmall where int_col < 0);
+---- RESULTS
+100
+---- TYPES
+BIGINT
+====
+---- QUERY
+# LHS not-null, Predicate IN, RHS not-empty with match. Uncorrelated. Expect all results.
+SELECT count(*) FROM alltypessmall a
+WHERE 1 IN (SELECT int_col FROM alltypestiny);
+---- RESULTS
+100
+---- TYPES
+BIGINT
+====
+---- QUERY
+# LHS not-null, Predicate IN, RHS not empty/correlated. Expect results.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE 1 IN (SELECT id FROM alltypestiny WHERE bool_col = false AND id = a.id)
+---- RESULTS
+1,1,'1'
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# LHS not-null, Predicate IN, RHS has matches. Expect all
+# records with id < 5 are returned since subquery is always true.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE 1 IN (SELECT id FROM alltypestiny WHERE bool_col = false) and a.id < 5
+---- RESULTS
+0,0,'0'
+1,1,'1'
+2,2,'2'
+3,3,'3'
+4,4,'4'
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# LHS null, Predicate IN, RHS not-empty with match. Uncorrelated. Expect no results.
+SELECT count(*) FROM alltypessmall a
+WHERE NULL IN (SELECT d FROM nulltable);
+---- RESULTS
+0
+---- TYPES
+BIGINT
+====
+---- QUERY
+# LHS not-null, Predicate IN, RHS not-empty with match. Correlated. Expect 4 results,
+# where int_col = 1.
+SELECT count(*) FROM alltypessmall a
+WHERE 1 IN (SELECT int_col FROM alltypestiny b where b.id = a.id);
+---- RESULTS
+4
+---- TYPES
+BIGINT
+====
+====
+---- QUERY
+# LHS null, Predicate IN, RHS not-empty with match. Correlated. Expect no results,
+# where int_col = 1.
+SELECT count(*) FROM alltypessmall a
+WHERE NULL IN (SELECT NULL FROM alltypestiny b where b.id = a.id);
+---- RESULTS
+0
+---- TYPES
+BIGINT
+====
+---- QUERY
+# LHS not-null, Predicate IN, RHS not-empty with no match. Uncorrelated.
+# Expect no results.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE 1 IN (SELECT int_col FROM alltypestiny where int_col != 1);
+---- RESULTS
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# LHS not-null, Predicate IN, RHS not-empty with no match, but with null. Uncorrelated.
+# Expect no results.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE 1 IN (SELECT CASE WHEN id = 1 THEN NULL ELSE ID END FROM alltypestiny);
+---- RESULTS
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# LHS not-null, Predicate NOT IN, RHS not-empty with match. Uncorrelated.
+# Expect no results.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE 1 NOT IN (SELECT int_col FROM alltypestiny);
+---- RESULTS
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# LHS null, Predicate NOT IN, RHS not-empty with match. Uncorrelated. Expect no results.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE NULL NOT IN (SELECT NULL FROM alltypestiny);
+---- RESULTS
+---- TYPES
+INT, INT, STRING
+====
+----QUERY
+# LHS not-null, Predicate NOT IN, RHS not-empty with no match. Uncorrelated.
+# Expect all results.
+SELECT count(*) FROM alltypessmall a
+WHERE 1 NOT IN (SELECT int_col FROM alltypestiny where int_col != 1);
+---- RESULTS
+100
+---- TYPES
+BIGINT
+====
+----QUERY
+# LHS null, Predicate NOT IN, RHS not-empty with no match. Uncorrelated.
+# Expect no results.
+SELECT count(*) FROM alltypessmall a
+WHERE NULL NOT IN (SELECT int_col FROM alltypestiny where int_col != 1);
+---- RESULTS
+0
+---- TYPES
+BIGINT
+====
+---- QUERY
+# LHS not-null, Predicate NOT IN, RHS not-empty with no match, but null. Uncorrelated.
+# Expect no results.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE 1 NOT IN (SELECT CASE WHEN id = 1 then null ELSE id END FROM alltypestiny);
+---- RESULTS
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# LHS null, Predicate NOT IN, RHS not-empty with no match, but null. Uncorrelated.
+# Expect no results.
+SELECT a.id, a.int_col, a.string_col FROM alltypessmall a
+WHERE NULL NOT IN (SELECT CASE WHEN id = 1 then null ELSE id END FROM alltypestiny);
+---- RESULTS
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# LHS is non-null, Predicate is IN, RHS is an aggregate.
+SELECT count(*) FROM alltypessmall a WHERE 0 IN (SELECT MIN(int_col) from tinyinttable);
+---- RESULTS
+100
+---- TYPES
+BIGINT
+====
+---- QUERY
+# LHS is non-null, Predicate is NOT IN, RHS is an aggregate.
+SELECT a.id FROM alltypessmall a WHERE 0 NOT IN (SELECT MIN(int_col) from tinyinttable);
+---- RESULTS
+---- TYPES
+INT
+====
+---- QUERY
+# LHS is not-null, Predicate is IN, RHS is group by without aggregation. Expect all results.
+SELECT count(*) from alltypessmall a where "a" IN (SELECT a from nulltable group by a);
+---- RESULTS
+100
+---- TYPES
+BIGINT
+====
+---- QUERY
+# LHS is null, Predicate is IN, RHS is group by without aggregation. Expect no results.
+SELECT a.id from alltypessmall a where NULL IN (SELECT d from nulltable group by d);
+---- RESULTS
+---- TYPES
+INT
+====
+---- QUERY
+# LHS is null, Predicate is NOT IN, RHS is group by without aggregation. Expect no results.
+SELECT a.id from alltypessmall a where NULL NOT IN (SELECT d from nulltable group by d);
+---- RESULTS
+---- TYPES
+INT
+====
+---- QUERY
+# LHS is non-null, Predicate is IN, RHS is select list is "*".
+SELECT a.id FROM alltypessmall a WHERE 1 IN (SELECT * FROM tinyinttable) and a.id < 3;
+---- RESULTS
+0
+1
+2
+---- TYPES
+INT
+====
+---- QUERY
+# LHS is non-null, Predicate is NOT IN, RHS is select list is "*".
+SELECT a.id FROM alltypessmall a WHERE 1 NOT IN (SELECT * FROM tinyinttable);
+---- RESULTS
+---- TYPES
+INT
+====
+---- QUERY
+# LHS is non-null, Predicate is IN, RHS select list is "*" and table is an inline view.
+SELECT count(*) from alltypessmall a WHERE 1 IN (SELECT * FROM
+(SELECT int_col from alltypessmall) b);
+---- RESULTS
+100
+---- TYPES
+BIGINT
+====
+---- QUERY
+# LHS is non-null, Predicate is NOT IN, RHS is select list is "*" and table is an
+# inline view.
+SELECT count(*) from alltypessmall a WHERE 1 NOT IN (SELECT * FROM
+(SELECT int_col from alltypessmall) b);
+---- RESULTS
+0
+---- TYPES
+BIGINT
+====
+---- QUERY
+# LHS is non-null, Predicate is IN, RHS includes nested subqueries.
+SELECT a.id FROM alltypessmall a WHERE
+1 IN (SELECT int_col FROM alltypessmall WHERE -10000 IN (SELECT * FROM tinyinttable));
+---- RESULTS
+---- TYPES
+INT
+====
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/impala/blob/633dbff7/tests/query_test/test_queries.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_queries.py b/tests/query_test/test_queries.py
index 47921ac..3caeebe 100644
--- a/tests/query_test/test_queries.py
+++ b/tests/query_test/test_queries.py
@@ -110,6 +110,9 @@ class TestQueries(ImpalaTestSuite):
def test_subquery(self, vector):
self.run_test_case('QueryTest/subquery', vector)
+ def test_subquery_in_constant_lhs(self, vector):
+ self.run_test_case('QueryTest/subquery-in-constant-lhs', vector)
+
def test_empty(self, vector):
self.run_test_case('QueryTest/empty', vector)