You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/01/10 20:53:32 UTC
[9/9] hive git commit: HIVE-15481 : Support multiple and nested
subqueries (Vineet Garg via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan
HIVE-15481 : Support multiple and nested subqueries (Vineet Garg via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b0ed8241
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b0ed8241
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b0ed8241
Branch: refs/heads/master
Commit: b0ed8241acecb11da8f53be906d47861636fad24
Parents: 1749d70
Author: Vineet Garg <vg...@hortonworks.com>
Authored: Tue Dec 20 21:44:00 2016 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Jan 10 12:52:44 2017 -0800
----------------------------------------------------------------------
data/files/part_tiny_nulls.txt | 26 +
.../test/resources/testconfiguration.properties | 4 +-
.../calcite/HiveSubQRemoveRelBuilder.java | 2 +-
.../calcite/reloperators/HiveFilter.java | 6 +
.../calcite/rules/HiveSubQueryRemoveRule.java | 8 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 58 +-
.../apache/hadoop/hive/ql/parse/QBSubQuery.java | 111 +-
.../clientnegative/subquery_restrictions.q | 92 -
.../subquery_subquery_chain_exists.q | 4 +
.../test/queries/clientpositive/perf/query16.q | 30 +
.../test/queries/clientpositive/perf/query23.q | 48 +
.../test/queries/clientpositive/perf/query33.q | 73 +
.../test/queries/clientpositive/perf/query45.q | 1 -
.../test/queries/clientpositive/perf/query56.q | 65 +
.../test/queries/clientpositive/perf/query60.q | 77 +
.../test/queries/clientpositive/perf/query69.q | 46 +
.../test/queries/clientpositive/perf/query83.q | 65 +
.../queries/clientpositive/subquery_exists.q | 18 +
.../test/queries/clientpositive/subquery_in.q | 6 +-
.../queries/clientpositive/subquery_in_having.q | 39 +
.../queries/clientpositive/subquery_multi.q | 111 +
.../queries/clientpositive/subquery_notexists.q | 19 +
.../queries/clientpositive/subquery_notin.q | 17 +
.../subquery_corr_grandparent.q.out | 2 +-
.../clientnegative/subquery_restrictions.q.out | 1 -
.../subquery_subquery_chain_exists.q.out | 1 +
.../clientpositive/llap/explainuser_1.q.out | 16 +-
.../results/clientpositive/llap/lineage3.q.out | 2 +-
.../clientpositive/llap/subquery_exists.q.out | 630 ++
.../clientpositive/llap/subquery_in.q.out | 273 +
.../clientpositive/llap/subquery_multi.q.out | 5610 ++++++++++++++++++
.../clientpositive/llap/subquery_notin.q.out | 971 ++-
.../llap/vector_groupby_mapjoin.q.out | 8 +-
.../results/clientpositive/perf/query16.q.out | 330 ++
.../results/clientpositive/perf/query23.q.out | 383 ++
.../results/clientpositive/perf/query33.q.out | 437 ++
.../results/clientpositive/perf/query56.q.out | 421 ++
.../results/clientpositive/perf/query60.q.out | 443 ++
.../results/clientpositive/perf/query69.q.out | 591 ++
.../results/clientpositive/perf/query83.q.out | 440 ++
.../clientpositive/spark/subquery_exists.q.out | 623 ++
.../clientpositive/spark/subquery_in.q.out | 254 +
.../clientpositive/subquery_exists.q.out | 625 ++
.../clientpositive/subquery_in_having.q.out | 2140 ++++++-
.../clientpositive/subquery_notexists.q.out | 213 +
.../clientpositive/subquery_notin_having.q.out | 18 +-
.../clientpositive/vector_groupby_mapjoin.q.out | 18 +-
47 files changed, 15036 insertions(+), 340 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/data/files/part_tiny_nulls.txt
----------------------------------------------------------------------
diff --git a/data/files/part_tiny_nulls.txt b/data/files/part_tiny_nulls.txt
new file mode 100644
index 0000000..c2b80ff
--- /dev/null
+++ b/data/files/part_tiny_nulls.txt
@@ -0,0 +1,26 @@
+121152,almond antique burnished rose metallic,Manufacturer#1,Brand#14,PROMO PLATED TIN,2,JUMBO BOX,1173.15,e pinto beans h
+121152,almond antique burnished rose metallic,Manufacturer#1,Brand#14,PROMO PLATED TIN,2,JUMBO BOX,1173.15,e pinto beans h
+85768,almond antique chartreuse lavender yellow,Manufacturer#1,Brand#12,LARGE BRUSHED STEEL,34,SM BAG,1753.76,refull
+110592,almond antique salmon chartreuse burlywood,Manufacturer#1,Brand#15,PROMO BURNISHED NICKEL,6,JUMBO PKG,1602.59, to the furiously
+86428,almond aquamarine burnished black steel,Manufacturer#1,Brand#12,STANDARD ANODIZED STEEL,28,WRAP BAG,1414.42,arefully
+65667,almond aquamarine pink moccasin thistle,Manufacturer#1,Brand#12,LARGE BURNISHED STEEL,42,JUMBO CASE,1632.66,e across the expr
+105685,almond antique violet chocolate turquoise,Manufacturer#2,Brand#22,MEDIUM ANODIZED COPPER,14,MED CAN,1690.68,ly pending requ
+191709,almond antique violet turquoise frosted,Manufacturer#2,Brand#22,ECONOMY POLISHED STEEL,40,MED BOX,1800.7, haggle
+146985,almond aquamarine midnight light salmon,Manufacturer#2,Brand#23,MEDIUM BURNISHED COPPER,2,SM CASE,2031.98,s cajole caref
+132666,almond aquamarine rose maroon antique,Manufacturer#2,Brand#24,SMALL POLISHED NICKEL,25,MED BOX,1698.66,even
+195606,almond aquamarine sandy cyan gainsboro,Manufacturer#2,Brand#25,STANDARD PLATED TIN,18,SM PKG,1701.6,ic de
+90681,almond antique chartreuse khaki white,Manufacturer#3,Brand#31,MEDIUM BURNISHED TIN,17,SM CASE,1671.68,are slyly after the sl
+17273,almond antique forest lavender goldenrod,Manufacturer#3,Brand#35,PROMO ANODIZED TIN,14,JUMBO CASE,1190.27,along the
+112398,almond antique metallic orange dim,Manufacturer#3,Brand#32,MEDIUM BURNISHED BRASS,19,JUMBO JAR,1410.39,ole car
+40982,almond antique misty red olive,Manufacturer#3,Brand#32,ECONOMY PLATED COPPER,1,LG PKG,1922.98,c foxes can s
+144293,almond antique olive coral navajo,Manufacturer#3,Brand#34,STANDARD POLISHED STEEL,45,JUMBO CAN,1337.29,ag furiously about
+49671,almond antique gainsboro frosted violet,Manufacturer#4,Brand#41,SMALL BRUSHED BRASS,10,SM BOX,1620.67,ccounts run quick
+48427,almond antique violet mint lemon,Manufacturer#4,Brand#42,PROMO POLISHED STEEL,39,SM CASE,1375.42,hely ironic i
+45261,almond aquamarine floral ivory bisque,Manufacturer#4,Brand#42,SMALL PLATED STEEL,27,WRAP CASE,1206.26,careful
+17927,almond aquamarine yellow dodger mint,Manufacturer#4,Brand#41,ECONOMY BRUSHED COPPER,7,SM PKG,1844.92,ites. eve
+33357,almond azure aquamarine papaya violet,Manufacturer#4,Brand#41,STANDARD ANODIZED TIN,12,WRAP CASE,1290.35,reful
+192697,almond antique blue firebrick mint,Manufacturer#5,Brand#52,MEDIUM BURNISHED TIN,31,LG DRUM,1789.69,ickly ir
+42669,almond antique medium spring khaki,Manufacturer#5,Brand#51,STANDARD BURNISHED TIN,6,MED CAN,1611.66,sits haggl
+155733,almond antique sky peru orange,Manufacturer#5,Brand#53,SMALL PLATED BRASS,2,WRAP DRUM,1788.73,furiously. bra
+15103,almond aquamarine dodger light gainsboro,Manufacturer#5,Brand#53,ECONOMY BURNISHED STEEL,46,LG PACK,1018.1,packages hinder carefu
+78486,almond azure blanched chiffon midnight,Manufacturer#5,Brand#52,LARGE BRUSHED BRASS,23,MED BAG,1464.48,hely blith
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 70e7197..be5a747 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -224,7 +224,6 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
stats_only_null.q,\
subquery_exists.q,\
subquery_in.q,\
- subquery_restrictions,\
temp_table.q,\
tez_bmj_schema_evolution.q,\
tez_dml.q,\
@@ -647,7 +646,7 @@ minillaplocal.query.files=acid_globallimit.q,\
offset_limit_ppd_optimizer.q,\
cluster.q,\
subquery_in.q,\
- subquery_restrictions,\
+ subquery_multi.q,\
stats11.q,\
orc_create.q,\
orc_split_elimination.q,\
@@ -1327,7 +1326,6 @@ spark.query.files=add_part_multiple.q, \
statsfs.q, \
subquery_exists.q, \
subquery_in.q, \
- subquery_restrictions, \
subquery_multiinsert.q, \
table_access_keys_stats.q, \
temp_table.q, \
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
index e028a99..e4f3057 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
@@ -148,7 +148,7 @@ public class HiveSubQRemoveRelBuilder {
RelFactories.DEFAULT_AGGREGATE_FACTORY);
this.filterFactory =
Util.first(context.unwrap(RelFactories.FilterFactory.class),
- RelFactories.DEFAULT_FILTER_FACTORY);
+ HiveRelFactories.HIVE_FILTER_FACTORY);
this.projectFactory =
Util.first(context.unwrap(RelFactories.ProjectFactory.class),
RelFactories.DEFAULT_PROJECT_FACTORY);
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
index ce207da..b7b16b8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
@@ -103,6 +103,12 @@ public class HiveFilter extends Filter implements HiveRelNode {
return allCorrVars;
}
+ public Set<CorrelationId> getVariablesSet(RexSubQuery e) {
+ Set<CorrelationId> allCorrVars = new HashSet<>();
+ traverseFilter(e, allCorrVars);
+ return allCorrVars;
+ }
+
public RelNode accept(RelShuttle shuttle) {
if (shuttle instanceof HiveRelShuttle) {
return ((HiveRelShuttle)shuttle).visit(this);
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
index 5f58aae..f1e8ebd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
@@ -48,6 +48,7 @@ import java.util.Set;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveSubQRemoveRelBuilder;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
/**
* NOTE: this rule is replicated from Calcite's SubqueryRemoveRule
@@ -74,16 +75,21 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{
public void onMatch(RelOptRuleCall call) {
final Filter filter = call.rel(0);
//final RelBuilder builder = call.builder();
+ //TODO: replace HiveSubQRemoveRelBuilder with calcite's once calcite 1.11.0 is released
final HiveSubQRemoveRelBuilder builder = new HiveSubQRemoveRelBuilder(null, call.rel(0).getCluster(), null);
final RexSubQuery e =
RexUtil.SubQueryFinder.find(filter.getCondition());
assert e != null;
+
final RelOptUtil.Logic logic =
LogicVisitor.find(RelOptUtil.Logic.TRUE,
ImmutableList.of(filter.getCondition()), e);
builder.push(filter.getInput());
final int fieldCount = builder.peek().getRowType().getFieldCount();
- final RexNode target = apply(e, filter.getVariablesSet(), logic,
+
+ assert(filter instanceof HiveFilter);
+
+ final RexNode target = apply(e, ((HiveFilter)filter).getVariablesSet(e), logic,
builder, 1, fieldCount);
final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
builder.filter(shuttle.apply(filter.getCondition()));
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index fdb468d..cc357c5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -111,7 +111,6 @@ import org.apache.calcite.util.CompositeList;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.ImmutableIntList;
import org.apache.calcite.util.Pair;
-import org.apache.commons.lang.mutable.MutableBoolean;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -246,7 +245,6 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
-import com.google.common.math.IntMath;
public class CalcitePlanner extends SemanticAnalyzer {
@@ -1030,7 +1028,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
RelNode calciteGenPlan = null;
RelNode calcitePreCboPlan = null;
RelNode calciteOptimizedPlan = null;
- subqueryId = -1;
+ subqueryId = 0;
/*
* recreate cluster, so that it picks up the additional traitDef
@@ -2076,40 +2074,23 @@ public class CalcitePlanner extends SemanticAnalyzer {
return filterRel;
}
- private void subqueryRestritionCheck(QB qb, ASTNode searchCond, RelNode srcRel,
+ private void subqueryRestrictionCheck(QB qb, ASTNode searchCond, RelNode srcRel,
boolean forHavingClause, Map<String, RelNode> aliasToRel ) throws SemanticException {
List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
- if (subQueriesInOriginalTree.size() > 0) {
-
- /*
- * Restriction.9.m :: disallow nested SubQuery expressions.
- */
- if (qb.getSubQueryPredicateDef() != null) {
- throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
- subQueriesInOriginalTree.get(0),
- "Nested SubQuery expressions are not supported."));
- }
-
- /*
- * Restriction.8.m :: We allow only 1 SubQuery expression per Query.
- */
- if (subQueriesInOriginalTree.size() > 1) {
-
- throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
- subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported."));
- }
+ ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
+ List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
+ for(int i=0; i<subQueriesInOriginalTree.size(); i++){
//we do not care about the transformation or rewriting of AST
// which following statement does
// we only care about the restriction checks they perform.
// We plan to get rid of these restrictions later
int sqIdx = qb.incrNumSubQueryPredicates();
- ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(0);
+ ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i);
- ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
- List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
- ASTNode subQueryAST = subQueries.get(0);
- clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST);
+ ASTNode subQueryAST = subQueries.get(i);
+
+ SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST);
QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST,
originalSubQueryAST, ctx);
@@ -2123,19 +2104,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
aliasToRel.put(havingInputAlias, srcRel);
}
- subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias,
- aliasToRel.keySet());
-
- // Missing Check: Check.5.h :: For In and Not In the SubQuery must implicitly or
- // explicitly only contain one select item.
- }
+ subQuery.subqueryRestrictionsCheck(inputRR, forHavingClause, havingInputAlias);
+ }
}
private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean forHavingClause,
Map<ASTNode, RelNode> subQueryToRelNode,
Map<String, RelNode> aliasToRel) throws SemanticException {
//disallow subqueries which HIVE doesn't currently support
- subqueryRestritionCheck(qb, node, srcRel, forHavingClause, aliasToRel);
+ subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, aliasToRel);
Deque<ASTNode> stack = new ArrayDeque<ASTNode>();
stack.push(node);
@@ -2146,12 +2123,20 @@ public class CalcitePlanner extends SemanticAnalyzer {
switch(next.getType()) {
case HiveParser.TOK_SUBQUERY_EXPR:
+ /*
+ * Restriction 2.h Subquery isnot allowed in LHS
+ */
+ if(next.getChildren().size() == 3
+ && next.getChild(2).getType() == HiveParser.TOK_SUBQUERY_EXPR){
+ throw new CalciteSemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ next.getChild(2),
+ "SubQuery in LHS expressions are not supported."));
+ }
String sbQueryAlias = "sq_" + qb.incrNumSubQueryPredicates();
QB qbSQ = new QB(qb.getId(), sbQueryAlias, true);
Phase1Ctx ctx1 = initPhase1Ctx();
doPhase1((ASTNode)next.getChild(1), qbSQ, ctx1, null);
getMetaData(qbSQ);
- subqueryId++;
RelNode subQueryRelNode = genLogicalPlan(qbSQ, false, relToHiveColNameCalcitePosMap.get(srcRel),
relToHiveRR.get(srcRel));
subQueryToRelNode.put(next, subQueryRelNode);
@@ -2189,6 +2174,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
this.relToHiveColNameCalcitePosMap.put(filterRel, this.relToHiveColNameCalcitePosMap
.get(srcRel));
relToHiveRR.put(filterRel, relToHiveRR.get(srcRel));
+ this.subqueryId++;
// semi-join opt doesn't work with subqueries
conf.setBoolVar(ConfVars.SEMIJOIN_CONVERSION, false);
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
index cfac6c0..24381b9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
@@ -503,6 +503,106 @@ public class QBSubQuery implements ISubQueryJoinInfo {
originalSQASTOrigin.getUsageNode());
}
+ void subqueryRestrictionsCheck(RowResolver parentQueryRR,
+ boolean forHavingClause,
+ String outerQueryAlias)
+ throws SemanticException {
+ ASTNode insertClause = getChildFromSubqueryAST("Insert", HiveParser.TOK_INSERT);
+
+ ASTNode selectClause = (ASTNode) insertClause.getChild(1);
+
+
+ int selectExprStart = 0;
+ if ( selectClause.getChild(0).getType() == HiveParser.TOK_HINTLIST ) {
+ selectExprStart = 1;
+ }
+
+ /*
+ * Check.5.h :: For In and Not In the SubQuery must implicitly or
+ * explicitly only contain one select item.
+ */
+ if ( operator.getType() != SubQueryType.EXISTS &&
+ operator.getType() != SubQueryType.NOT_EXISTS &&
+ selectClause.getChildCount() - selectExprStart > 1 ) {
+ subQueryAST.setOrigin(originalSQASTOrigin);
+ throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
+ subQueryAST, "SubQuery can contain only 1 item in Select List."));
+ }
+
+ boolean hasAggreateExprs = false;
+ boolean hasWindowing = false;
+ for(int i= selectExprStart; i < selectClause.getChildCount(); i++ ) {
+
+ ASTNode selectItem = (ASTNode) selectClause.getChild(i);
+ int r = SubQueryUtils.checkAggOrWindowing(selectItem);
+
+ hasWindowing = hasWindowing | ( r == 2);
+ hasAggreateExprs = hasAggreateExprs | ( r == 1 );
+ }
+
+ /*
+ * Restriction.13.m :: In the case of an implied Group By on a
+ * correlated SubQuery, the SubQuery always returns 1 row.
+ * An exists on a SubQuery with an implied GBy will always return true.
+ * Whereas Algebraically transforming to a Join may not return true. See
+ * Specification doc for details.
+ * Similarly a not exists on a SubQuery with a implied GBY will always return false.
+ */
+ boolean noImplicityGby = true;
+ if ( insertClause.getChild(1).getChildCount() > 3 &&
+ insertClause.getChild(1).getChild(3).getType() == HiveParser.TOK_GROUPBY ) {
+ if((ASTNode) insertClause.getChild(1).getChild(3) != null){
+ noImplicityGby = false;
+ }
+ }
+ if ( operator.getType() == SubQueryType.EXISTS &&
+ hasAggreateExprs &&
+ noImplicityGby) {
+ throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
+ subQueryAST,
+ "An Exists predicate on SubQuery with implicit Aggregation(no Group By clause) " +
+ "cannot be rewritten. (predicate will always return true)."));
+ }
+ if ( operator.getType() == SubQueryType.NOT_EXISTS &&
+ hasAggreateExprs &&
+ noImplicityGby) {
+ throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
+ subQueryAST,
+ "A Not Exists predicate on SubQuery with implicit Aggregation(no Group By clause) " +
+ "cannot be rewritten. (predicate will always return false)."));
+ }
+
+ ASTNode whereClause = SubQueryUtils.subQueryWhere(insertClause);
+
+ if ( whereClause == null ) {
+ return;
+ }
+ ASTNode searchCond = (ASTNode) whereClause.getChild(0);
+ List<ASTNode> conjuncts = new ArrayList<ASTNode>();
+ SubQueryUtils.extractConjuncts(searchCond, conjuncts);
+
+ ConjunctAnalyzer conjunctAnalyzer = new ConjunctAnalyzer(parentQueryRR,
+ forHavingClause, outerQueryAlias);
+ ASTNode sqNewSearchCond = null;
+
+ boolean hasCorrelation = false;
+ for(ASTNode conjunctAST : conjuncts) {
+ Conjunct conjunct = conjunctAnalyzer.analyzeConjunct(conjunctAST);
+ if(conjunct.isCorrelated()){
+ hasCorrelation = true;
+ break;
+ }
+ }
+
+ /*
+ * Restriction.14.h :: Correlated Sub Queries cannot contain Windowing clauses.
+ */
+ if ( hasWindowing && hasCorrelation) {
+ throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+ subQueryAST, "Correlated Sub Queries cannot contain Windowing clauses."));
+ }
+ }
+
void validateAndRewriteAST(RowResolver outerQueryRR,
boolean forHavingClause,
String outerQueryAlias,
@@ -776,17 +876,6 @@ public class QBSubQuery implements ISubQueryJoinInfo {
Conjunct conjunct = conjunctAnalyzer.analyzeConjunct(conjunctAST);
/*
- * Restriction.11.m :: A SubQuery predicate that refers to an Outer
- * Query column must be a valid Join predicate.
- */
- if ( conjunct.eitherSideRefersBoth() ) {
- throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
- conjunctAST,
- "SubQuery expression refers to both Parent and SubQuery expressions and " +
- "is not a valid join condition."));
- }
-
- /*
* Check.12.h :: SubQuery predicates cannot only refer to Outer Query columns.
*/
if ( conjunct.refersOuterOnly() ) {
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientnegative/subquery_restrictions.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_restrictions.q b/ql/src/test/queries/clientnegative/subquery_restrictions.q
deleted file mode 100644
index 80870d7..0000000
--- a/ql/src/test/queries/clientnegative/subquery_restrictions.q
+++ /dev/null
@@ -1,92 +0,0 @@
---Restriction.1.h SubQueries only supported in the SQL Where Clause.
-select src.key in (select key from src s1 where s1.key > '9')
-from src;
-
-select count(*)
-from src
-group by src.key in (select key from src s1 where s1.key > '9') ;
-
---Restriction.2.h The subquery can only be the RHS of an expression
-----curently paser doesn't allow such queries
---select * from part where (select p_size from part) IN (1,2);
-
---Restriction.3.m The predicate operators supported are In, Not In, exists and Not exists.
-----select * from part where p_brand > (select key from src);
-
---Check.4.h For Exists and Not Exists, the Sub Query must have 1 or more correlated predicates.
-select * from src where exists (select * from part);
-
---Check.5.h multiple columns in subquery select
-select * from src where src.key in (select * from src s1 where s1.key > '9');
-
---Restriction.6.m The LHS in a SubQuery must have all its Column References be qualified
---This is not restriction anymore
-
---Restriction 7.h subquery with or condition
-select count(*)
-from src
-where src.key in (select key from src s1 where s1.key > '9') or src.value is not null
-;
-
---Restriction.8.m We allow only 1 SubQuery expression per Query
-select * from part where p_size IN (select p_size from part) AND p_brand IN (select p_brand from part);
-
---Restriction 9.m nested subquery
-select *
-from part x
-where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name = z.p_name))
-;
-
---Restriction.10.h In a SubQuery references to Parent Query columns is only supported in the where clause.
-select * from part where p_size in (select p.p_size + part.p_size from part p);
-select * from part where part.p_size IN (select min(p_size) from part p group by part.p_type);
-
-
---Restriction.11.m A SubQuery predicate that refers to a Parent Query column must be a valid Join predicate
-select * from part where p_size in (select p_size from part p where p.p_type > part.p_type);
-select * from part where part.p_size IN (select min(p_size) from part p where NOT(part.p_type = p.p_type));
-
-
---Check.12.h SubQuery predicates cannot only refer to Parent Query columns
-select * from part where p_name IN (select p_name from part p where part.p_type <> 1);
-
---Restriction.13.m In the case of an implied Group By on a correlated Sub- Query, the SubQuery always returns 1 row. For e.g. a count on an empty set is 0, while all other UDAFs return null. Converting such a SubQuery into a Join by computing all Groups in one shot, changes the semantics: the Group By SubQuery output will not contain rows for Groups that don\u2019t exist.
-select *
-from src b
-where exists
- (select count(*)
- from src a
- where b.value = a.value and a.key = b.key and a.value > 'val_9'
- )
-;
-
---Restriction.14.h Correlated Sub Queries cannot contain Windowing clauses.
-select p_mfgr, p_name, p_size
-from part a
-where a.p_size in
- (select first_value(p_size) over(partition by p_mfgr order by p_size)
- from part b
- where a.p_brand = b.p_brand)
-;
-
---Restriction 15.h all unqualified column references in a SubQuery will resolve to table sources within the SubQuery.
-select *
-from src
-where src.key in (select key from src where key > '9')
-;
-
-----------------------------------------------------------------
--- Following tests does not fall under any restrictions per-se, they just currently don't work with HIVE
-----------------------------------------------------------------
-
--- correlated var which refers to outer query join table
-explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_partkey = p.l_partkey) ;
-
--- union, not in, corr
-explain select * from part where p_name NOT IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part);
-
--- union, not in, corr, cor var in both queries
-explain select * from part where p_name NOT IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part pp where pp.p_mfgr = part.p_comment);
-
--- IN, union, corr
-explain select * from part where p_name IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part);
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientnegative/subquery_subquery_chain_exists.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_subquery_chain_exists.q b/ql/src/test/queries/clientnegative/subquery_subquery_chain_exists.q
new file mode 100644
index 0000000..0a771e2
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_subquery_chain_exists.q
@@ -0,0 +1,4 @@
+explain
+select *
+from src
+where (exists(select key from src)) in (select key from src);
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query16.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query16.q b/ql/src/test/queries/clientpositive/perf/query16.q
new file mode 100644
index 0000000..0243bf3
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query16.q
@@ -0,0 +1,30 @@
+set hive.mapred.mode=nonstrict;
+
+explain select
+ count(distinct cs_order_number) as `order count`
+ ,sum(cs_ext_ship_cost) as `total shipping cost`
+ ,sum(cs_net_profit) as `total net profit`
+from
+ catalog_sales cs1
+ ,date_dim
+ ,customer_address
+ ,call_center
+where
+ d_date between '2001-4-01' and
+ (cast('2001-4-01' as date) + 60 days)
+and cs1.cs_ship_date_sk = d_date_sk
+and cs1.cs_ship_addr_sk = ca_address_sk
+and ca_state = 'NY'
+and cs1.cs_call_center_sk = cc_call_center_sk
+and cc_county in ('Ziebach County','Levy County','Huron County','Franklin Parish',
+ 'Daviess County'
+)
+and exists (select *
+ from catalog_sales cs2
+ where cs1.cs_order_number = cs2.cs_order_number
+ and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk)
+and not exists(select *
+ from catalog_returns cr1
+ where cs1.cs_order_number = cr1.cr_order_number)
+order by `order count`
+limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query23.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query23.q b/ql/src/test/queries/clientpositive/perf/query23.q
new file mode 100644
index 0000000..e8ebd86
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query23.q
@@ -0,0 +1,48 @@
+set hive.mapred.mode=nonstrict;
+
+explain with frequent_ss_items as
+ (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt
+ from store_sales
+ ,date_dim
+ ,item
+ where ss_sold_date_sk = d_date_sk
+ and ss_item_sk = i_item_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by substr(i_item_desc,1,30),i_item_sk,d_date
+ having count(*) >4),
+ max_store_sales as
+ (select max(csales) tpcds_cmax
+ from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales
+ from store_sales
+ ,customer
+ ,date_dim
+ where ss_customer_sk = c_customer_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year in (1999,1999+1,1999+2,1999+3)
+ group by c_customer_sk) x),
+ best_ss_customer as
+ (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales
+ from store_sales
+ ,customer
+ where ss_customer_sk = c_customer_sk
+ group by c_customer_sk
+ having sum(ss_quantity*ss_sales_price) > (95/100.0))
+ select sum(sales)
+ from (select cs_quantity*cs_list_price sales
+ from catalog_sales
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and cs_sold_date_sk = d_date_sk
+ and cs_item_sk in (select item_sk from frequent_ss_items)
+ and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer)
+ union all
+ select ws_quantity*ws_list_price sales
+ from web_sales
+ ,date_dim
+ where d_year = 1999
+ and d_moy = 1
+ and ws_sold_date_sk = d_date_sk
+ and ws_item_sk in (select item_sk from frequent_ss_items)
+ and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) y
+ limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query33.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query33.q b/ql/src/test/queries/clientpositive/perf/query33.q
new file mode 100644
index 0000000..06628bd
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query33.q
@@ -0,0 +1,73 @@
+-- start query 1 in stream 0 using template query33.tpl and seed 1930872976
+explain with ss as (
+ select
+ i_manufact_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id),
+ cs as (
+ select
+ i_manufact_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id),
+ ws as (
+ select
+ i_manufact_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_manufact_id in (select
+ i_manufact_id
+from
+ item
+where i_category in ('Books'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 3
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_manufact_id)
+ select i_manufact_id ,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_manufact_id
+ order by total_sales
+limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query45.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query45.q b/ql/src/test/queries/clientpositive/perf/query45.q
deleted file mode 100644
index 0b34bfd..0000000
--- a/ql/src/test/queries/clientpositive/perf/query45.q
+++ /dev/null
@@ -1 +0,0 @@
-explain select ca_zip, ca_county, sum(ws_sales_price) from web_sales JOIN customer ON web_sales.ws_bill_customer_sk = customer.c_customer_sk JOIN customer_address ON customer.c_current_addr_sk = customer_address.ca_address_sk JOIN date_dim ON web_sales.ws_sold_date_sk = date_dim.d_date_sk JOIN item ON web_sales.ws_item_sk = item.i_item_sk where ( item.i_item_id in (select i_item_id from item i2 where i2.i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) ) ) and d_qoy = 2 and d_year = 2000 group by ca_zip, ca_county order by ca_zip, ca_county limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query56.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query56.q b/ql/src/test/queries/clientpositive/perf/query56.q
new file mode 100644
index 0000000..63e53ea
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query56.q
@@ -0,0 +1,65 @@
+explain with ss as (
+ select i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id),
+ cs as (
+ select i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id),
+ ws as (
+ select i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from item
+where i_color in ('orchid','chiffon','lace'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 2000
+ and d_moy = 1
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -8
+ group by i_item_id)
+ select i_item_id ,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_item_id
+ order by total_sales
+ limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query60.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query60.q b/ql/src/test/queries/clientpositive/perf/query60.q
new file mode 100644
index 0000000..efa86d1
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query60.q
@@ -0,0 +1,77 @@
+-- start query 1 in stream 0 using template query60.tpl and seed 1930872976
+explain with ss as (
+ select
+ i_item_id,sum(ss_ext_sales_price) total_sales
+ from
+ store_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and ss_item_sk = i_item_sk
+ and ss_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and ss_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id),
+ cs as (
+ select
+ i_item_id,sum(cs_ext_sales_price) total_sales
+ from
+ catalog_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and cs_item_sk = i_item_sk
+ and cs_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and cs_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id),
+ ws as (
+ select
+ i_item_id,sum(ws_ext_sales_price) total_sales
+ from
+ web_sales,
+ date_dim,
+ customer_address,
+ item
+ where
+ i_item_id in (select
+ i_item_id
+from
+ item
+where i_category in ('Children'))
+ and ws_item_sk = i_item_sk
+ and ws_sold_date_sk = d_date_sk
+ and d_year = 1999
+ and d_moy = 9
+ and ws_bill_addr_sk = ca_address_sk
+ and ca_gmt_offset = -6
+ group by i_item_id)
+ select
+ i_item_id
+,sum(total_sales) total_sales
+ from (select * from ss
+ union all
+ select * from cs
+ union all
+ select * from ws) tmp1
+ group by i_item_id
+ order by i_item_id
+ ,total_sales
+ limit 100;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query69.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query69.q b/ql/src/test/queries/clientpositive/perf/query69.q
new file mode 100644
index 0000000..d9528ae
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query69.q
@@ -0,0 +1,46 @@
+set hive.mapred.mode=nonstrict;
+
+explain select
+ cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ count(*) cnt1,
+ cd_purchase_estimate,
+ count(*) cnt2,
+ cd_credit_rating,
+ count(*) cnt3
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ ca_state in ('CO','IL','MN') and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select *
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2) and
+ (not exists (select *
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2) and
+ not exists (select *
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 1999 and
+ d_moy between 1 and 1+2))
+ group by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating
+ order by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating
+ limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/perf/query83.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/perf/query83.q b/ql/src/test/queries/clientpositive/perf/query83.q
new file mode 100644
index 0000000..0186e03
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/perf/query83.q
@@ -0,0 +1,65 @@
+-- start query 1 in stream 0 using template query83.tpl and seed 1930872976
+explain with sr_items as
+ (select i_item_id item_id,
+ sum(sr_return_quantity) sr_item_qty
+ from store_returns,
+ item,
+ date_dim
+ where sr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and sr_returned_date_sk = d_date_sk
+ group by i_item_id),
+ cr_items as
+ (select i_item_id item_id,
+ sum(cr_return_quantity) cr_item_qty
+ from catalog_returns,
+ item,
+ date_dim
+ where cr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and cr_returned_date_sk = d_date_sk
+ group by i_item_id),
+ wr_items as
+ (select i_item_id item_id,
+ sum(wr_return_quantity) wr_item_qty
+ from web_returns,
+ item,
+ date_dim
+ where wr_item_sk = i_item_sk
+ and d_date in
+ (select d_date
+ from date_dim
+ where d_week_seq in
+ (select d_week_seq
+ from date_dim
+ where d_date in ('1998-01-02','1998-10-15','1998-11-10')))
+ and wr_returned_date_sk = d_date_sk
+ group by i_item_id)
+ select sr_items.item_id
+ ,sr_item_qty
+ ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev
+ ,cr_item_qty
+ ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev
+ ,wr_item_qty
+ ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev
+ ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average
+ from sr_items
+ ,cr_items
+ ,wr_items
+ where sr_items.item_id=cr_items.item_id
+ and sr_items.item_id=wr_items.item_id
+ order by sr_items.item_id
+ ,sr_item_qty
+ limit 100;
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/subquery_exists.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_exists.q b/ql/src/test/queries/clientpositive/subquery_exists.q
index be80721..cac8e1b 100644
--- a/ql/src/test/queries/clientpositive/subquery_exists.q
+++ b/ql/src/test/queries/clientpositive/subquery_exists.q
@@ -58,3 +58,21 @@ where exists
)
;
+-- uncorr exists
+explain
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ );
+
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ );
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/subquery_in.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_in.q b/ql/src/test/queries/clientpositive/subquery_in.q
index 5b22dce..fe0c9c8 100644
--- a/ql/src/test/queries/clientpositive/subquery_in.q
+++ b/ql/src/test/queries/clientpositive/subquery_in.q
@@ -35,6 +35,7 @@ where b.key in
)
;
+
-- agg, non corr
explain
select p_name, p_size
@@ -120,6 +121,10 @@ where li.l_linenumber = 1 and
li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
;
+-- corr, agg in outer and inner
+explain select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey);
+select sum(l_extendedprice) from lineitem, part where p_partkey = l_partkey and l_quantity IN (select avg(l_quantity) from lineitem where l_partkey = p_partkey);
+
--where has multiple conjuction
explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340;
@@ -219,4 +224,3 @@ select * from part where p_size IN (select i from tnull);
select * from tnull where i IN (select i from tnull);
drop table tempty;
-
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/subquery_in_having.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_in_having.q b/ql/src/test/queries/clientpositive/subquery_in_having.q
index 2433556..40b7e32 100644
--- a/ql/src/test/queries/clientpositive/subquery_in_having.q
+++ b/ql/src/test/queries/clientpositive/subquery_in_having.q
@@ -96,6 +96,12 @@ where b.key in (select key from src where src.key > '8')
group by key, value
having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
;
+select key, value, count(*)
+from src b
+where b.key in (select key from src where src.key > '8')
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
+;
set hive.auto.convert.join=true;
-- Plan is:
@@ -113,6 +119,21 @@ group by key, value
having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key )
;
+-- both having and where corr
+explain
+select key, value, count(*)
+from src b
+where b.key in (select key from src where src.value = b.value)
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key )
+;
+select key, value, count(*)
+from src b
+where b.key in (select key from src where src.value = b.value)
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key )
+;
+
-- non agg, non corr, windowing
explain
select p_mfgr, p_name, avg(p_size)
@@ -122,4 +143,22 @@ having p_name in
(select first_value(p_name) over(partition by p_mfgr order by p_size) from part_subq)
;
+CREATE TABLE src_null (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" INTO TABLE src_null;
+INSERT INTO src_null values('5444', null);
+
+explain
+select key, value, count(*)
+from src_null b
+where NOT EXISTS (select key from src_null where src_null.value <> b.value)
+group by key, value
+having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key );
+
+select key, value, count(*)
+from src_null b
+where NOT EXISTS (select key from src_null where src_null.value <> b.value)
+group by key, value
+having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key );
+
+DROP TABLE src_null;
DROP TABLE part_subq;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/subquery_multi.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_multi.q b/ql/src/test/queries/clientpositive/subquery_multi.q
new file mode 100644
index 0000000..aff7f20
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/subquery_multi.q
@@ -0,0 +1,111 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+
+create table tnull(i int, c char(2));
+insert into tnull values(NULL, NULL), (NULL, NULL);
+
+create table tempty(c char(2));
+
+CREATE TABLE part_null(
+ p_partkey INT,
+ p_name STRING,
+ p_mfgr STRING,
+ p_brand STRING,
+ p_type STRING,
+ p_size INT,
+ p_container STRING,
+ p_retailprice DOUBLE,
+ p_comment STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
+;
+
+LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null;
+
+insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith');
+
+-- multiple subquery
+
+-- Both IN are always true so should return all rows
+explain select * from part_null where p_size IN (select p_size from part_null) AND p_brand IN (select p_brand from part_null);
+select * from part_null where p_size IN (select p_size from part_null) AND p_brand IN (select p_brand from part_null);
+
+-- NOT IN has null value so should return 0 rows
+explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null);
+select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null);
+
+-- NOT IN is always true and IN is false for where p_name is NULL, hence should return all but one row
+explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null);
+select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null);
+
+-- NOT IN has one NULL value so this whole query should not return any row
+explain select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null);
+select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null);
+
+-- NOT IN is always true irrespective of p_name being null/non-null since inner query is empty
+-- second query is always true so this should return all rows
+explain select * from part_null where p_name NOT IN (select c from tempty) AND p_brand IN (select p_brand from part_null);
+select * from part_null where p_name NOT IN (select c from tempty) AND p_brand IN (select p_brand from part_null);
+
+-- IN, EXISTS
+explain select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull);
+select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull);
+
+explain select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty);
+select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty);
+
+explain select * from part_null where p_name IN (select p_name from part_null) AND NOT EXISTS (select c from tempty);
+select * from part_null where p_name IN (select p_name from part_null) AND NOT EXISTS (select c from tempty);
+
+-- corr, mix of IN/NOT IN
+explain select * from part_null where p_name IN ( select p_name from part where part.p_type = part_null.p_type) AND p_brand NOT IN (select p_container from part where part.p_type = part_null.p_type AND p_brand IN (select p_brand from part pp where part.p_type = pp.p_type));
+select * from part_null where p_name IN ( select p_name from part where part.p_type = part_null.p_type) AND p_brand NOT IN (select p_container from part where part.p_type = part_null.p_type AND p_brand IN (select p_brand from part pp where part.p_type = pp.p_type));
+
+-- mix of corr and uncorr
+explain select * from part_null where p_name IN ( select p_name from part) AND p_brand IN (select p_brand from part where part.p_type = part_null.p_type);
+select * from part_null where p_name IN ( select p_name from part) AND p_brand IN (select p_brand from part where part.p_type = part_null.p_type);
+
+-- one query has multiple corr
+explain select * from part_null where p_name IN ( select p_name from part where part.p_type = part_null.p_type AND part.p_container=part_null.p_container) AND p_brand NOT IN (select p_container from part where part.p_type = part_null.p_type AND p_brand IN (select p_brand from part pp where part.p_type = pp.p_type));
+select * from part_null where p_name IN ( select p_name from part where part.p_type = part_null.p_type AND part.p_container=part_null.p_container) AND p_brand NOT IN (select p_container from part where part.p_type = part_null.p_type AND p_brand IN (select p_brand from part pp where part.p_type = pp.p_type));
+
+--diff corr var (all reffering to diff outer var)
+explain select * from part_null where p_name IN (select p_name from part where part.p_type = part_null.p_type) AND p_brand NOT IN (select p_type from part where part.p_size = part_null.p_size);
+select * from part_null where p_name IN (select p_name from part where part.p_type = part_null.p_type) AND p_brand NOT IN (select p_type from part where part.p_size = part_null.p_size);
+
+-- NESTED QUERIES
+-- both queries are correlated
+explain select * from part_null where p_name IN (select p_name from part where part.p_type = part_null.p_type AND p_brand IN (select p_brand from part pp where part.p_type = pp.p_type));
+select * from part_null where p_name IN (select p_name from part where part.p_type = part_null.p_type AND p_brand IN (select p_brand from part pp where part.p_type = pp.p_type));
+
+-- in, not in corr
+explain select p.p_partkey, li.l_suppkey
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber AND l_quantity NOT IN (select avg(l_quantity) from lineitem));
+select p.p_partkey, li.l_suppkey
+from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey
+where li.l_linenumber = 1 and
+ li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber AND l_quantity NOT IN (select avg(l_quantity) from lineitem));
+
+explain
+select key, value, count(*)
+from src b
+where b.key in (select key from src where src.value = b.value)
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' and exists (select * from src s2 where s1.value = s2.value) group by s1.key )
+ ;
+select key, value, count(*)
+from src b
+where b.key in (select key from src where src.value = b.value)
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' and exists (select * from src s2 where s1.value = s2.value) group by s1.key ) ;
+
+-- subquery pred only refer to parent query column
+explain select * from part where p_name IN (select p_name from part p where part.p_type <> '1');
+select * from part where p_name IN (select p_name from part p where part.p_type <> '1');
+
+drop table tnull;
+drop table tempty;
+drop table part_null;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/subquery_notexists.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_notexists.q b/ql/src/test/queries/clientpositive/subquery_notexists.q
index 2d7e9ce..dc48c3e 100644
--- a/ql/src/test/queries/clientpositive/subquery_notexists.q
+++ b/ql/src/test/queries/clientpositive/subquery_notexists.q
@@ -39,4 +39,23 @@ where not exists
from src a
where b.value = a.value and a.value > 'val_2'
)
+;
+
+-- non equi predicate
+explain
+select *
+from src b
+where not exists
+ (select a.key
+ from src a
+ where b.value <> a.value and a.key > b.key and a.value > 'val_2'
+ )
+;
+select *
+from src b
+where not exists
+ (select a.key
+ from src a
+ where b.value <> a.value and a.key > b.key and a.value > 'val_2'
+ )
;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/queries/clientpositive/subquery_notin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_notin.q b/ql/src/test/queries/clientpositive/subquery_notin.q
index c29e63f..f9b5405 100644
--- a/ql/src/test/queries/clientpositive/subquery_notin.q
+++ b/ql/src/test/queries/clientpositive/subquery_notin.q
@@ -228,3 +228,20 @@ select t.i from t where t.i NOT IN (select t1.i from t t1 );
drop table t1;
+-- corr predicate is not equi
+explain select *
+from src b
+where b.key not in
+ (select a.key
+ from src a
+ where b.value > a.value and a.key > '9'
+ )
+;
+select *
+from src b
+where b.key not in
+ (select a.key
+ from src a
+ where b.value > a.value and a.key > '9'
+ );
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out b/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
index fb72270..4475502 100644
--- a/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
+++ b/ql/src/test/results/clientnegative/subquery_corr_grandparent.q.out
@@ -1 +1 @@
-FAILED: SemanticException [Error 10249]: Line 4:53 Unsupported SubQuery Expression 'p_name': SubQuery expression refers to both Parent and SubQuery expressions and is not a valid join condition.
+FAILED: SemanticException [Error 10249]: Line 4:53 Unsupported SubQuery Expression 'p_name': Nested SubQuery expressions are not supported.
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientnegative/subquery_restrictions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_restrictions.q.out b/ql/src/test/results/clientnegative/subquery_restrictions.q.out
deleted file mode 100644
index a546d49..0000000
--- a/ql/src/test/results/clientnegative/subquery_restrictions.q.out
+++ /dev/null
@@ -1 +0,0 @@
-FAILED: SemanticException [Error 10249]: Unsupported SubQuery Expression Currently SubQuery expressions are only allowed as Where and Having Clause predicates
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientnegative/subquery_subquery_chain_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_subquery_chain_exists.q.out b/ql/src/test/results/clientnegative/subquery_subquery_chain_exists.q.out
new file mode 100644
index 0000000..0a79559
--- /dev/null
+++ b/ql/src/test/results/clientnegative/subquery_subquery_chain_exists.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10249]: Line 4:56 Unsupported SubQuery Expression 'key': Only 1 SubQuery expression is supported.
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index fa54bb7..bea58fb 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -2459,14 +2459,14 @@ Stage-0
Stage-1
Reducer 4 llap
File Output Operator [FS_27]
- Select Operator [SEL_26] (rows=250 width=178)
+ Select Operator [SEL_26] (rows=500 width=178)
Output:["_col0","_col1"]
<-Reducer 3 [SIMPLE_EDGE] llap
SHUFFLE [RS_25]
- Select Operator [SEL_24] (rows=250 width=178)
+ Select Operator [SEL_24] (rows=500 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_23] (rows=250 width=198)
- predicate:(not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END)
+ Filter Operator [FIL_23] (rows=500 width=198)
+ predicate:((_col2 = 0) or (_col5 is null and _col0 is not null and (_col3 >= _col2)))
Merge Join Operator [MERGEJOIN_32] (rows=500 width=198)
Conds:RS_20._col0=RS_21._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"]
<-Reducer 2 [SIMPLE_EDGE] llap
@@ -2683,14 +2683,14 @@ Stage-0
Stage-1
Reducer 4 llap
File Output Operator [FS_36]
- Select Operator [SEL_35] (rows=13 width=125)
+ Select Operator [SEL_35] (rows=26 width=125)
Output:["_col0","_col1"]
<-Reducer 3 [SIMPLE_EDGE] llap
SHUFFLE [RS_34]
- Select Operator [SEL_33] (rows=13 width=125)
+ Select Operator [SEL_33] (rows=26 width=125)
Output:["_col0","_col1"]
- Filter Operator [FIL_32] (rows=13 width=145)
- predicate:(not CASE WHEN ((_col2 = 0)) THEN (false) WHEN (_col5 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col3 < _col2)) THEN (true) ELSE (false) END)
+ Filter Operator [FIL_32] (rows=26 width=145)
+ predicate:((_col2 = 0) or (_col5 is null and _col1 is not null and (_col3 >= _col2)))
Merge Join Operator [MERGEJOIN_42] (rows=26 width=145)
Conds:RS_29.UDFToDouble(_col1)=RS_30._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5"]
<-Reducer 2 [SIMPLE_EDGE] llap
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/llap/lineage3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out
index 72a9344..43e6b4b 100644
--- a/ql/src/test/results/clientpositive/llap/lineage3.q.out
+++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out
@@ -186,7 +186,7 @@ where key not in (select key+18 from src1) order by key
PREHOOK: type: QUERY
PREHOOK: Input: default@src1
#### A masked pattern was here ####
-{"version":"1.0","engine":"tez","database":"default","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(not CASE WHEN ((count(*) = 0)) THEN (false) WHEN (i is not null) THEN (true) WHEN (src1.key is null) THEN (null) WHEN ((count((UDFToDouble(src1.key) + 18.0)) < count(*))) THEN (true) ELSE (false) END)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.s
rc1"}]}
+{"version":"1.0","engine":"tez","database":"default","hash":"9b488fe1d7cf018aad3825173808cd36","queryText":"select key, value from src1\nwhere key not in (select key+18 from src1) order by key","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(src1.key) = (UDFToDouble(src1.key) + 18.0))","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"((count(*) = 0) or (i is null and src1.key is not null and (count((UDFToDouble(src1.key) + 18.0)) >= count(*))))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"key"},{"id":1,"vertexType":"COLUMN","vertexId":"value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"TABLE","vertexId":"default.src1"}]}
PREHOOK: query: select * from src1 a
where not exists
(select cint from alltypesorc b
http://git-wip-us.apache.org/repos/asf/hive/blob/b0ed8241/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
index b132cb6..3d8251f 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
@@ -431,3 +431,633 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: -- uncorr exists
+explain
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- uncorr exists
+explain
+select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (value > 'val_9') (type: boolean)
+ Statistics: Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: true (type: boolean)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: boolean)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: boolean)
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: boolean)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src b
+where exists
+ (select a.key
+ from src a
+ where a.value > 'val_9'
+ )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+10 val_10
+100 val_100
+100 val_100
+103 val_103
+103 val_103
+104 val_104
+104 val_104
+105 val_105
+11 val_11
+111 val_111
+113 val_113
+113 val_113
+114 val_114
+116 val_116
+118 val_118
+118 val_118
+119 val_119
+119 val_119
+119 val_119
+12 val_12
+12 val_12
+120 val_120
+120 val_120
+125 val_125
+125 val_125
+126 val_126
+128 val_128
+128 val_128
+128 val_128
+129 val_129
+129 val_129
+131 val_131
+133 val_133
+134 val_134
+134 val_134
+136 val_136
+137 val_137
+137 val_137
+138 val_138
+138 val_138
+138 val_138
+138 val_138
+143 val_143
+145 val_145
+146 val_146
+146 val_146
+149 val_149
+149 val_149
+15 val_15
+15 val_15
+150 val_150
+152 val_152
+152 val_152
+153 val_153
+155 val_155
+156 val_156
+157 val_157
+158 val_158
+160 val_160
+162 val_162
+163 val_163
+164 val_164
+164 val_164
+165 val_165
+165 val_165
+166 val_166
+167 val_167
+167 val_167
+167 val_167
+168 val_168
+169 val_169
+169 val_169
+169 val_169
+169 val_169
+17 val_17
+170 val_170
+172 val_172
+172 val_172
+174 val_174
+174 val_174
+175 val_175
+175 val_175
+176 val_176
+176 val_176
+177 val_177
+178 val_178
+179 val_179
+179 val_179
+18 val_18
+18 val_18
+180 val_180
+181 val_181
+183 val_183
+186 val_186
+187 val_187
+187 val_187
+187 val_187
+189 val_189
+19 val_19
+190 val_190
+191 val_191
+191 val_191
+192 val_192
+193 val_193
+193 val_193
+193 val_193
+194 val_194
+195 val_195
+195 val_195
+196 val_196
+197 val_197
+197 val_197
+199 val_199
+199 val_199
+199 val_199
+2 val_2
+20 val_20
+200 val_200
+200 val_200
+201 val_201
+202 val_202
+203 val_203
+203 val_203
+205 val_205
+205 val_205
+207 val_207
+207 val_207
+208 val_208
+208 val_208
+208 val_208
+209 val_209
+209 val_209
+213 val_213
+213 val_213
+214 val_214
+216 val_216
+216 val_216
+217 val_217
+217 val_217
+218 val_218
+219 val_219
+219 val_219
+221 val_221
+221 val_221
+222 val_222
+223 val_223
+223 val_223
+224 val_224
+224 val_224
+226 val_226
+228 val_228
+229 val_229
+229 val_229
+230 val_230
+230 val_230
+230 val_230
+230 val_230
+230 val_230
+233 val_233
+233 val_233
+235 val_235
+237 val_237
+237 val_237
+238 val_238
+238 val_238
+239 val_239
+239 val_239
+24 val_24
+24 val_24
+241 val_241
+242 val_242
+242 val_242
+244 val_244
+247 val_247
+248 val_248
+249 val_249
+252 val_252
+255 val_255
+255 val_255
+256 val_256
+256 val_256
+257 val_257
+258 val_258
+26 val_26
+26 val_26
+260 val_260
+262 val_262
+263 val_263
+265 val_265
+265 val_265
+266 val_266
+27 val_27
+272 val_272
+272 val_272
+273 val_273
+273 val_273
+273 val_273
+274 val_274
+275 val_275
+277 val_277
+277 val_277
+277 val_277
+277 val_277
+278 val_278
+278 val_278
+28 val_28
+280 val_280
+280 val_280
+281 val_281
+281 val_281
+282 val_282
+282 val_282
+283 val_283
+284 val_284
+285 val_285
+286 val_286
+287 val_287
+288 val_288
+288 val_288
+289 val_289
+291 val_291
+292 val_292
+296 val_296
+298 val_298
+298 val_298
+298 val_298
+30 val_30
+302 val_302
+305 val_305
+306 val_306
+307 val_307
+307 val_307
+308 val_308
+309 val_309
+309 val_309
+310 val_310
+311 val_311
+311 val_311
+311 val_311
+315 val_315
+316 val_316
+316 val_316
+316 val_316
+317 val_317
+317 val_317
+318 val_318
+318 val_318
+318 val_318
+321 val_321
+321 val_321
+322 val_322
+322 val_322
+323 val_323
+325 val_325
+325 val_325
+327 val_327
+327 val_327
+327 val_327
+33 val_33
+331 val_331
+331 val_331
+332 val_332
+333 val_333
+333 val_333
+335 val_335
+336 val_336
+338 val_338
+339 val_339
+34 val_34
+341 val_341
+342 val_342
+342 val_342
+344 val_344
+344 val_344
+345 val_345
+348 val_348
+348 val_348
+348 val_348
+348 val_348
+348 val_348
+35 val_35
+35 val_35
+35 val_35
+351 val_351
+353 val_353
+353 val_353
+356 val_356
+360 val_360
+362 val_362
+364 val_364
+365 val_365
+366 val_366
+367 val_367
+367 val_367
+368 val_368
+369 val_369
+369 val_369
+369 val_369
+37 val_37
+37 val_37
+373 val_373
+374 val_374
+375 val_375
+377 val_377
+378 val_378
+379 val_379
+382 val_382
+382 val_382
+384 val_384
+384 val_384
+384 val_384
+386 val_386
+389 val_389
+392 val_392
+393 val_393
+394 val_394
+395 val_395
+395 val_395
+396 val_396
+396 val_396
+396 val_396
+397 val_397
+397 val_397
+399 val_399
+399 val_399
+4 val_4
+400 val_400
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+401 val_401
+402 val_402
+403 val_403
+403 val_403
+403 val_403
+404 val_404
+404 val_404
+406 val_406
+406 val_406
+406 val_406
+406 val_406
+407 val_407
+409 val_409
+409 val_409
+409 val_409
+41 val_41
+411 val_411
+413 val_413
+413 val_413
+414 val_414
+414 val_414
+417 val_417
+417 val_417
+417 val_417
+418 val_418
+419 val_419
+42 val_42
+42 val_42
+421 val_421
+424 val_424
+424 val_424
+427 val_427
+429 val_429
+429 val_429
+43 val_43
+430 val_430
+430 val_430
+430 val_430
+431 val_431
+431 val_431
+431 val_431
+432 val_432
+435 val_435
+436 val_436
+437 val_437
+438 val_438
+438 val_438
+438 val_438
+439 val_439
+439 val_439
+44 val_44
+443 val_443
+444 val_444
+446 val_446
+448 val_448
+449 val_449
+452 val_452
+453 val_453
+454 val_454
+454 val_454
+454 val_454
+455 val_455
+457 val_457
+458 val_458
+458 val_458
+459 val_459
+459 val_459
+460 val_460
+462 val_462
+462 val_462
+463 val_463
+463 val_463
+466 val_466
+466 val_466
+466 val_466
+467 val_467
+468 val_468
+468 val_468
+468 val_468
+468 val_468
+469 val_469
+469 val_469
+469 val_469
+469 val_469
+469 val_469
+47 val_47
+470 val_470
+472 val_472
+475 val_475
+477 val_477
+478 val_478
+478 val_478
+479 val_479
+480 val_480
+480 val_480
+480 val_480
+481 val_481
+482 val_482
+483 val_483
+484 val_484
+485 val_485
+487 val_487
+489 val_489
+489 val_489
+489 val_489
+489 val_489
+490 val_490
+491 val_491
+492 val_492
+492 val_492
+493 val_493
+494 val_494
+495 val_495
+496 val_496
+497 val_497
+498 val_498
+498 val_498
+498 val_498
+5 val_5
+5 val_5
+5 val_5
+51 val_51
+51 val_51
+53 val_53
+54 val_54
+57 val_57
+58 val_58
+58 val_58
+64 val_64
+65 val_65
+66 val_66
+67 val_67
+67 val_67
+69 val_69
+70 val_70
+70 val_70
+70 val_70
+72 val_72
+72 val_72
+74 val_74
+76 val_76
+76 val_76
+77 val_77
+78 val_78
+8 val_8
+80 val_80
+82 val_82
+83 val_83
+83 val_83
+84 val_84
+84 val_84
+85 val_85
+86 val_86
+87 val_87
+9 val_9
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98