You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2019/07/03 17:17:41 UTC
[hive] branch master updated: HIVE-21921: Support for correlated
quantified predicates (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository.
vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 3f0935e HIVE-21921: Support for correlated quantified predicates (Vineet Garg,reviewed by Jesus Camacho Rodriguez)
3f0935e is described below
commit 3f0935e4f2e56dc483860e74240f3f9826f74e8f
Author: Vineet Garg <vg...@apache.org>
AuthorDate: Wed Jul 3 10:16:59 2019 -0700
HIVE-21921: Support for correlated quantified predicates (Vineet Garg,reviewed by Jesus Camacho Rodriguez)
---
.../calcite/CalciteSubqueryRuntimeException.java | 47 +
.../calcite/rules/HiveSubQueryRemoveRule.java | 323 +++--
.../hadoop/hive/ql/parse/CalcitePlanner.java | 18 +-
.../clientnegative/subquery_any_aggregate.q | 10 +
ql/src/test/queries/clientpositive/subquery_ANY.q | 41 +
.../clientnegative/subquery_any_aggregate.q.out | 37 +
.../results/clientpositive/llap/subquery_ANY.q.out | 1294 ++++++++++++++++++++
7 files changed, 1627 insertions(+), 143 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSubqueryRuntimeException.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSubqueryRuntimeException.java
new file mode 100644
index 0000000..a0412e3
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSubqueryRuntimeException.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * Unchecked exception raised from Subquery rewrite.
+ */
+
+public class CalciteSubqueryRuntimeException extends RuntimeException{
+
+ private static final long serialVersionUID = 1L;
+
+ public CalciteSubqueryRuntimeException() {
+ super();
+ }
+
+ public CalciteSubqueryRuntimeException(String message) {
+ super(message);
+ }
+
+ public CalciteSubqueryRuntimeException(Throwable cause) {
+ super(cause);
+ }
+
+ public CalciteSubqueryRuntimeException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
index 6c57474..bad49f9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
@@ -55,17 +55,20 @@ import java.util.List;
import java.util.Set;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubqueryRuntimeException;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveSubQRemoveRelBuilder;
import org.apache.hadoop.hive.ql.optimizer.calcite.SubqueryConf;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
/**
* NOTE: this rule is replicated from Calcite's SubqueryRemoveRule
* Transform that converts IN, EXISTS and scalar sub-queries into joins.
* TODO:
- * Reason this is replicated instead of using Calcite's is
- * Calcite creates null literal with null type but hive needs it to be properly typed
+ * Reason this is replicated instead of using Calcite's is
+ * Calcite creates null literal with null type but hive needs it to be properly typed
*
* <p>Sub-queries are represented by {@link RexSubQuery} expressions.
*
@@ -79,52 +82,48 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
private HiveConf conf;
public HiveSubQueryRemoveRule(HiveConf conf) {
- super(operand(RelNode.class, null, HiveSubQueryFinder.RELNODE_PREDICATE,
- any()),
+ super(operand(RelNode.class, null, HiveSubQueryFinder.RELNODE_PREDICATE, any()),
HiveRelFactories.HIVE_BUILDER, "SubQueryRemoveRule:Filter");
this.conf = conf;
}
- @Override
- public void onMatch(RelOptRuleCall call) {
+
+ @Override public void onMatch(RelOptRuleCall call) {
final RelNode relNode = call.rel(0);
final HiveSubQRemoveRelBuilder builder =
new HiveSubQRemoveRelBuilder(null, call.rel(0).getCluster(), null);
// if subquery is in FILTER
- if(relNode instanceof Filter) {
+ if (relNode instanceof Filter) {
final Filter filter = call.rel(0);
- final RexSubQuery e =
- RexUtil.SubQueryFinder.find(filter.getCondition());
+ final RexSubQuery e = RexUtil.SubQueryFinder.find(filter.getCondition());
assert e != null;
final RelOptUtil.Logic logic =
- LogicVisitor.find(RelOptUtil.Logic.TRUE,
- ImmutableList.of(filter.getCondition()), e);
+ LogicVisitor.find(RelOptUtil.Logic.TRUE, ImmutableList.of(filter.getCondition()), e);
builder.push(filter.getInput());
final int fieldCount = builder.peek().getRowType().getFieldCount();
- assert(filter instanceof HiveFilter);
+ assert (filter instanceof HiveFilter);
SubqueryConf subqueryConfig = filter.getCluster().getPlanner().
getContext().unwrap(SubqueryConf.class);
boolean isCorrScalarQuery = subqueryConfig.getCorrScalarRexSQWithAgg().contains(e.rel);
- final RexNode target = apply(call.getMetadataQuery(), e, HiveFilter.getVariablesSet(e), logic,
- builder, 1, fieldCount, isCorrScalarQuery);
+ final RexNode target =
+ apply(call.getMetadataQuery(), e, HiveFilter.getVariablesSet(e), logic, builder, 1,
+ fieldCount, isCorrScalarQuery);
final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
builder.filter(shuttle.apply(filter.getCondition()));
builder.project(fields(builder, filter.getRowType().getFieldCount()));
RelNode newRel = builder.build();
call.transformTo(newRel);
- } else if(relNode instanceof Project) {
+ } else if (relNode instanceof Project) {
// if subquery is in PROJECT
final Project project = call.rel(0);
- final RexSubQuery e =
- RexUtil.SubQueryFinder.find(project.getProjects());
+ final RexSubQuery e = RexUtil.SubQueryFinder.find(project.getProjects());
assert e != null;
final RelOptUtil.Logic logic =
- LogicVisitor.find(RelOptUtil.Logic.TRUE_FALSE_UNKNOWN,
- project.getProjects(), e);
+ LogicVisitor.find(RelOptUtil.Logic.TRUE_FALSE_UNKNOWN, project.getProjects(), e);
builder.push(project.getInput());
final int fieldCount = builder.peek().getRowType().getFieldCount();
@@ -132,11 +131,11 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
project.getCluster().getPlanner().getContext().unwrap(SubqueryConf.class);
boolean isCorrScalarQuery = subqueryConfig.getCorrScalarRexSQWithAgg().contains(e.rel);
- final RexNode target = apply(call.getMetadataQuery(), e, HiveFilter.getVariablesSet(e),
- logic, builder, 1, fieldCount, isCorrScalarQuery);
+ final RexNode target =
+ apply(call.getMetadataQuery(), e, HiveFilter.getVariablesSet(e), logic, builder, 1,
+ fieldCount, isCorrScalarQuery);
final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
- builder.project(shuttle.apply(project.getProjects()),
- project.getRowType().getFieldNames());
+ builder.project(shuttle.apply(project.getProjects()), project.getRowType().getFieldNames());
call.transformTo(builder.build());
}
}
@@ -145,32 +144,31 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
/// if COUNT returns true since COUNT produces 0 on empty result set
private boolean isAggZeroOnEmpty(RexSubQuery e) {
//as this is corr scalar subquery with agg we expect one aggregate
- assert(e.getKind() == SqlKind.SCALAR_QUERY);
- assert(e.rel.getInputs().size() == 1);
- Aggregate relAgg = (Aggregate)e.rel.getInput(0);
- assert(relAgg.getAggCallList().size() == 1); //should only have one aggregate
- if(relAgg.getAggCallList().get(0).getAggregation().getKind() == SqlKind.COUNT) {
+ assert (e.getKind() == SqlKind.SCALAR_QUERY);
+ assert (e.rel.getInputs().size() == 1);
+ Aggregate relAgg = (Aggregate) e.rel.getInput(0);
+ assert (relAgg.getAggCallList().size() == 1); //should only have one aggregate
+ if (relAgg.getAggCallList().get(0).getAggregation().getKind() == SqlKind.COUNT) {
return true;
}
return false;
}
private SqlTypeName getAggTypeForScalarSub(RexSubQuery e) {
- assert(e.getKind() == SqlKind.SCALAR_QUERY);
- assert(e.rel.getInputs().size() == 1);
- Aggregate relAgg = (Aggregate)e.rel.getInput(0);
- assert(relAgg.getAggCallList().size() == 1); //should only have one aggregate
+ assert (e.getKind() == SqlKind.SCALAR_QUERY);
+ assert (e.rel.getInputs().size() == 1);
+ Aggregate relAgg = (Aggregate) e.rel.getInput(0);
+ assert (relAgg.getAggCallList().size() == 1); //should only have one aggregate
return relAgg.getAggCallList().get(0).getType().getSqlTypeName();
}
private RexNode rewriteScalar(RelMetadataQuery mq, RexSubQuery e, Set<CorrelationId> variablesSet,
- HiveSubQRemoveRelBuilder builder, int offset, int inputCount,
- boolean isCorrScalarAgg) {
+ HiveSubQRemoveRelBuilder builder, int offset, int inputCount, boolean isCorrScalarAgg) {
// if scalar query has aggregate and no windowing and no gby avoid adding sq_count_check
// since it is guaranteed to produce at most one row
Double maxRowCount = mq.getMaxRowCount(e.rel);
- boolean shouldIntroSQCountCheck = maxRowCount== null || maxRowCount > 1.0;
- if(shouldIntroSQCountCheck) {
+ boolean shouldIntroSQCountCheck = maxRowCount == null || maxRowCount > 1.0;
+ if (shouldIntroSQCountCheck) {
builder.push(e.rel);
// returns single row/column
builder.aggregate(builder.groupKey(), builder.count(false, "cnt"));
@@ -191,7 +189,7 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
}
offset++;
}
- if(isCorrScalarAgg) {
+ if (isCorrScalarAgg) {
// Transformation :
// Outer Query Left Join (inner query) on correlated predicate
// and preserve rows only from left side.
@@ -208,7 +206,7 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
final ImmutableList.Builder<RexNode> operands = ImmutableList.builder();
RexNode literal;
- if(isAggZeroOnEmpty(e)) {
+ if (isAggZeroOnEmpty(e)) {
// since count has a return type of BIG INT we need to make a literal of type big int
// relbuilder's literal doesn't allow this
literal = e.rel.getCluster().getRexBuilder().makeBigintLiteral(new BigDecimal(0));
@@ -216,7 +214,7 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
literal = e.rel.getCluster().getRexBuilder().makeNullLiteral(getAggTypeForScalarSub(e));
}
operands.add((builder.isNull(builder.field(indicator))), literal);
- operands.add(field(builder, 1, builder.fields().size()-2));
+ operands.add(field(builder, 1, builder.fields().size() - 2));
return builder.call(SqlStdOperatorTable.CASE, operands.build());
}
@@ -230,33 +228,89 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
private RexNode rewriteSomeAll(RexSubQuery e, Set<CorrelationId> variablesSet,
HiveSubQRemoveRelBuilder builder) {
final SqlQuantifyOperator op = (SqlQuantifyOperator) e.op;
- assert(op == SqlStdOperatorTable.SOME_GE
- || op == SqlStdOperatorTable.SOME_LE
- || op == SqlStdOperatorTable.SOME_LT
- || op == SqlStdOperatorTable.SOME_GT);
- builder.push(e.rel)
- .aggregate(builder.groupKey(),
- op.comparisonKind == SqlKind.GREATER_THAN
- || op.comparisonKind == SqlKind.GREATER_THAN_OR_EQUAL
- ? builder.min("m", builder.field(0))
- : builder.max("m", builder.field(0)),
- builder.count(false, "c"),
- builder.count(false, "d", builder.field(0)))
- .as("q")
- .join(JoinRelType.INNER);
- return builder.call(SqlStdOperatorTable.CASE,
- builder.call(SqlStdOperatorTable.EQUALS,
- builder.field("q", "c"), builder.literal(0)),
- builder.literal(false),
- builder.call(SqlStdOperatorTable.IS_TRUE,
- builder.call(RelOptUtil.op(op.comparisonKind, null),
- e.operands.get(0), builder.field("q", "m"))),
- builder.literal(true),
- builder.call(SqlStdOperatorTable.GREATER_THAN,
- builder.field("q", "c"), builder.field("q", "d")),
- e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN),
- builder.call(RelOptUtil.op(op.comparisonKind, null),
- e.operands.get(0), builder.field("q", "m")));
+
+ // SOME_EQ & SOME_NE should have been rewritten into IN/ NOT IN
+ assert (op == SqlStdOperatorTable.SOME_GE || op == SqlStdOperatorTable.SOME_LE
+ || op == SqlStdOperatorTable.SOME_LT || op == SqlStdOperatorTable.SOME_GT);
+
+ if (variablesSet.isEmpty()) {
+ // for non-correlated case queries such as
+ // select e.deptno, e.deptno < some (select deptno from emp) as v
+ // from emp as e
+ //
+ // becomes
+ //
+ // select e.deptno,
+ // case
+ // when q.c = 0 then false // sub-query is empty
+ // when (e.deptno < q.m) is true then true
+ // when q.c > q.d then unknown // sub-query has at least one null
+ // else e.deptno < q.m
+ // end as v
+ // from emp as e
+ // cross join (
+ // select max(deptno) as m, count(*) as c, count(deptno) as d
+ // from emp) as q
+ builder.push(e.rel).aggregate(builder.groupKey(), op.comparisonKind == SqlKind.GREATER_THAN
+ || op.comparisonKind == SqlKind.GREATER_THAN_OR_EQUAL ? builder
+ .min("m", builder.field(0)) : builder.max("m", builder.field(0)),
+ builder.count(false, "c"), builder.count(false, "d", builder.field(0))).as("q")
+ .join(JoinRelType.INNER);
+ return builder.call(SqlStdOperatorTable.CASE,
+ builder.call(SqlStdOperatorTable.EQUALS, builder.field("q", "c"), builder.literal(0)),
+ builder.literal(false), builder.call(SqlStdOperatorTable.IS_TRUE, builder
+ .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0),
+ builder.field("q", "m"))), builder.literal(true), builder
+ .call(SqlStdOperatorTable.GREATER_THAN, builder.field("q", "c"),
+ builder.field("q", "d")),
+ e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN), builder
+ .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0),
+ builder.field("q", "m")));
+ } else {
+ // for correlated case queries such as
+ // select e.deptno, e.deptno < some (select deptno from emp where emp.name = e.name) as v
+ // from emp as e
+ //
+ // becomes
+ //
+ // select e.deptno,
+ // case
+ // when indicator is null then false // sub-query is empty for corresponding corr value
+ // when q.c = 0 then false // sub-query is empty
+ // when (e.deptno < q.m) is true then true
+ // when q.c > q.d then unknown // sub-query has at least one null
+ // else e.deptno < q.m
+ // end as v
+ // from emp as e
+ // left outer join (
+ // select max(deptno) as m, count(*) as c, count(deptno) as d, "alwaysTrue" as indicator
+ // from emp group by name) as q on e.name = q.name
+ subqueryRestriction(e.rel);
+ builder.push(e.rel);
+ builder.aggregate(builder.groupKey(), op.comparisonKind == SqlKind.GREATER_THAN
+ || op.comparisonKind == SqlKind.GREATER_THAN_OR_EQUAL ? builder
+ .min("m", builder.field(0)) : builder.max("m", builder.field(0)),
+ builder.count(false, "c"), builder.count(false, "d", builder.field(0)));
+
+ final List<RexNode> parentQueryFields = new ArrayList<>();
+ parentQueryFields.addAll(builder.fields());
+ String indicator = "alwaysTrue" + e.rel.getId();
+ parentQueryFields.add(builder.alias(builder.literal(true), indicator));
+ builder.project(parentQueryFields).as("q");
+ builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
+ return builder.call(SqlStdOperatorTable.CASE,
+ builder.call(SqlStdOperatorTable.IS_NULL, builder.field(indicator)),
+ builder.literal(false),
+ builder.call(SqlStdOperatorTable.EQUALS, builder.field("q", "c"), builder.literal(0)),
+ builder.literal(false), builder.call(SqlStdOperatorTable.IS_TRUE, builder
+ .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0),
+ builder.field("q", "m"))), builder.literal(true), builder
+ .call(SqlStdOperatorTable.GREATER_THAN, builder.field("q", "c"),
+ builder.field("q", "d")),
+ e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN), builder
+ .call(RelOptUtil.op(op.comparisonKind, null), e.operands.get(0),
+ builder.field("q", "m")));
+ }
}
@@ -318,17 +372,16 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
builder.push(e.rel);
final List<RexNode> fields = new ArrayList<>();
- if(e.getKind() == SqlKind.IN) {
+ if (e.getKind() == SqlKind.IN) {
fields.addAll(builder.fields());
// Transformation: sq_count_check(count(*), true) FILTER is generated on top
// of subquery which is then joined (LEFT or INNER) with outer query
// This transformation is done to add run time check using sq_count_check to
// throw an error if subquery is producing zero row, since with aggregate this
// will produce wrong results (because we further rewrite such queries into JOIN)
- if(isCorrScalarAgg) {
+ if (isCorrScalarAgg) {
// returns single row/column
- builder.aggregate(builder.groupKey(),
- builder.count(false, "cnt_in"));
+ builder.aggregate(builder.groupKey(), builder.count(false, "cnt_in"));
if (!variablesSet.isEmpty()) {
builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
@@ -336,9 +389,10 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
}
- SqlFunction inCountCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION,
- ReturnTypes.BIGINT, InferTypes.RETURN_TYPE, OperandTypes.NUMERIC,
- SqlFunctionCategory.USER_DEFINED_FUNCTION);
+ SqlFunction inCountCheck =
+ new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT,
+ InferTypes.RETURN_TYPE, OperandTypes.NUMERIC,
+ SqlFunctionCategory.USER_DEFINED_FUNCTION);
// we create FILTER (sq_count_check(count()) > 0) instead of PROJECT
// because RelFieldTrimmer ends up getting rid of Project
@@ -347,7 +401,7 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
//true here indicates that sq_count_check is for IN/NOT IN subqueries
builder.call(inCountCheck, builder.field("cnt_in"), builder.literal(true)),
builder.literal(0)));
- offset = offset + 1;
+ offset = offset + 1;
builder.push(e.rel);
}
}
@@ -362,12 +416,10 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
logic = RelOptUtil.Logic.TRUE_FALSE;
break;
}
- builder.aggregate(builder.groupKey(),
- builder.count(false, "c"),
- builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck",
- builder.fields()));
+ builder.aggregate(builder.groupKey(), builder.count(false, "c"),
+ builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck", builder.fields()));
builder.as("ct");
- if(!variablesSet.isEmpty()) {
+ if (!variablesSet.isEmpty()) {
//builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
} else {
@@ -384,8 +436,8 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
case TRUE:
if (fields.isEmpty()) {
builder.project(builder.alias(builder.literal(true), "i" + e.rel.getId()));
- if(!variablesSet.isEmpty()
- && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) {
+ if (!variablesSet.isEmpty() && (e.getKind() == SqlKind.EXISTS
+ || e.getKind() == SqlKind.IN)) {
// avoid adding group by for correlated IN/EXISTS queries
// since this is rewritting into semijoin
break;
@@ -393,8 +445,8 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
builder.aggregate(builder.groupKey(0));
}
} else {
- if(!variablesSet.isEmpty()
- && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) {
+ if (!variablesSet.isEmpty() && (e.getKind() == SqlKind.EXISTS
+ || e.getKind() == SqlKind.IN)) {
// avoid adding group by for correlated IN/EXISTS queries
// since this is rewritting into semijoin
break;
@@ -410,10 +462,8 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
}
builder.as("dt");
final List<RexNode> conditions = new ArrayList<>();
- for (Pair<RexNode, RexNode> pair
- : Pair.zip(e.getOperands(), builder.fields())) {
- conditions.add(
- builder.equals(pair.left, RexUtil.shift(pair.right, offset)));
+ for (Pair<RexNode, RexNode> pair : Pair.zip(e.getOperands(), builder.fields())) {
+ conditions.add(builder.equals(pair.left, RexUtil.shift(pair.right, offset)));
}
switch (logic) {
case TRUE:
@@ -432,8 +482,7 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
switch (logic) {
case TRUE_FALSE_UNKNOWN:
case UNKNOWN_AS_TRUE:
- operands.add(
- builder.equals(builder.field("ct", "c"), builder.literal(0)),
+ operands.add(builder.equals(builder.field("ct", "c"), builder.literal(0)),
builder.literal(false));
//now that we are using LEFT OUTER JOIN to join inner count, count(*)
// with outer table, we wouldn't be able to tell if count is zero
@@ -444,8 +493,8 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
operands.add((builder.isNull(builder.field("ct", "c"))), builder.literal(false));
break;
}
- operands.add(builder.isNotNull(builder.field("dt", "i" + e.rel.getId())),
- builder.literal(true));
+ operands
+ .add(builder.isNotNull(builder.field("dt", "i" + e.rel.getId())), builder.literal(true));
if (!keyIsNulls.isEmpty()) {
//Calcite creates null literal with Null type here but
// because HIVE doesn't support null type it is appropriately typed boolean
@@ -460,9 +509,8 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
b = e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN);
// fall through
case UNKNOWN_AS_TRUE:
- operands.add(
- builder.call(SqlStdOperatorTable.LESS_THAN,
- builder.field("ct", "ck"), builder.field("ct", "c")),
+ operands.add(builder
+ .call(SqlStdOperatorTable.LESS_THAN, builder.field("ct", "ck"), builder.field("ct", "c")),
b);
break;
}
@@ -471,8 +519,7 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
}
protected RexNode apply(RelMetadataQuery mq, RexSubQuery e, Set<CorrelationId> variablesSet,
- RelOptUtil.Logic logic,
- HiveSubQRemoveRelBuilder builder, int inputCount, int offset,
+ RelOptUtil.Logic logic, HiveSubQRemoveRelBuilder builder, int inputCount, int offset,
boolean isCorrScalarAgg) {
switch (e.getKind()) {
case SCALAR_QUERY:
@@ -487,10 +534,12 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
}
}
- /** Returns a reference to a particular field, by offset, across several
- * inputs on a {@link RelBuilder}'s stack. */
+ /**
+ * Returns a reference to a particular field, by offset, across several
+ * inputs on a {@link RelBuilder}'s stack.
+ */
private RexInputRef field(HiveSubQRemoveRelBuilder builder, int inputCount, int offset) {
- for (int inputOrdinal = 0;;) {
+ for (int inputOrdinal = 0; ;) {
final RelNode r = builder.peek(inputCount, inputOrdinal);
if (offset < r.getRowType().getFieldCount()) {
return builder.field(inputCount, inputOrdinal, offset);
@@ -500,8 +549,10 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
}
}
- /** Returns a list of expressions that project the first {@code fieldCount}
- * fields of the top input on a {@link RelBuilder}'s stack. */
+ /**
+ * Returns a list of expressions that project the first {@code fieldCount}
+ * fields of the top input on a {@link RelBuilder}'s stack.
+ */
private static List<RexNode> fields(HiveSubQRemoveRelBuilder builder, int fieldCount) {
final List<RexNode> projects = new ArrayList<>();
for (int i = 0; i < fieldCount; i++) {
@@ -510,9 +561,11 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
return projects;
}
- /** Shuttle that replaces occurrences of a given
+ /**
+ * Shuttle that replaces occurrences of a given
* {@link org.apache.calcite.rex.RexSubQuery} with a replacement
- * expression. */
+ * expression.
+ */
private static class ReplaceSubQueryShuttle extends RexShuttle {
private final RexSubQuery subQuery;
private final RexNode replacement;
@@ -531,37 +584,40 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
// Following HiveSubQueryFinder has been copied from RexUtil::SubQueryFinder
// since there is BUG in there (CALCITE-1726).
// Once CALCITE-1726 is fixed we should get rid of the following code
- /** Visitor that throws {@link org.apache.calcite.util.Util.FoundOne} if
- * applied to an expression that contains a {@link RexSubQuery}. */
+
+ /**
+ * Visitor that throws {@link org.apache.calcite.util.Util.FoundOne} if
+ * applied to an expression that contains a {@link RexSubQuery}.
+ */
public static final class HiveSubQueryFinder extends RexVisitorImpl<Void> {
public static final HiveSubQueryFinder INSTANCE = new HiveSubQueryFinder();
- /** Returns whether a {@link Project} contains a sub-query. */
- public static final Predicate<RelNode> RELNODE_PREDICATE=
- new Predicate<RelNode>() {
- @Override
- public boolean apply(RelNode relNode) {
- if (relNode instanceof Project) {
- Project project = (Project)relNode;
- for (RexNode node : project.getProjects()) {
- try {
- node.accept(INSTANCE);
- } catch (Util.FoundOne e) {
- return true;
- }
- }
- return false;
- } else if (relNode instanceof Filter) {
- try {
- ((Filter)relNode).getCondition().accept(INSTANCE);
- return false;
- } catch (Util.FoundOne e) {
- return true;
- }
+ /**
+ * Returns whether a {@link Project} or {@link Filter} contains a sub-query.
+ */
+ public static final Predicate<RelNode> RELNODE_PREDICATE = new Predicate<RelNode>() {
+ @Override public boolean apply(RelNode relNode) {
+ if (relNode instanceof Project) {
+ Project project = (Project) relNode;
+ for (RexNode node : project.getProjects()) {
+ try {
+ node.accept(INSTANCE);
+ } catch (Util.FoundOne e) {
+ return true;
}
+ }
+ return false;
+ } else if (relNode instanceof Filter) {
+ try {
+ ((Filter) relNode).getCondition().accept(INSTANCE);
return false;
+ } catch (Util.FoundOne e) {
+ return true;
}
- };
+ }
+ return false;
+ }
+ };
private HiveSubQueryFinder() {
super(true);
@@ -592,6 +648,17 @@ public class HiveSubQueryRemoveRule extends RelOptRule {
}
}
+ public static void subqueryRestriction(RelNode relNode) {
+ if (relNode instanceof HiveAggregate) {
+ HiveAggregate aggregate = (HiveAggregate) relNode;
+ if (!aggregate.getAggCallList().isEmpty() && aggregate.getGroupSet().isEmpty()) {
+ throw new CalciteSubqueryRuntimeException(
+ "Subquery rewrite: Aggregate without group by is not allowed");
+ }
+ } else if (relNode instanceof HiveProject || relNode instanceof HiveFilter) {
+ subqueryRestriction(relNode.getInput(0));
+ }
+ }
}
// End SubQueryRemoveRule.java
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 7a30239..cce87b4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -139,21 +139,8 @@ import org.apache.hadoop.hive.ql.metadata.NotNullConstraint;
import org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
-import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.*;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
-import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
-import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteViewSemanticException;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptMaterializationValidator;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
-import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
-import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
@@ -595,7 +582,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
this.ctx.setCboInfo("Plan not optimized by CBO.");
}
}
- if( e instanceof CalciteSubquerySemanticException) {
+ if( e instanceof CalciteSubquerySemanticException
+ || e instanceof CalciteSubqueryRuntimeException) {
// non-cbo path retries to execute subqueries and throws completely different exception/error
// to eclipse the original error message
// so avoid executing subqueries on non-cbo
diff --git a/ql/src/test/queries/clientnegative/subquery_any_aggregate.q b/ql/src/test/queries/clientnegative/subquery_any_aggregate.q
new file mode 100644
index 0000000..485a33c
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_any_aggregate.q
@@ -0,0 +1,10 @@
+create table t(i int, j int);
+insert into t values(0,1), (0,2);
+
+create table tt(i int, j int);
+insert into tt values(0,3);
+
+select * from t where i > ANY (select count(i) from tt where tt.j = t.j);
+
+drop table t;
+drop table tt;
diff --git a/ql/src/test/queries/clientpositive/subquery_ANY.q b/ql/src/test/queries/clientpositive/subquery_ANY.q
index 1c36edb..37dd801 100644
--- a/ql/src/test/queries/clientpositive/subquery_ANY.q
+++ b/ql/src/test/queries/clientpositive/subquery_ANY.q
@@ -7,6 +7,9 @@ create table tempty(i int, j int);
CREATE TABLE part_null_n0 as select * from part;
insert into part_null_n0 values(NULL,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL);
+CREATE TABLE part_null_n1 as select * from part;
+insert into part_null_n1 values(17273,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL);
+
-- test all six comparison operators
explain cbo select count(*) from part where p_partkey = ANY (select p_partkey from part);
select count(*) from part where p_partkey = ANY (select p_partkey from part);
@@ -76,5 +79,43 @@ select p_partkey, (p_partkey > ANY (select null from part_null_n0)) from part_nu
select p_partkey, (p_partkey > ANY (select i from tempty)) from part_null_n0;
+-- correlated
+explain select * from part where p_partkey > ANY (select p_partkey from part p where p.p_type = part.p_type);
+select * from part where p_partkey > ANY (select p_partkey from part p where p.p_type = part.p_type);
+
+-- correlated, select, with empty results, should produce false
+explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_name)) from part;
+select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_name)) from part;
+
+-- correlated, correlation condition matches but subquery will not produce result due to false predicate, should produce false
+explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type and p_partkey < 0)) from part;
+select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type and p_partkey < 0)) from part;
+
+-- correlated, subquery has match, should produce true
+explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type)) from part;
+select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type)) from part;
+
+-- correlated, subquery has match but has NULL for one row, should produce one NULL
+explain select p_partkey, (p_size >= ANY (select 3*p_size from part_null_n1 pp where pp.p_partkey = part.p_partkey)) from part;
+select p_partkey, (p_size >= ANY (select 3*p_size from part_null_n1 pp where pp.p_partkey = part.p_partkey)) from part;
+
+-- correlated, with an aggregate and explicit group by
+explain select p_partkey, (p_partkey >= ANY (select min(p_partkey) from part pp where pp.p_type = part.p_name group by p_partkey)) from part;
+select p_partkey, (p_partkey >= ANY (select min(p_partkey) from part pp where pp.p_type = part.p_name group by p_partkey)) from part;
+
+-- nested
+explain select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type
+ AND p_size >= ANY(select p_size from part pp where part.p_type = pp.p_type));
+select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type
+ AND p_size >= ANY(select p_size from part pp where part.p_type = pp.p_type));
+
+-- multi
+explain select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type)
+ AND p_size >= ANY(select p_size from part pp where part_null_n1.p_type = pp.p_type);
+
+select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type)
+ AND p_size >= ANY(select p_size from part pp where part_null_n1.p_type = pp.p_type);
+
+DROP TABLE part_null_n1;
DROP TABLE part_null_n0;
DROP TABLE tempty;
diff --git a/ql/src/test/results/clientnegative/subquery_any_aggregate.q.out b/ql/src/test/results/clientnegative/subquery_any_aggregate.q.out
new file mode 100644
index 0000000..5176ed7
--- /dev/null
+++ b/ql/src/test/results/clientnegative/subquery_any_aggregate.q.out
@@ -0,0 +1,37 @@
+PREHOOK: query: create table t(i int, j int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t(i int, j int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into t values(0,1), (0,2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values(0,1), (0,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.i SCRIPT []
+POSTHOOK: Lineage: t.j SCRIPT []
+PREHOOK: query: create table tt(i int, j int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tt
+POSTHOOK: query: create table tt(i int, j int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tt
+PREHOOK: query: insert into tt values(0,3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tt
+POSTHOOK: query: insert into tt values(0,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tt
+POSTHOOK: Lineage: tt.i SCRIPT []
+POSTHOOK: Lineage: tt.j SCRIPT []
+FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubqueryRuntimeException: Subquery rewrite: Aggregate without group by is not allowed
diff --git a/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out
index cb0ec4b..e77f41a 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out
@@ -42,6 +42,42 @@ POSTHOOK: Lineage: part_null_n0.p_partkey EXPRESSION []
POSTHOOK: Lineage: part_null_n0.p_retailprice EXPRESSION []
POSTHOOK: Lineage: part_null_n0.p_size EXPRESSION []
POSTHOOK: Lineage: part_null_n0.p_type EXPRESSION []
+PREHOOK: query: CREATE TABLE part_null_n1 as select * from part
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@part
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part_null_n1
+POSTHOOK: query: CREATE TABLE part_null_n1 as select * from part
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@part
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part_null_n1
+POSTHOOK: Lineage: part_null_n1.p_brand SIMPLE [(part)part.FieldSchema(name:p_brand, type:string, comment:null), ]
+POSTHOOK: Lineage: part_null_n1.p_comment SIMPLE [(part)part.FieldSchema(name:p_comment, type:string, comment:null), ]
+POSTHOOK: Lineage: part_null_n1.p_container SIMPLE [(part)part.FieldSchema(name:p_container, type:string, comment:null), ]
+POSTHOOK: Lineage: part_null_n1.p_mfgr SIMPLE [(part)part.FieldSchema(name:p_mfgr, type:string, comment:null), ]
+POSTHOOK: Lineage: part_null_n1.p_name SIMPLE [(part)part.FieldSchema(name:p_name, type:string, comment:null), ]
+POSTHOOK: Lineage: part_null_n1.p_partkey SIMPLE [(part)part.FieldSchema(name:p_partkey, type:int, comment:null), ]
+POSTHOOK: Lineage: part_null_n1.p_retailprice SIMPLE [(part)part.FieldSchema(name:p_retailprice, type:double, comment:null), ]
+POSTHOOK: Lineage: part_null_n1.p_size SIMPLE [(part)part.FieldSchema(name:p_size, type:int, comment:null), ]
+POSTHOOK: Lineage: part_null_n1.p_type SIMPLE [(part)part.FieldSchema(name:p_type, type:string, comment:null), ]
+PREHOOK: query: insert into part_null_n1 values(17273,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@part_null_n1
+POSTHOOK: query: insert into part_null_n1 values(17273,NULL,NULL,NULL,NULL, NULL, NULL,NULL,NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@part_null_n1
+POSTHOOK: Lineage: part_null_n1.p_brand EXPRESSION []
+POSTHOOK: Lineage: part_null_n1.p_comment EXPRESSION []
+POSTHOOK: Lineage: part_null_n1.p_container EXPRESSION []
+POSTHOOK: Lineage: part_null_n1.p_mfgr EXPRESSION []
+POSTHOOK: Lineage: part_null_n1.p_name EXPRESSION []
+POSTHOOK: Lineage: part_null_n1.p_partkey SCRIPT []
+POSTHOOK: Lineage: part_null_n1.p_retailprice EXPRESSION []
+POSTHOOK: Lineage: part_null_n1.p_size EXPRESSION []
+POSTHOOK: Lineage: part_null_n1.p_type EXPRESSION []
PREHOOK: query: explain cbo select count(*) from part where p_partkey = ANY (select p_partkey from part)
PREHOOK: type: QUERY
PREHOOK: Input: default@part
@@ -568,6 +604,1264 @@ POSTHOOK: Input: default@tempty
86428 false
90681 false
NULL false
+PREHOOK: query: explain select * from part where p_partkey > ANY (select p_partkey from part p where p.p_type = part.p_type)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from part where p_partkey > ANY (select p_partkey from part p where p.p_type = part.p_type)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col4 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: string)
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: p
+ filterExpr: p_type is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: p_type is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(p_partkey), count(), count(p_partkey)
+ keys: p_type (type: string)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col4 (type: string)
+ 1 _col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 40 Data size: 25120 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (((_col0 > _col9) and (_col12 is null or (_col10 = 0L)) is not true) or ((_col0 > _col9) and (_col12 is null or (_col10 = 0L)) is not true and (_col0 > _col9) is not true and (_col10 > _col11) is not true)) (type: boolean)
+ Statistics: Num rows: 7 Data size: 4405 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 7 Data size: 4333 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), true (type: boolean), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 13 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col4 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: string)
+ Statistics: Num rows: 13 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: boolean)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from part where p_partkey > ANY (select p_partkey from part p where p.p_type = part.p_type)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part where p_partkey > ANY (select p_partkey from part p where p.p_type = part.p_type)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
+PREHOOK: query: explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_name)) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_name)) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: pp
+ filterExpr: p_type is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: p_type is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(p_partkey), count(), count(p_partkey)
+ keys: p_type (type: string)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col2 (type: string)
+ outputColumnNames: _col0, _col2, _col3, _col5, _col6
+ Statistics: Num rows: 37 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), (((_col0 >= _col2) is true and (_col3 is null or _col5) is not true) or (_col6 is true and null and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true) or ((_col0 >= _col2) and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true and _col6 is not true)) (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 37 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 37 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), true (type: boolean), _col0 (type: string), (_col2 = 0L) (type: boolean), (_col2 > _col3) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: string)
+ Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: boolean), _col3 (type: boolean), _col4 (type: boolean)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_name)) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_name)) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+105685 false
+110592 false
+112398 false
+121152 false
+121152 false
+132666 false
+144293 false
+146985 false
+15103 false
+155733 false
+17273 false
+17927 false
+191709 false
+192697 false
+195606 false
+33357 false
+40982 false
+42669 false
+45261 false
+48427 false
+49671 false
+65667 false
+78486 false
+85768 false
+86428 false
+90681 false
+PREHOOK: query: explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type and p_partkey < 0)) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type and p_partkey < 0)) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE)
+ Reducer 3 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_type (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Filter Operator
+ predicate: ((p_partkey < 0) and p_type is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(p_partkey), count(), count(p_partkey)
+ keys: p_type (type: string)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col2 (type: string)
+ outputColumnNames: _col0, _col2, _col3, _col5, _col6
+ Statistics: Num rows: 27 Data size: 140 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), (((_col0 >= _col2) is true and (_col3 is null or _col5) is not true) or (_col6 is true and null and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true) or ((_col0 >= _col2) and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true and _col6 is not true)) (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 27 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), true (type: boolean), _col0 (type: string), (_col2 = 0L) (type: boolean), (_col2 > _col3) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: string)
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: boolean), _col3 (type: boolean), _col4 (type: boolean)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type and p_partkey < 0)) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type and p_partkey < 0)) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+105685 false
+110592 false
+112398 false
+121152 false
+121152 false
+132666 false
+144293 false
+146985 false
+15103 false
+155733 false
+17273 false
+17927 false
+191709 false
+192697 false
+195606 false
+33357 false
+40982 false
+42669 false
+45261 false
+48427 false
+49671 false
+65667 false
+78486 false
+85768 false
+86428 false
+90681 false
+PREHOOK: query: explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type)) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type)) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (ONE_TO_ONE_EDGE)
+ Reducer 3 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_type (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Filter Operator
+ predicate: p_type is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(p_partkey), count(), count(p_partkey)
+ keys: p_type (type: string)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col2 (type: string)
+ outputColumnNames: _col0, _col2, _col3, _col5, _col6
+ Statistics: Num rows: 36 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), (((_col0 >= _col2) is true and (_col3 is null or _col5) is not true) or (_col6 is true and null and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true) or ((_col0 >= _col2) and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true and _col6 is not true)) (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 36 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 36 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), true (type: boolean), _col0 (type: string), (_col2 = 0L) (type: boolean), (_col2 > _col3) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: string)
+ Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: boolean), _col3 (type: boolean), _col4 (type: boolean)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type)) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_partkey, (p_partkey >= ANY (select p_partkey from part pp where pp.p_type = part.p_type)) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+105685 true
+110592 true
+112398 true
+121152 true
+121152 true
+132666 true
+144293 true
+146985 true
+15103 true
+155733 true
+17273 true
+17927 true
+191709 true
+192697 true
+195606 true
+33357 true
+40982 true
+42669 true
+45261 true
+48427 true
+49671 true
+65667 true
+78486 true
+85768 true
+86428 true
+90681 true
+PREHOOK: query: explain select p_partkey, (p_size >= ANY (select 3*p_size from part_null_n1 pp where pp.p_partkey = part.p_partkey)) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@part_null_n1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select p_partkey, (p_size >= ANY (select 3*p_size from part_null_n1 pp where pp.p_partkey = part.p_partkey)) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@part_null_n1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_size (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: pp
+ filterExpr: p_partkey is not null (type: boolean)
+ Statistics: Num rows: 27 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: p_partkey is not null (type: boolean)
+ Statistics: Num rows: 27 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), (3 * p_size) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col1), count(), count(_col1)
+ keys: _col0 (type: int)
+ minReductionHashAggr: 0.5185185
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6
+ Statistics: Num rows: 39 Data size: 536 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), (((_col1 >= _col2) is true and (_col3 is null or _col5) is not true) or (_col6 is true and null and (_col3 is null or _col5) is not true and (_col1 >= _col2) is not true) or ((_col1 >= _col2) and (_col3 is null or _col5) is not true and (_col1 >= _col2) is not true and _col6 is not true)) (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 39 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), true (type: boolean), _col0 (type: int), (_col2 = 0L) (type: boolean), (_col2 > _col3) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: int)
+ Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: boolean), _col3 (type: boolean), _col4 (type: boolean)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_partkey, (p_size >= ANY (select 3*p_size from part_null_n1 pp where pp.p_partkey = part.p_partkey)) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@part_null_n1
+#### A masked pattern was here ####
+POSTHOOK: query: select p_partkey, (p_size >= ANY (select 3*p_size from part_null_n1 pp where pp.p_partkey = part.p_partkey)) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@part_null_n1
+#### A masked pattern was here ####
+105685 false
+110592 false
+112398 false
+121152 false
+121152 false
+132666 false
+144293 false
+146985 false
+15103 false
+155733 false
+17273 NULL
+17927 false
+191709 false
+192697 false
+195606 false
+33357 false
+40982 false
+42669 false
+45261 false
+48427 false
+49671 false
+65667 false
+78486 false
+85768 false
+86428 false
+90681 false
+PREHOOK: query: explain select p_partkey, (p_partkey >= ANY (select min(p_partkey) from part pp where pp.p_type = part.p_name group by p_partkey)) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: explain select p_partkey, (p_partkey >= ANY (select min(p_partkey) from part pp where pp.p_type = part.p_name group by p_partkey)) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: pp
+ filterExpr: p_type is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: p_type is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(p_partkey)
+ keys: p_type (type: string), p_partkey (type: int)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col2 (type: string)
+ outputColumnNames: _col0, _col2, _col3, _col5, _col6
+ Statistics: Num rows: 37 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), (((_col0 >= _col2) is true and (_col3 is null or _col5) is not true) or (_col6 is true and null and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true) or ((_col0 >= _col2) and (_col3 is null or _col5) is not true and (_col0 >= _col2) is not true and _col6 is not true)) (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 37 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 37 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0)
+ keys: KEY._col0 (type: string), KEY._col1 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: int)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col2), count(), count(_col2)
+ keys: _col1 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), true (type: boolean), _col0 (type: string), (_col2 = 0L) (type: boolean), (_col2 > _col3) (type: boolean)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col2 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: string)
+ Statistics: Num rows: 13 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: boolean), _col3 (type: boolean), _col4 (type: boolean)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select p_partkey, (p_partkey >= ANY (select min(p_partkey) from part pp where pp.p_type = part.p_name group by p_partkey)) from part
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+#### A masked pattern was here ####
+POSTHOOK: query: select p_partkey, (p_partkey >= ANY (select min(p_partkey) from part pp where pp.p_type = part.p_name group by p_partkey)) from part
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+#### A masked pattern was here ####
+105685 false
+110592 false
+112398 false
+121152 false
+121152 false
+132666 false
+144293 false
+146985 false
+15103 false
+155733 false
+17273 false
+17927 false
+191709 false
+192697 false
+195606 false
+33357 false
+40982 false
+42669 false
+45261 false
+48427 false
+49671 false
+65667 false
+78486 false
+85768 false
+86428 false
+90681 false
+PREHOOK: query: explain select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type
+ AND p_size >= ANY(select p_size from part pp where part.p_type = pp.p_type))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@part_null_n1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type
+ AND p_size >= ANY(select p_size from part pp where part.p_type = pp.p_type))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@part_null_n1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part_null_n1
+ filterExpr: (p_type is not null and p_name is not null) (type: boolean)
+ Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (p_type is not null and p_name is not null) (type: boolean)
+ Statistics: Num rows: 25 Data size: 15475 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 25 Data size: 15475 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string), _col4 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col1 (type: string), _col4 (type: string)
+ Statistics: Num rows: 25 Data size: 15475 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: part
+ filterExpr: (p_type is not null and p_name is not null) (type: boolean)
+ Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (p_type is not null and p_name is not null) (type: boolean)
+ Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_name (type: string), p_type (type: string), p_size (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col2 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: pp
+ filterExpr: p_type is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: p_type is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(p_size), count(), count(p_size)
+ keys: p_type (type: string)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col1 (type: string), _col4 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 3 Data size: 1857 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 1857 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 40 Data size: 9520 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (((_col2 >= _col3) and (_col6 is null or (_col4 = 0L)) is not true) or ((_col2 >= _col3) and (_col6 is null or (_col4 = 0L)) is not true and (_col2 >= _col3) is not true and (_col4 > _col5) is not true)) (type: boolean)
+ Statistics: Num rows: 7 Data size: 1675 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 7 Data size: 1575 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.28571427
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), true (type: boolean), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 13 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col4 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: string)
+ Statistics: Num rows: 13 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: boolean)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type
+ AND p_size >= ANY(select p_size from part pp where part.p_type = pp.p_type))
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@part_null_n1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type
+ AND p_size >= ANY(select p_size from part pp where part.p_type = pp.p_type))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@part_null_n1
+#### A masked pattern was here ####
+105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
+110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously
+112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
+144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
+146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
+15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
+155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
+17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the
+17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
+191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
+195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s
+42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
+45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
+48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
+49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
+65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
+78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
+85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
+PREHOOK: query: explain select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type)
+ AND p_size >= ANY(select p_size from part pp where part_null_n1.p_type = pp.p_type)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@part_null_n1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type)
+ AND p_size >= ANY(select p_size from part pp where part_null_n1.p_type = pp.p_type)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@part_null_n1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
+ Reducer 6 <- Map 5 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: part_null_n1
+ filterExpr: (p_type is not null and p_name is not null) (type: boolean)
+ Statistics: Num rows: 27 Data size: 16713 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (p_type is not null and p_name is not null) (type: boolean)
+ Statistics: Num rows: 25 Data size: 15475 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 25 Data size: 15475 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col4 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col4 (type: string), _col1 (type: string)
+ Statistics: Num rows: 25 Data size: 15475 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: part
+ filterExpr: (p_type is not null and p_name is not null) (type: boolean)
+ Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (p_type is not null and p_name is not null) (type: boolean)
+ Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: p_type (type: string), p_name (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string)
+ minReductionHashAggr: 0.0
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: pp
+ filterExpr: p_type is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: p_type is not null (type: boolean)
+ Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(p_size), count(), count(p_size)
+ keys: p_type (type: string)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint)
+ Execution mode: vectorized, llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Semi Join 0 to 1
+ keys:
+ 0 _col4 (type: string), _col1 (type: string)
+ 1 _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col4 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: string)
+ Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col4 (type: string)
+ 1 _col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+ Statistics: Num rows: 26 Data size: 16430 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (((_col5 >= _col9) and (_col12 is null or (_col10 = 0L)) is not true) or ((_col5 >= _col9) and (_col12 is null or (_col10 = 0L)) is not true and (_col5 >= _col9) is not true and (_col10 > _col11) is not true)) (type: boolean)
+ Statistics: Num rows: 5 Data size: 3167 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ Statistics: Num rows: 5 Data size: 3095 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 3095 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 13 Data size: 1612 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), true (type: boolean), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 13 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col4 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col4 (type: string)
+ Statistics: Num rows: 13 Data size: 1664 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: boolean)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type)
+ AND p_size >= ANY(select p_size from part pp where part_null_n1.p_type = pp.p_type)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part
+PREHOOK: Input: default@part_null_n1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from part_null_n1 where p_name IN (select p_name from part where part.p_type = part_null_n1.p_type)
+ AND p_size >= ANY(select p_size from part pp where part_null_n1.p_type = pp.p_type)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part
+POSTHOOK: Input: default@part_null_n1
+#### A masked pattern was here ####
+105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
+110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously
+112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
+144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
+146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
+15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu
+155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
+17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the
+17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
+191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
+195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s
+42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
+45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
+48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
+49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
+65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
+78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
+85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
+PREHOOK: query: DROP TABLE part_null_n1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@part_null_n1
+PREHOOK: Output: default@part_null_n1
+POSTHOOK: query: DROP TABLE part_null_n1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@part_null_n1
+POSTHOOK: Output: default@part_null_n1
PREHOOK: query: DROP TABLE part_null_n0
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@part_null_n0