You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2017/11/07 06:27:36 UTC
[17/17] hive git commit: HIVE-17767 Rewrite correlated EXISTS/IN
subqueries into LEFT SEMI JOIN (Vineet Garg, reviewed by Ashutosh Chauhan)
HIVE-17767 Rewrite correlated EXISTS/IN subqueries into LEFT SEMI JOIN (Vineet Garg, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/aee0eaa0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/aee0eaa0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/aee0eaa0
Branch: refs/heads/master
Commit: aee0eaa032e94f4562c1f07940e4cb391c9bcafd
Parents: 7195aee
Author: Vineet Garg <vg...@apache.com>
Authored: Mon Nov 6 22:26:50 2017 -0800
Committer: Vineet Garg <vg...@apache.com>
Committed: Mon Nov 6 22:26:50 2017 -0800
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 3 +-
.../calcite/HiveSubQRemoveRelBuilder.java | 28 +-
.../calcite/rules/HiveRelDecorrelator.java | 385 ++-
.../calcite/rules/HiveSubQueryRemoveRule.java | 20 +-
.../translator/PlanModifierForASTConv.java | 5 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +-
.../queries/clientpositive/subquery_exists.q | 6 +-
.../test/queries/clientpositive/subquery_in.q | 50 +-
.../clientpositive/constprog_partitioner.q.out | 6 +-
.../llap/constprog_semijoin.q.out | 40 +-
.../llap/dynamic_semijoin_reduction_2.q.out | 90 +-
.../clientpositive/llap/explainuser_1.q.out | 140 +-
.../clientpositive/llap/subquery_exists.q.out | 309 +-
.../clientpositive/llap/subquery_in.q.out | 1528 +++++++---
.../llap/subquery_in_having.q.out | 2079 +++++++++++++
.../clientpositive/llap/subquery_multi.q.out | 627 ++--
.../clientpositive/llap/subquery_notin.q.out | 170 +-
.../clientpositive/llap/subquery_scalar.q.out | 103 +-
.../clientpositive/llap/subquery_select.q.out | 162 +-
.../clientpositive/llap/subquery_views.q.out | 82 +-
.../llap/vector_mapjoin_reduce.q.out | 29 +-
.../results/clientpositive/masking_12.q.out | 43 +-
.../test/results/clientpositive/masking_3.q.out | 237 +-
.../test/results/clientpositive/masking_4.q.out | 43 +-
.../clientpositive/perf/spark/query10.q.out | 176 +-
.../clientpositive/perf/spark/query16.q.out | 234 +-
.../clientpositive/perf/spark/query35.q.out | 224 +-
.../clientpositive/perf/spark/query69.q.out | 155 +-
.../clientpositive/perf/spark/query94.q.out | 234 +-
.../clientpositive/perf/tez/query10.q.out | 272 +-
.../clientpositive/perf/tez/query14.q.out | 4 +-
.../clientpositive/perf/tez/query16.q.out | 255 +-
.../clientpositive/perf/tez/query23.q.out | 2 +-
.../clientpositive/perf/tez/query35.q.out | 274 +-
.../clientpositive/perf/tez/query69.q.out | 248 +-
.../clientpositive/perf/tez/query94.q.out | 255 +-
.../test/results/clientpositive/semijoin5.q.out | 63 +-
.../spark/constprog_partitioner.q.out | 6 +-
.../spark/constprog_semijoin.q.out | 57 +-
.../spark/spark_explainuser_1.q.out | 136 +-
.../clientpositive/spark/subquery_exists.q.out | 317 +-
.../clientpositive/spark/subquery_in.q.out | 1571 ++++++----
.../clientpositive/spark/subquery_multi.q.out | 681 ++---
.../clientpositive/spark/subquery_notin.q.out | 136 +-
.../clientpositive/spark/subquery_scalar.q.out | 116 +-
.../clientpositive/spark/subquery_select.q.out | 141 +-
.../clientpositive/spark/subquery_views.q.out | 36 +-
.../spark/vector_mapjoin_reduce.q.out | 15 +-
.../clientpositive/subquery_exists.q.out | 347 +--
.../clientpositive/subquery_exists_having.q.out | 38 +-
.../clientpositive/subquery_in_having.q.out | 2770 ------------------
.../clientpositive/subquery_notexists.q.out | 32 +-
.../subquery_notexists_having.q.out | 32 +-
.../clientpositive/subquery_notin_having.q.out | 26 +-
.../subquery_unqualcolumnrefs.q.out | 197 +-
.../clientpositive/vector_mapjoin_reduce.q.out | 444 ---
56 files changed, 7625 insertions(+), 8059 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 9b0bace..46abf8a 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -353,7 +353,6 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
vector_left_outer_join.q,\
vector_left_outer_join2.q,\
vector_leftsemi_mapjoin.q,\
- vector_mapjoin_reduce.q,\
vector_mr_diff_schema_alias.q,\
vector_multi_insert.q,\
vector_null_projection.q,\
@@ -621,6 +620,7 @@ minillaplocal.query.files=\
special_character_in_tabnames_1.q,\
sqlmerge.q,\
stats_based_fetch_decision.q,\
+ subquery_in_having.q,\
subquery_notin.q,\
subquery_nested_subquery.q, \
subquery_select.q, \
@@ -680,6 +680,7 @@ minillaplocal.query.files=\
vector_join30.q,\
vector_join_filters.q,\
vector_leftsemi_mapjoin.q,\
+ vector_mapjoin_reduce.q,\
vector_number_compare_projection.q,\
vector_partitioned_date_time.q,\
vector_ptf_part_simple.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
index 3a1897f..c6a5ce2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java
@@ -1109,10 +1109,8 @@ public class HiveSubQRemoveRelBuilder {
return getRexBuilder().makeFieldAccess(e, name, false);
}
- /** Creates a {@link org.apache.calcite.rel.core.Join} with correlating
- * variables. */
public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition,
- Set<CorrelationId> variablesSet) {
+ Set<CorrelationId> variablesSet, boolean createSemiJoin) {
Frame right = stack.pop();
final Frame left = stack.pop();
final RelNode join;
@@ -1121,10 +1119,10 @@ public class HiveSubQRemoveRelBuilder {
if (correlate) {
final CorrelationId id = Iterables.getOnlyElement(variablesSet);
final ImmutableBitSet requiredColumns =
- RelOptUtil.correlationColumns(id, right.rel);
+ RelOptUtil.correlationColumns(id, right.rel);
if (!RelOptUtil.notContainsCorrelation(left.rel, id, Litmus.IGNORE)) {
throw new IllegalArgumentException("variable " + id
- + " must not be used by left input to correlation");
+ + " must not be used by left input to correlation");
}
switch (joinType) {
case LEFT:
@@ -1138,11 +1136,18 @@ public class HiveSubQRemoveRelBuilder {
default:
postCondition = condition;
}
- join = correlateFactory.createCorrelate(left.rel, right.rel, id,
- requiredColumns, SemiJoinType.of(joinType));
+ if(createSemiJoin) {
+ join = correlateFactory.createCorrelate(left.rel, right.rel, id,
+ requiredColumns, SemiJoinType.SEMI);
+ }
+ else {
+ join = correlateFactory.createCorrelate(left.rel, right.rel, id,
+ requiredColumns, SemiJoinType.of(joinType));
+
+ }
} else {
join = joinFactory.createJoin(left.rel, right.rel, condition,
- variablesSet, joinType, false);
+ variablesSet, joinType, false);
}
final List<Pair<String, RelDataType>> pairs = new ArrayList<>();
pairs.addAll(left.right);
@@ -1152,6 +1157,13 @@ public class HiveSubQRemoveRelBuilder {
return this;
}
+ /** Creates a {@link org.apache.calcite.rel.core.Join} with correlating
+ * variables. */
+ public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition,
+ Set<CorrelationId> variablesSet) {
+ return join(joinType, condition, variablesSet, false) ;
+ }
+
/** Creates a {@link org.apache.calcite.rel.core.Join} using USING syntax.
*
* <p>For each of the field names, both left and right inputs must have a
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
index 62125f0..d1fe49c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
@@ -61,8 +61,10 @@ import org.apache.calcite.rel.core.Values;
import org.apache.calcite.rel.logical.LogicalAggregate;
import org.apache.calcite.rel.logical.LogicalCorrelate;
import org.apache.calcite.rel.logical.LogicalFilter;
+import org.apache.calcite.rel.logical.LogicalIntersect;
import org.apache.calcite.rel.logical.LogicalJoin;
import org.apache.calcite.rel.logical.LogicalProject;
+import org.apache.calcite.rel.logical.LogicalUnion;
import org.apache.calcite.rel.metadata.RelMdUtil;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.rules.FilterJoinRule;
@@ -77,10 +79,12 @@ import org.apache.calcite.rex.RexFieldAccess;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexOver;
import org.apache.calcite.rex.RexShuttle;
import org.apache.calcite.rex.RexSubQuery;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.SemiJoinType;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
@@ -92,6 +96,7 @@ import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.util.Bug;
import org.apache.calcite.util.Holder;
import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.ImmutableIntList;
import org.apache.calcite.util.Litmus;
import org.apache.calcite.util.Pair;
import org.apache.calcite.util.ReflectUtil;
@@ -99,12 +104,17 @@ import org.apache.calcite.util.ReflectiveVisitor;
import org.apache.calcite.util.Stacks;
import org.apache.calcite.util.Util;
import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttleImpl;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -122,6 +132,8 @@ import com.google.common.collect.Multimaps;
import com.google.common.collect.Sets;
import com.google.common.collect.SortedSetMultimap;
+import java.util.Stack;
+
/**
* NOTE: this whole logic is replicated from Calcite's RelDecorrelator
* and is exteneded to make it suitable for HIVE
@@ -176,6 +188,8 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
private final HashSet<LogicalCorrelate> generatedCorRels = Sets.newHashSet();
+ private Stack valueGen = new Stack();
+
//~ Constructors -----------------------------------------------------------
private HiveRelDecorrelator (
@@ -262,6 +276,8 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
return planner2.findBestExp();
}
+ assert(valueGen.isEmpty());
+
return root;
}
@@ -319,8 +335,14 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
return planner.findBestExp();
}
+ protected RexNode decorrelateExpr(RexNode exp, boolean valueGenerator) {
+ DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle();
+ shuttle.setValueGenerator(valueGenerator);
+ return exp.accept(shuttle);
+ }
protected RexNode decorrelateExpr(RexNode exp) {
DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle();
+ shuttle.setValueGenerator(true);
return exp.accept(shuttle);
}
@@ -1106,7 +1128,11 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
try {
findCorrelationEquivalent(correlation, ((Filter) rel).getCondition());
} catch (Util.FoundOne e) {
- map.put(def, (Integer) e.getNode());
+ // we need to keep predicate kind e.g. EQUAL or NOT EQUAL
+ // so that later while decorrelating LogicalCorrelate appropriate join predicate
+ // is generated
+ def.setPredicateKind((SqlKind)((Pair)e.getNode()).getValue());
+ map.put(def, (Integer)((Pair) e.getNode()).getKey());
}
}
// If all correlation variables are now satisfied, skip creating a value
@@ -1145,16 +1171,23 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
private void findCorrelationEquivalent(CorRef correlation, RexNode e)
throws Util.FoundOne {
switch (e.getKind()) {
- case EQUALS:
+ // TODO: for now only EQUAL and NOT EQUAL corr predicates are optimized
+ //optimize rest of the predicates
+ case NOT_EQUALS:
+ if((boolean)valueGen.peek()) {
+ // we will need value generator
+ break;
+ }
+ case EQUALS:
final RexCall call = (RexCall) e;
final List<RexNode> operands = call.getOperands();
if (references(operands.get(0), correlation)
&& operands.get(1) instanceof RexInputRef) {
- throw new Util.FoundOne(((RexInputRef) operands.get(1)).getIndex());
+ throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(1)).getIndex(), e.getKind()));
}
if (references(operands.get(1), correlation)
&& operands.get(0) instanceof RexInputRef) {
- throw new Util.FoundOne(((RexInputRef) operands.get(0)).getIndex());
+ throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(0)).getIndex(), e.getKind()));
}
break;
case AND:
@@ -1223,17 +1256,38 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
return null;
}
+ Frame oldInputFrame = frame;
// If this LogicalFilter has correlated reference, create value generator
// and produce the correlated variables in the new output.
if (cm.mapRefRelToCorRef.containsKey(rel)) {
frame = decorrelateInputWithValueGenerator(rel);
}
- // Replace the filter expression to reference output of the join
- // Map filter to the new filter over join
- relBuilder.push(frame.r).filter(
- simplifyComparison(decorrelateExpr(rel.getCondition())));
+ boolean valueGenerator = true;
+ if(frame.r == oldInputFrame.r) {
+ // this means correated value generator wasn't generated
+ valueGenerator = false;
+ }
+ if(oldInput instanceof LogicalCorrelate && ((LogicalCorrelate) oldInput).getJoinType() == SemiJoinType.SEMI
+ && !cm.mapRefRelToCorRef.containsKey(rel)) {
+ // this conditions need to be pushed into semi-join since this condition
+ // corresponds to IN
+ HiveSemiJoin join = ((HiveSemiJoin)frame.r);
+ final List<RexNode> conditions = new ArrayList<>();
+ RexNode joinCond = join.getCondition();
+ conditions.add(joinCond);
+ conditions.add(decorrelateExpr(rel.getCondition(), valueGenerator));
+ final RexNode condition =
+ RexUtil.composeConjunction(rexBuilder, conditions, false);
+ RelNode newRel = HiveSemiJoin.getSemiJoin(frame.r.getCluster(), frame.r.getTraitSet(), join.getLeft(), join.getRight(),
+ condition,join.getLeftKeys(), join.getRightKeys());
+ return register(rel, newRel, frame.oldToNewOutputs, frame.corDefOutputs);
+ }
+ // Replace the filter expression to reference output of the join
+ // Map filter to the new filter over join
+ relBuilder.push(frame.r).filter(
+ (decorrelateExpr(rel.getCondition(), valueGenerator)));
// Filter does not change the input ordering.
// Filter rel does not permute the input.
// All corvars produced by filter will have the same output positions in the
@@ -1243,39 +1297,6 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
}
}
- private RexNode simplifyComparison(RexNode op) {
- switch(op.getKind()) {
- case EQUALS:
- case GREATER_THAN:
- case GREATER_THAN_OR_EQUAL:
- case LESS_THAN:
- case LESS_THAN_OR_EQUAL:
- case NOT_EQUALS:
- RexCall e = (RexCall) op;
- final List<RexNode> operands = new ArrayList<>(e.operands);
-
- // Simplify "x <op> x"
- final RexNode o0 = operands.get(0);
- final RexNode o1 = operands.get(1);
- // this should only be called when we are creating filter (decorrelate filter)
- // since in that case null/unknown is treated as false we don't care about
- // nullability of operands and will always rewrite op=op to op is not null
- if (RexUtil.eq(o0, o1) )
- switch (e.getKind()) {
- case EQUALS:
- case GREATER_THAN_OR_EQUAL:
- case LESS_THAN_OR_EQUAL:
- // "x = x" simplifies to "x is not null" (similarly <= and >=)
- return rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, o0);
- default:
- // "x != x" simplifies to "false" (similarly < and >)
- return rexBuilder.makeLiteral(false);
- }
- }
- return op;
- }
-
-
/**
* Rewrite LogicalFilter.
*
@@ -1312,9 +1333,31 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
}
+ boolean valueGenerator = true;
+ if(frame.r == oldInput) {
+ // this means correated value generator wasn't generated
+ valueGenerator = false;
+ }
+
+ if(oldInput instanceof LogicalCorrelate && ((LogicalCorrelate) oldInput).getJoinType() == SemiJoinType.SEMI
+ && !cm.mapRefRelToCorRef.containsKey(rel)) {
+ // this conditions need to be pushed into semi-join since this condition
+ // corresponds to IN
+ HiveSemiJoin join = ((HiveSemiJoin)frame.r);
+ final List<RexNode> conditions = new ArrayList<>();
+ RexNode joinCond = join.getCondition();
+ conditions.add(joinCond);
+ conditions.add(decorrelateExpr(rel.getCondition(), valueGenerator));
+ final RexNode condition =
+ RexUtil.composeConjunction(rexBuilder, conditions, false);
+ RelNode newRel = HiveSemiJoin.getSemiJoin(frame.r.getCluster(), frame.r.getTraitSet(), join.getLeft(), join.getRight(),
+ condition,join.getLeftKeys(), join.getRightKeys());
+ return register(rel, newRel, frame.oldToNewOutputs, frame.corDefOutputs);
+ }
+
// Replace the filter expression to reference output of the join
// Map filter to the new filter over join
- relBuilder.push(frame.r).filter(decorrelateExpr(rel.getCondition()));
+ relBuilder.push(frame.r).filter(decorrelateExpr(rel.getCondition(), valueGenerator));
// Filter does not change the input ordering.
@@ -1344,6 +1387,9 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
final RelNode oldLeft = rel.getInput(0);
final RelNode oldRight = rel.getInput(1);
+ boolean mightRequireValueGen = new findIfValueGenRequired().traverse(oldRight);
+ valueGen.push(mightRequireValueGen);
+
final Frame leftFrame = getInvoke(oldLeft, rel);
final Frame rightFrame = getInvoke(oldRight, rel);
@@ -1380,11 +1426,24 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
}
final int newLeftPos = leftFrame.oldToNewOutputs.get(corDef.field);
final int newRightPos = rightOutput.getValue();
- conditions.add(
- rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
- RexInputRef.of(newLeftPos, newLeftOutput),
- new RexInputRef(newLeftFieldCount + newRightPos,
- newRightOutput.get(newRightPos).getType())));
+ if(corDef.getPredicateKind() == SqlKind.NOT_EQUALS) {
+ conditions.add(
+ rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS,
+ RexInputRef.of(newLeftPos, newLeftOutput),
+ new RexInputRef(newLeftFieldCount + newRightPos,
+ newRightOutput.get(newRightPos).getType())));
+
+ }
+ else {
+ assert(corDef.getPredicateKind() == null
+ || corDef.getPredicateKind() == SqlKind.EQUALS);
+ conditions.add(
+ rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
+ RexInputRef.of(newLeftPos, newLeftOutput),
+ new RexInputRef(newLeftFieldCount + newRightPos,
+ newRightOutput.get(newRightPos).getType())));
+
+ }
// remove this cor var from output position mapping
corDefOutputs.remove(corDef);
@@ -1408,24 +1467,40 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
int oldLeftFieldCount = oldLeft.getRowType().getFieldCount();
int oldRightFieldCount = oldRight.getRowType().getFieldCount();
- assert rel.getRowType().getFieldCount()
- == oldLeftFieldCount + oldRightFieldCount;
// Left input positions are not changed.
mapOldToNewOutputs.putAll(leftFrame.oldToNewOutputs);
- // Right input positions are shifted by newLeftFieldCount.
- for (int i = 0; i < oldRightFieldCount; i++) {
- mapOldToNewOutputs.put(
- i + oldLeftFieldCount,
- rightFrame.oldToNewOutputs.get(i) + newLeftFieldCount);
- }
final RexNode condition =
- RexUtil.composeConjunction(rexBuilder, conditions, false);
- RelNode newJoin =
- LogicalJoin.create(leftFrame.r, rightFrame.r, condition,
- ImmutableSet.<CorrelationId>of(), rel.getJoinType().toJoinType());
+ RexUtil.composeConjunction(rexBuilder, conditions, false);
+ RelNode newJoin = null;
+
+ // this indicates original query was either correlated EXISTS or IN
+ if(rel.getJoinType() == SemiJoinType.SEMI) {
+ final List<Integer> leftKeys = new ArrayList<Integer>();
+ final List<Integer> rightKeys = new ArrayList<Integer>();
+
+ RelNode[] inputRels = new RelNode[] { leftFrame.r, rightFrame.r};
+ newJoin = HiveSemiJoin.getSemiJoin(rel.getCluster(), rel.getCluster().traitSetOf(HiveRelNode.CONVENTION),
+ leftFrame.r, rightFrame.r, condition, ImmutableIntList.copyOf(leftKeys),
+ ImmutableIntList.copyOf(rightKeys));
+
+ }
+ else {
+ // Right input positions are shifted by newLeftFieldCount.
+ for (int i = 0; i < oldRightFieldCount; i++) {
+ mapOldToNewOutputs.put(
+ i + oldLeftFieldCount,
+ rightFrame.oldToNewOutputs.get(i) + newLeftFieldCount);
+ }
+
+ newJoin = LogicalJoin.create(leftFrame.r, rightFrame.r, condition,
+ ImmutableSet.<CorrelationId>of(), rel.getJoinType().toJoinType());
+
+ }
+
+ valueGen.pop();
return register(rel, newJoin, mapOldToNewOutputs, corDefOutputs);
}
@@ -1545,6 +1620,7 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
assert currentRel != null;
int oldOrdinal = oldInputRef.getIndex();
+ int oldOrdinalNo = oldOrdinal;
int newOrdinal = 0;
// determine which input rel oldOrdinal references, and adjust
@@ -1563,6 +1639,17 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
oldOrdinal -= n;
}
+ if(oldInput == null) {
+ if(currentRel.getInputs().size() == 1 && currentRel.getInput(0) instanceof LogicalCorrelate) {
+ final Frame newFrame = map.get(currentRel.getInput(0));
+ if(newFrame.r instanceof HiveSemiJoin) {
+ int oldFieldSize = currentRel.getInput(0).getRowType().getFieldCount();
+ int newOrd = newFrame.r.getRowType().getFieldCount() + oldOrdinalNo - oldFieldSize;
+ return new RexInputRef(newOrd, oldInputRef.getType());
+ }
+ }
+ }
+
assert oldInput != null;
final Frame frame = map.get(oldInput);
@@ -1819,7 +1906,66 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
/** Shuttle that decorrelates. */
private class DecorrelateRexShuttle extends RexShuttle {
+ private boolean valueGenerator;
+ public void setValueGenerator(boolean valueGenerator) {
+ this.valueGenerator = valueGenerator;
+ }
+
+ // DecorrelateRexShuttle ends up decorrelating expressions cor.col1 <> $4
+ // to $4=$4 if value generator is not generated, $4<>$4 is further simplified
+ // to false. This is wrong and messes up the whole tree. To prevent this visitCall
+ // is overridden to rewrite/simply such predicates to is not null.
+ // we also need to take care that we do this only for correlated predicates and
+ // not user specified explicit predicates
+ // TODO: This code should be removed once CALCITE-1851 is fixed and
+ // there is support of not equal
+ @Override public RexNode visitCall(final RexCall call) {
+ if(!valueGenerator) {
+ switch (call.getKind()) {
+ case EQUALS:
+ case NOT_EQUALS:
+ final List<RexNode> operands = new ArrayList<>(call.operands);
+ RexNode o0 = operands.get(0);
+ RexNode o1 = operands.get(1);
+ boolean isCorrelated = false;
+ if (o0 instanceof RexFieldAccess && (cm.mapFieldAccessToCorRef.get(o0) != null)) {
+ o0 = decorrFieldAccess((RexFieldAccess) o0);
+ isCorrelated = true;
+
+ }
+ if (o1 instanceof RexFieldAccess && (cm.mapFieldAccessToCorRef.get(o1) != null)) {
+ o1 = decorrFieldAccess((RexFieldAccess) o1);
+ isCorrelated = true;
+ }
+ if (isCorrelated && RexUtil.eq(o0, o1)) {
+ return rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, o0);
+ }
+
+ final List<RexNode> newOperands = new ArrayList<>();
+ newOperands.add(o0);
+ newOperands.add(o1);
+ boolean[] update = { false };
+ List<RexNode> clonedOperands = visitList(newOperands, update);
+
+ return relBuilder.call(call.getOperator(), clonedOperands);
+ }
+ }
+ return super.visitCall(call);
+ }
+
@Override public RexNode visitFieldAccess(RexFieldAccess fieldAccess) {
+ return decorrFieldAccess(fieldAccess);
+ }
+
+ @Override public RexNode visitInputRef(RexInputRef inputRef) {
+ final RexInputRef ref = getNewForOldInputRef(inputRef);
+ if (ref.getIndex() == inputRef.getIndex()
+ && ref.getType() == inputRef.getType()) {
+ return inputRef; // re-use old object, to prevent needless expr cloning
+ }
+ return ref;
+ }
+ private RexNode decorrFieldAccess(RexFieldAccess fieldAccess) {
int newInputOutputOffset = 0;
for (RelNode input : currentRel.getInputs()) {
final Frame frame = map.get(input);
@@ -1834,7 +1980,7 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
// This input rel does produce the cor var referenced.
// Assume fieldAccess has the correct type info.
return new RexInputRef(newInputPos + newInputOutputOffset,
- frame.r.getRowType().getFieldList().get(newInputPos)
+ frame.r.getRowType().getFieldList().get(newInputPos)
.getType());
}
}
@@ -1848,15 +1994,6 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
}
return fieldAccess;
}
-
- @Override public RexNode visitInputRef(RexInputRef inputRef) {
- final RexInputRef ref = getNewForOldInputRef(inputRef);
- if (ref.getIndex() == inputRef.getIndex()
- && ref.getType() == inputRef.getType()) {
- return inputRef; // re-use old object, to prevent needless expr cloning
- }
- return ref;
- }
}
/** Shuttle that removes correlations. */
@@ -2881,10 +3018,12 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
static class CorDef implements Comparable<CorDef> {
public final CorrelationId corr;
public final int field;
+ private SqlKind predicateKind;
CorDef(CorrelationId corr, int field) {
this.corr = corr;
this.field = field;
+ this.predicateKind = null;
}
@Override public String toString() {
@@ -2909,6 +3048,13 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
}
return Integer.compare(field, o.field);
}
+ public SqlKind getPredicateKind() {
+ return predicateKind;
+ }
+ public void setPredicateKind(SqlKind predKind) {
+ this.predicateKind = predKind;
+
+ }
}
/** A map of the locations of
@@ -2986,6 +3132,107 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
}
}
+ private static class findIfValueGenRequired extends HiveRelShuttleImpl {
+ private boolean mightRequireValueGen ;
+ findIfValueGenRequired() { this.mightRequireValueGen = true; }
+
+ private boolean hasRexOver(List<RexNode> projects) {
+ for(RexNode expr : projects) {
+ if(expr instanceof RexOver) {
+ return true;
+ }
+ }
+ return false;
+ }
+ @Override public RelNode visit(HiveJoin rel) {
+ mightRequireValueGen = true;
+ return rel;
+ }
+ public RelNode visit(HiveSortLimit rel) {
+ mightRequireValueGen = true;
+ return rel;
+ }
+ public RelNode visit(HiveUnion rel) {
+ mightRequireValueGen = true;
+ return rel;
+ }
+ public RelNode visit(LogicalUnion rel) {
+ mightRequireValueGen = true;
+ return rel;
+ }
+ public RelNode visit(LogicalIntersect rel) {
+ mightRequireValueGen = true;
+ return rel;
+ }
+
+ public RelNode visit(HiveIntersect rel) {
+ mightRequireValueGen = true;
+ return rel;
+ }
+
+ @Override public RelNode visit(LogicalJoin rel) {
+ mightRequireValueGen = true;
+ return rel;
+ }
+ @Override public RelNode visit(HiveProject rel) {
+ if(!(hasRexOver(((HiveProject)rel).getProjects()))) {
+ mightRequireValueGen = false;
+ return super.visit(rel);
+ }
+ else {
+ mightRequireValueGen = true;
+ return rel;
+ }
+ }
+ @Override public RelNode visit(LogicalProject rel) {
+ if(!(hasRexOver(((LogicalProject)rel).getProjects()))) {
+ mightRequireValueGen = false;
+ return super.visit(rel);
+ }
+ else {
+ mightRequireValueGen = true;
+ return rel;
+ }
+ }
+ @Override public RelNode visit(HiveAggregate rel) {
+ // if there are aggregate functions or grouping sets we will need
+ // value generator
+ if((((HiveAggregate)rel).getAggCallList().isEmpty() == true
+ && ((HiveAggregate)rel).indicator == false)) {
+ this.mightRequireValueGen = false;
+ return super.visit(rel);
+ }
+ else {
+ // need to reset to true in case previous aggregate/project
+ // has set it to false
+ this.mightRequireValueGen = true;
+ return rel;
+ }
+ }
+ @Override public RelNode visit(LogicalAggregate rel) {
+ if((((LogicalAggregate)rel).getAggCallList().isEmpty() == true
+ && ((LogicalAggregate)rel).indicator == false)) {
+ this.mightRequireValueGen = false;
+ return super.visit(rel);
+ }
+ else {
+ // need to reset to true in case previous aggregate/project
+ // has set it to false
+ this.mightRequireValueGen = true;
+ return rel;
+ }
+ }
+ @Override public RelNode visit(LogicalCorrelate rel) {
+ // this means we are hitting nested subquery so don't
+ // need to go further
+ return rel;
+ }
+
+ public boolean traverse(RelNode root) {
+ root.accept(this);
+ return mightRequireValueGen;
+ }
+ }
/** Builds a {@link org.apache.calcite.sql2rel.RelDecorrelator.CorelMap}. */
private static class CorelMapBuilder extends HiveRelShuttleImpl {
final SortedMap<CorrelationId, RelNode> mapCorToCorRel =
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
index 2dca6a2..90aab6e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
@@ -370,9 +370,23 @@ public class HiveSubQueryRemoveRule extends RelOptRule{
case TRUE:
if (fields.isEmpty()) {
builder.project(builder.alias(builder.literal(true), "i" + e.rel.getId()));
- builder.aggregate(builder.groupKey(0));
+ if(!variablesSet.isEmpty() && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) {
+ // avoid adding group by for correlated IN/EXISTS queries
+ // since this is rewritting into semijoin
+ break;
+ }
+ else {
+ builder.aggregate(builder.groupKey(0));
+ }
} else {
- builder.aggregate(builder.groupKey(fields));
+ if(!variablesSet.isEmpty() && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) {
+ // avoid adding group by for correlated IN/EXISTS queries
+ // since this is rewritting into semijoin
+ break;
+ }
+ else {
+ builder.aggregate(builder.groupKey(fields));
+ }
}
break;
default:
@@ -389,7 +403,7 @@ public class HiveSubQueryRemoveRule extends RelOptRule{
}
switch (logic) {
case TRUE:
- builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet);
+ builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet, true);
return builder.literal(true);
}
builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet);
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
index 5e8a994..73fdff4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
@@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelColumnsAlignment;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -273,10 +274,12 @@ public class PlanModifierForASTConv {
// But we only need the additional project if the left child
// is another join too; if it is not, ASTConverter will swap
// the join inputs, leaving the join operator on the left.
+ // we also do it if parent is HiveSemiJoin since ASTConverter won't
+ // swap inputs then
// This will help triggering multijoin recognition methods that
// are embedded in SemanticAnalyzer.
if (((Join) parent).getRight() == joinNode &&
- (((Join) parent).getLeft() instanceof Join) ) {
+ (((Join) parent).getLeft() instanceof Join || parent instanceof HiveSemiJoin) ) {
validParent = false;
}
} else if (parent instanceof SetOp) {
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 80351be..1363bbb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1788,11 +1788,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
// 5. Push Down Semi Joins
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
+ //TODO: Enable this later
+ /*perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, SemiJoinJoinTransposeRule.INSTANCE,
SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE);
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
- "Calcite: Prejoin ordering transformation, Push Down Semi Joins");
+ "Calcite: Prejoin ordering transformation, Push Down Semi Joins"); */
// 6. Apply Partition Pruning
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/queries/clientpositive/subquery_exists.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_exists.q b/ql/src/test/queries/clientpositive/subquery_exists.q
index 19c42f0..08681e5 100644
--- a/ql/src/test/queries/clientpositive/subquery_exists.q
+++ b/ql/src/test/queries/clientpositive/subquery_exists.q
@@ -89,8 +89,8 @@ drop table t;
drop table if exists tx1;
create table tx1 (a integer,b integer);
insert into tx1 values (1, 1),
- (1, 2),
- (1, 3);
+ (1, 2),
+ (1, 3);
select count(*) as result,3 as expected from tx1 u
where exists (select * from tx1 v where u.a=v.a and u.b <> v.b);
@@ -108,4 +108,4 @@ insert into t2 values(4,2),(4,3),(4,5);
explain select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j);
select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j);
drop table t1;
-drop table t2;
+drop table t2;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/queries/clientpositive/subquery_in.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_in.q b/ql/src/test/queries/clientpositive/subquery_in.q
index 4ba170a..33cc2fe 100644
--- a/ql/src/test/queries/clientpositive/subquery_in.q
+++ b/ql/src/test/queries/clientpositive/subquery_in.q
@@ -74,25 +74,47 @@ from part b where b.p_size in
;
-- distinct, corr
-explain
-select *
-from src b
+explain
+select *
+from src b
where b.key in
- (select distinct a.key
- from src a
+ (select distinct a.key
+ from src a
where b.value = a.value and a.key > '9'
)
;
-select *
-from src b
+select *
+from src b
where b.key in
- (select distinct a.key
- from src a
+ (select distinct a.key
+ from src a
where b.value = a.value and a.key > '9'
)
;
+-- corr, non equi predicate, should not have a join with outer to generate
+-- corr values
+explain
+select *
+from src b
+where b.key in
+ (select distinct a.key
+ from src a
+ where b.value <> a.key and a.key > '9'
+ )
+;
+
+select *
+from src b
+where b.key in
+ (select distinct a.key
+ from src a
+ where b.value <> a.key and a.key > '9'
+ )
+;
+
+
-- non agg, non corr, windowing
select p_mfgr, p_name, p_size
from part
@@ -267,3 +289,13 @@ select * from t where i IN (select sum(i) from tt where tt.j = t.j);
drop table t;
drop table tt;
+
+-- since inner query has aggregate it will be joined with outer to get all possible corrrelated values
+explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name);
+select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name);
+
+-- inner query has join so should have a join with outer query to fetch all corr values
+explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name);
+select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name);
+
+
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/constprog_partitioner.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
index 87618df..455c630 100644
--- a/ql/src/test/results/clientpositive/constprog_partitioner.q.out
+++ b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
@@ -91,7 +91,7 @@ STAGE PLANS:
alias: li
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (l_linenumber = 1) (type: boolean)
+ predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean)
Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int)
@@ -107,10 +107,10 @@ STAGE PLANS:
alias: lineitem
Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) (type: boolean)
+ predicate: ((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean)
Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: l_orderkey (type: int), l_linenumber (type: int)
+ expressions: l_orderkey (type: int), 1 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
Group By Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
index 998a5df..7a48615 100644
--- a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
@@ -119,24 +119,26 @@ Stage-0
Select Operator [SEL_17] (rows=5 width=232)
Output:["_col0","_col1","_col2"]
Merge Join Operator [MERGEJOIN_28] (rows=5 width=232)
- Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col0","_col4"]
+ Conds:RS_14._col2=RS_15._col0(Left Semi),Output:["_col0","_col4"]
<-Map 5 [SIMPLE_EDGE] llap
SHUFFLE [RS_15]
PartitionCols:_col0
- Select Operator [SEL_8] (rows=3 width=188)
- Output:["_col0","_col1"]
- Filter Operator [FIL_26] (rows=3 width=188)
- predicate:id is not null
- TableScan [TS_6] (rows=3 width=188)
- default@table2,table2,Tbl:COMPLETE,Col:NONE,Output:["id","val2"]
+ Group By Operator [GBY_13] (rows=5 width=4)
+ Output:["_col0"],keys:_col0
+ Select Operator [SEL_8] (rows=5 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_26] (rows=5 width=4)
+ predicate:id is not null
+ TableScan [TS_6] (rows=5 width=4)
+ default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
<-Reducer 2 [SIMPLE_EDGE] llap
SHUFFLE [RS_14]
- PartitionCols:_col0
+ PartitionCols:_col2
Merge Join Operator [MERGEJOIN_27] (rows=5 width=211)
- Conds:RS_11._col2=RS_12._col0(Left Semi),Output:["_col0"]
+ Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col2","_col4"]
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_11]
- PartitionCols:_col2
+ SHUFFLE [RS_9]
+ PartitionCols:_col0
Select Operator [SEL_2] (rows=5 width=192)
Output:["_col0","_col2"]
Filter Operator [FIL_24] (rows=5 width=192)
@@ -144,16 +146,14 @@ Stage-0
TableScan [TS_0] (rows=10 width=192)
default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","dimid"]
<-Map 4 [SIMPLE_EDGE] llap
- SHUFFLE [RS_12]
+ SHUFFLE [RS_10]
PartitionCols:_col0
- Group By Operator [GBY_10] (rows=5 width=4)
- Output:["_col0"],keys:_col0
- Select Operator [SEL_5] (rows=5 width=4)
- Output:["_col0"]
- Filter Operator [FIL_25] (rows=5 width=4)
- predicate:id is not null
- TableScan [TS_3] (rows=5 width=4)
- default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
+ Select Operator [SEL_5] (rows=3 width=188)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_25] (rows=3 width=188)
+ predicate:id is not null
+ TableScan [TS_3] (rows=3 width=188)
+ default@table2,table2,Tbl:COMPLETE,Col:NONE,Output:["id","val2"]
PREHOOK: query: select table1.id, table1.val, table2.val2 from table1 inner join table2 on table1.val = 't1val01' and table1.id = table2.id left semi join table3 on table1.dimid = table3.id
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
index 87e08fb..e2e2ef3 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
@@ -73,14 +73,13 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
- Map 14 <- Reducer 13 (BROADCAST_EDGE)
- Reducer 10 <- Map 14 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
- Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
- Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE)
- Reducer 13 <- Map 9 (CUSTOM_SIMPLE_EDGE)
+ Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE)
+ Map 13 <- Reducer 12 (BROADCAST_EDGE)
+ Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+ Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE)
+ Reducer 12 <- Map 9 (CUSTOM_SIMPLE_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE)
Reducer 7 <- Map 5 (CUSTOM_SIMPLE_EDGE)
@@ -91,10 +90,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t1
- filterExpr: (bigint_col_7 is not null and decimal2016_col_26 is not null and tinyint_col_3 is not null and (decimal2016_col_26 BETWEEN DynamicValue(RS_21_t2_decimal2709_col_9_min) AND DynamicValue(RS_21_t2_decimal2709_col_9_max) and in_bloom_filter(decimal2016_col_26, DynamicValue(RS_21_t2_decimal2709_col_9_bloom_filter))) and (tinyint_col_3 BETWEEN DynamicValue(RS_21_t2_tinyint_col_20_min) AND DynamicValue(RS_21_t2_tinyint_col_20_max) and in_bloom_filter(tinyint_col_3, DynamicValue(RS_21_t2_tinyint_col_20_bloom_filter))) and (bigint_col_7 BETWEEN DynamicValue(RS_21_t2_tinyint_col_15_min) AND DynamicValue(RS_21_t2_tinyint_col_15_max) and in_bloom_filter(bigint_col_7, DynamicValue(RS_21_t2_tinyint_col_15_bloom_filter))) and (timestamp_col_9 BETWEEN DynamicValue(RS_24_tt2_timestamp_col_18_min) AND DynamicValue(RS_24_tt2_timestamp_col_18_max) and in_bloom_filter(timestamp_col_9, DynamicValue(RS_24_tt2_timestamp_col_18_bloom_filter)))) (type: boolean)
+ filterExpr: (bigint_col_7 is not null and decimal2016_col_26 is not null and tinyint_col_3 is not null and timestamp_col_9 is not null and (decimal2016_col_26 BETWEEN DynamicValue(RS_17_t2_decimal2709_col_9_min) AND DynamicValue(RS_17_t2_decimal2709_col_9_max) and in_bloom_filter(decimal2016_col_26, DynamicValue(RS_17_t2_decimal2709_col_9_bloom_filter))) and (tinyint_col_3 BETWEEN DynamicValue(RS_17_t2_tinyint_col_20_min) AND DynamicValue(RS_17_t2_tinyint_col_20_max) and in_bloom_filter(tinyint_col_3, DynamicValue(RS_17_t2_tinyint_col_20_bloom_filter))) and (bigint_col_7 BETWEEN DynamicValue(RS_17_t2_tinyint_col_15_min) AND DynamicValue(RS_17_t2_tinyint_col_15_max) and in_bloom_filter(bigint_col_7, DynamicValue(RS_17_t2_tinyint_col_15_bloom_filter))) and (timestamp_col_9 BETWEEN DynamicValue(RS_22_tt2_timestamp_col_18_min) AND DynamicValue(RS_22_tt2_timestamp_col_18_max) and in_bloom_filter(timestamp_col_9, DynamicValue(RS_22_tt2_timestamp_col_18_bloom_filter)))) (
type: boolean)
Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((bigint_col_7 BETWEEN DynamicValue(RS_21_t2_tinyint_col_15_min) AND DynamicValue(RS_21_t2_tinyint_col_15_max) and in_bloom_filter(bigint_col_7, DynamicValue(RS_21_t2_tinyint_col_15_bloom_filter))) and (decimal2016_col_26 BETWEEN DynamicValue(RS_21_t2_decimal2709_col_9_min) AND DynamicValue(RS_21_t2_decimal2709_col_9_max) and in_bloom_filter(decimal2016_col_26, DynamicValue(RS_21_t2_decimal2709_col_9_bloom_filter))) and (timestamp_col_9 BETWEEN DynamicValue(RS_24_tt2_timestamp_col_18_min) AND DynamicValue(RS_24_tt2_timestamp_col_18_max) and in_bloom_filter(timestamp_col_9, DynamicValue(RS_24_tt2_timestamp_col_18_bloom_filter))) and (tinyint_col_3 BETWEEN DynamicValue(RS_21_t2_tinyint_col_20_min) AND DynamicValue(RS_21_t2_tinyint_col_20_max) and in_bloom_filter(tinyint_col_3, DynamicValue(RS_21_t2_tinyint_col_20_bloom_filter))) and bigint_col_7 is not null and decimal2016_col_26 is not null and tinyint_col_3 is not null) (type: boolean)
+ predicate: ((bigint_col_7 BETWEEN DynamicValue(RS_17_t2_tinyint_col_15_min) AND DynamicValue(RS_17_t2_tinyint_col_15_max) and in_bloom_filter(bigint_col_7, DynamicValue(RS_17_t2_tinyint_col_15_bloom_filter))) and (decimal2016_col_26 BETWEEN DynamicValue(RS_17_t2_decimal2709_col_9_min) AND DynamicValue(RS_17_t2_decimal2709_col_9_max) and in_bloom_filter(decimal2016_col_26, DynamicValue(RS_17_t2_decimal2709_col_9_bloom_filter))) and (timestamp_col_9 BETWEEN DynamicValue(RS_22_tt2_timestamp_col_18_min) AND DynamicValue(RS_22_tt2_timestamp_col_18_max) and in_bloom_filter(timestamp_col_9, DynamicValue(RS_22_tt2_timestamp_col_18_bloom_filter))) and (tinyint_col_3 BETWEEN DynamicValue(RS_17_t2_tinyint_col_20_min) AND DynamicValue(RS_17_t2_tinyint_col_20_max) and in_bloom_filter(tinyint_col_3, DynamicValue(RS_17_t2_tinyint_col_20_bloom_filter))) and bigint_col_7 is not null and decimal2016_col_26 is not null and timestamp_col_9 is not null and tinyint_col_3 is not null)
(type: boolean)
Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: bigint_col_7 (type: bigint), decimal2016_col_26 (type: decimal(20,16)), tinyint_col_3 (type: tinyint), timestamp_col_9 (type: timestamp)
@@ -108,7 +107,7 @@ STAGE PLANS:
value expressions: _col3 (type: timestamp)
Execution mode: llap
LLAP IO: no inputs
- Map 14
+ Map 13
Map Operator Tree:
TableScan
alias: tt2
@@ -133,10 +132,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- filterExpr: (tinyint_col_15 is not null and decimal2709_col_9 is not null and tinyint_col_20 is not null) (type: boolean)
+ filterExpr: (tinyint_col_15 is not null and decimal2709_col_9 is not null and tinyint_col_20 is not null and smallint_col_19 is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (decimal2709_col_9 is not null and tinyint_col_15 is not null and tinyint_col_20 is not null) (type: boolean)
+ predicate: (decimal2709_col_9 is not null and smallint_col_19 is not null and tinyint_col_15 is not null and tinyint_col_20 is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: tinyint_col_15 (type: tinyint), decimal2709_col_9 (type: decimal(27,9)), tinyint_col_20 (type: tinyint), smallint_col_19 (type: smallint)
@@ -233,47 +232,34 @@ STAGE PLANS:
1 _col0 (type: decimal(26,12))
outputColumnNames: _col2
Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col2 (type: timestamp)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: timestamp)
- sort order: +
- Map-reduce partition columns: _col0 (type: timestamp)
- Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
- Reducer 11
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- keys: KEY._col0 (type: timestamp)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: timestamp)
- outputColumnNames: _col1
+ expressions: _col2 (type: timestamp), -92 (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: timestamp), -92 (type: int)
- sort order: ++
- Map-reduce partition columns: _col1 (type: timestamp), -92 (type: int)
+ Group By Operator
+ keys: _col0 (type: timestamp), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: timestamp)
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
+ Reduce Output Operator
+ key expressions: _col0 (type: timestamp), _col1 (type: int)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: int)
+ Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: timestamp)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary)
- Reducer 12
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary)
+ Reducer 11
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -285,7 +271,7 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary)
- Reducer 13
+ Reducer 12
Execution mode: llap
Reduce Operator Tree:
Group By Operator
@@ -318,10 +304,10 @@ STAGE PLANS:
Reduce Operator Tree:
Merge Join Operator
condition map:
- Inner Join 0 to 1
+ Left Semi Join 0 to 1
keys:
0 _col3 (type: timestamp), UDFToInteger(_col7) (type: int)
- 1 _col1 (type: timestamp), -92 (type: int)
+ 1 _col0 (type: timestamp), _col1 (type: int)
Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
http://git-wip-us.apache.org/repos/asf/hive/blob/aee0eaa0/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index 6e55acf..f03a65f 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -1683,11 +1683,11 @@ Stage-0
Stage-1
Reducer 2 llap
File Output Operator [FS_19]
- Select Operator [SEL_18] (rows=434 width=178)
+ Select Operator [SEL_18] (rows=366 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_17] (rows=434 width=178)
+ Filter Operator [FIL_17] (rows=366 width=179)
predicate:_col3 is null
- Merge Join Operator [MERGEJOIN_22] (rows=500 width=178)
+ Merge Join Operator [MERGEJOIN_22] (rows=500 width=179)
Conds:RS_14._col1=RS_15._col0(Left Outer),Output:["_col0","_col1","_col3"]
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_14]
@@ -1699,21 +1699,21 @@ Stage-0
<-Reducer 3 [SIMPLE_EDGE] llap
SHUFFLE [RS_15]
PartitionCols:_col0
- Select Operator [SEL_13] (rows=41 width=95)
+ Select Operator [SEL_13] (rows=83 width=95)
Output:["_col0","_col1"]
- Group By Operator [GBY_12] (rows=41 width=91)
+ Group By Operator [GBY_12] (rows=83 width=91)
Output:["_col0"],keys:_col1
- Select Operator [SEL_8] (rows=41 width=178)
+ Select Operator [SEL_8] (rows=83 width=178)
Output:["_col1"]
- Group By Operator [GBY_7] (rows=41 width=178)
+ Group By Operator [GBY_7] (rows=83 width=178)
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_6]
PartitionCols:_col0
- Group By Operator [GBY_5] (rows=41 width=178)
+ Group By Operator [GBY_5] (rows=83 width=178)
Output:["_col0","_col1"],keys:value, key
- Filter Operator [FIL_21] (rows=83 width=178)
- predicate:((value = value) and (value > 'val_2'))
+ Filter Operator [FIL_21] (rows=166 width=178)
+ predicate:(value > 'val_2')
Please refer to the previous TableScan [TS_0]
PREHOOK: query: explain select *
@@ -1747,11 +1747,11 @@ Stage-0
Stage-1
Reducer 3 llap
File Output Operator [FS_18]
- Select Operator [SEL_17] (rows=234 width=178)
+ Select Operator [SEL_17] (rows=183 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_16] (rows=234 width=178)
+ Filter Operator [FIL_16] (rows=183 width=179)
predicate:_col4 is null
- Merge Join Operator [MERGEJOIN_21] (rows=250 width=178)
+ Merge Join Operator [MERGEJOIN_21] (rows=250 width=179)
Conds:RS_13._col0, _col1=RS_14._col0, _col1(Left Outer),Output:["_col0","_col1","_col4"]
<-Reducer 2 [ONE_TO_ONE_EDGE] llap
FORWARD [RS_13]
@@ -1770,17 +1770,17 @@ Stage-0
<-Reducer 4 [ONE_TO_ONE_EDGE] llap
FORWARD [RS_14]
PartitionCols:_col0, _col1
- Select Operator [SEL_12] (rows=20 width=182)
+ Select Operator [SEL_12] (rows=83 width=182)
Output:["_col0","_col1","_col2"]
- Group By Operator [GBY_11] (rows=20 width=178)
+ Group By Operator [GBY_11] (rows=83 width=178)
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_10]
PartitionCols:_col0, _col1
- Group By Operator [GBY_9] (rows=20 width=178)
+ Group By Operator [GBY_9] (rows=83 width=178)
Output:["_col0","_col1"],keys:key, value
- Filter Operator [FIL_20] (rows=41 width=178)
- predicate:((key = key) and (value = value) and (value > 'val_12'))
+ Filter Operator [FIL_20] (rows=166 width=178)
+ predicate:((value > 'val_12') and key is not null)
Please refer to the previous TableScan [TS_0]
PREHOOK: query: create view cv1 as
@@ -1821,26 +1821,28 @@ Stage-0
limit:-1
Stage-1
Reducer 2 llap
- File Output Operator [FS_11]
- Merge Join Operator [MERGEJOIN_16] (rows=32 width=178)
- Conds:RS_7._col0, _col1=RS_8._col0, _col1(Left Semi),Output:["_col0","_col1"]
+ File Output Operator [FS_12]
+ Merge Join Operator [MERGEJOIN_17] (rows=133 width=178)
+ Conds:RS_8._col0, _col1=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1"]
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_7]
+ SHUFFLE [RS_8]
PartitionCols:_col0, _col1
- Select Operator [SEL_1] (rows=500 width=178)
+ Select Operator [SEL_2] (rows=166 width=178)
Output:["_col0","_col1"]
- TableScan [TS_0] (rows=500 width=178)
- default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"}
+ Filter Operator [FIL_15] (rows=166 width=178)
+ predicate:((value > 'val_9') and key is not null)
+ TableScan [TS_0] (rows=500 width=178)
+ default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"}
<-Map 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_8]
+ SHUFFLE [RS_9]
PartitionCols:_col0, _col1
- Group By Operator [GBY_6] (rows=20 width=178)
+ Group By Operator [GBY_7] (rows=83 width=178)
Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_4] (rows=41 width=178)
+ Select Operator [SEL_5] (rows=166 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_15] (rows=41 width=178)
- predicate:((key = key) and (value = value) and (value > 'val_9'))
- TableScan [TS_2] (rows=500 width=178)
+ Filter Operator [FIL_16] (rows=166 width=178)
+ predicate:((value > 'val_9') and key is not null)
+ TableScan [TS_3] (rows=500 width=178)
default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
PREHOOK: query: explain select *
@@ -1871,26 +1873,28 @@ Stage-0
limit:-1
Stage-1
Reducer 2 llap
- File Output Operator [FS_11]
- Merge Join Operator [MERGEJOIN_16] (rows=32 width=178)
- Conds:RS_7._col0, _col1=RS_8._col0, _col1(Left Semi),Output:["_col0","_col1"]
+ File Output Operator [FS_12]
+ Merge Join Operator [MERGEJOIN_17] (rows=133 width=178)
+ Conds:RS_8._col0, _col1=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1"]
<-Map 1 [SIMPLE_EDGE] llap
- SHUFFLE [RS_7]
+ SHUFFLE [RS_8]
PartitionCols:_col0, _col1
- Select Operator [SEL_1] (rows=500 width=178)
+ Select Operator [SEL_2] (rows=166 width=178)
Output:["_col0","_col1"]
- TableScan [TS_0] (rows=500 width=178)
- default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+ Filter Operator [FIL_15] (rows=166 width=178)
+ predicate:((value > 'val_9') and key is not null)
+ TableScan [TS_0] (rows=500 width=178)
+ default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
<-Map 3 [SIMPLE_EDGE] llap
- SHUFFLE [RS_8]
+ SHUFFLE [RS_9]
PartitionCols:_col0, _col1
- Group By Operator [GBY_6] (rows=20 width=178)
+ Group By Operator [GBY_7] (rows=83 width=178)
Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_4] (rows=41 width=178)
+ Select Operator [SEL_5] (rows=166 width=178)
Output:["_col0","_col1"]
- Filter Operator [FIL_15] (rows=41 width=178)
- predicate:((key = key) and (value = value) and (value > 'val_9'))
- TableScan [TS_2] (rows=500 width=178)
+ Filter Operator [FIL_16] (rows=166 width=178)
+ predicate:((value > 'val_9') and key is not null)
+ TableScan [TS_3] (rows=500 width=178)
default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
PREHOOK: query: explain select *
@@ -1958,19 +1962,19 @@ Stage-0
Stage-1
Reducer 4 llap
File Output Operator [FS_22]
- Select Operator [SEL_21] (rows=7 width=8)
+ Select Operator [SEL_21] (rows=3 width=8)
Output:["_col0","_col1"]
- Merge Join Operator [MERGEJOIN_32] (rows=7 width=8)
+ Merge Join Operator [MERGEJOIN_32] (rows=3 width=8)
Conds:RS_18._col1, _col4=RS_19._col0, _col1(Left Semi),Output:["_col0","_col3"]
<-Map 6 [SIMPLE_EDGE] llap
SHUFFLE [RS_19]
PartitionCols:_col0, _col1
- Group By Operator [GBY_17] (rows=2 width=8)
+ Group By Operator [GBY_17] (rows=1 width=8)
Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_12] (rows=7 width=8)
+ Select Operator [SEL_12] (rows=2 width=8)
Output:["_col0","_col1"]
- Filter Operator [FIL_30] (rows=7 width=96)
- predicate:((l_linenumber = l_linenumber) and (l_shipmode = 'AIR'))
+ Filter Operator [FIL_30] (rows=2 width=96)
+ predicate:((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null)
TableScan [TS_10] (rows=100 width=96)
default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"]
<-Reducer 3 [SIMPLE_EDGE] llap
@@ -1984,7 +1988,7 @@ Stage-0
Select Operator [SEL_9] (rows=14 width=16)
Output:["_col0","_col1","_col2","_col3"]
Filter Operator [FIL_29] (rows=14 width=16)
- predicate:((l_linenumber = 1) and l_partkey is not null)
+ predicate:((l_linenumber = 1) and l_orderkey is not null and l_partkey is not null)
TableScan [TS_7] (rows=100 width=16)
default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"]
<-Reducer 2 [ONE_TO_ONE_EDGE] llap
@@ -2258,14 +2262,14 @@ Stage-0
File Output Operator [FS_26]
Select Operator [SEL_25] (rows=13 width=223)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_24] (rows=13 width=227)
+ Filter Operator [FIL_24] (rows=13 width=231)
predicate:(not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col4 is null) THEN (false) WHEN (_col8 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END)
- Merge Join Operator [MERGEJOIN_32] (rows=26 width=227)
+ Merge Join Operator [MERGEJOIN_32] (rows=26 width=230)
Conds:RS_21._col0, _col1=RS_22._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col8"]
<-Reducer 2 [SIMPLE_EDGE] llap
SHUFFLE [RS_21]
PartitionCols:_col0, _col1
- Merge Join Operator [MERGEJOIN_31] (rows=26 width=226)
+ Merge Join Operator [MERGEJOIN_31] (rows=26 width=229)
Conds:RS_18._col1=RS_19._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"]
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_18]
@@ -2277,36 +2281,36 @@ Stage-0
<-Reducer 4 [ONE_TO_ONE_EDGE] llap
FORWARD [RS_19]
PartitionCols:_col0
- Group By Operator [GBY_7] (rows=1 width=114)
+ Group By Operator [GBY_7] (rows=2 width=114)
Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_6]
PartitionCols:_col0
- Group By Operator [GBY_5] (rows=1 width=114)
+ Group By Operator [GBY_5] (rows=2 width=114)
Output:["_col0","_col1","_col2"],aggregations:["count()","count(p_name)"],keys:p_mfgr
- Select Operator [SEL_4] (rows=4 width=223)
+ Select Operator [SEL_4] (rows=8 width=223)
Output:["p_name","p_mfgr"]
- Filter Operator [FIL_29] (rows=4 width=223)
- predicate:((p_mfgr = p_mfgr) and (p_size < 10))
+ Filter Operator [FIL_29] (rows=8 width=223)
+ predicate:((p_size < 10) and p_mfgr is not null)
Please refer to the previous TableScan [TS_0]
<-Reducer 5 [ONE_TO_ONE_EDGE] llap
FORWARD [RS_22]
PartitionCols:_col0, _col1
- Select Operator [SEL_17] (rows=2 width=223)
+ Select Operator [SEL_17] (rows=4 width=223)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_16] (rows=2 width=219)
+ Filter Operator [FIL_16] (rows=4 width=219)
predicate:_col0 is not null
- Group By Operator [GBY_14] (rows=2 width=219)
+ Group By Operator [GBY_14] (rows=4 width=219)
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_13]
PartitionCols:_col0, _col1
- Group By Operator [GBY_12] (rows=2 width=219)
+ Group By Operator [GBY_12] (rows=4 width=219)
Output:["_col0","_col1"],keys:p_name, p_mfgr
- Select Operator [SEL_11] (rows=4 width=223)
+ Select Operator [SEL_11] (rows=8 width=223)
Output:["p_name","p_mfgr"]
- Filter Operator [FIL_30] (rows=4 width=223)
- predicate:((p_mfgr = p_mfgr) and (p_size < 10))
+ Filter Operator [FIL_30] (rows=8 width=223)
+ predicate:((p_size < 10) and p_mfgr is not null)
Please refer to the previous TableScan [TS_0]
PREHOOK: query: explain select p_name, p_size
@@ -2465,7 +2469,7 @@ Stage-0
Select Operator [SEL_12] (rows=1 width=114)
Output:["_col0","_col1"]
Filter Operator [FIL_40] (rows=1 width=114)
- predicate:(((_col2 - _col1) > 600.0) and (_col1 = _col1))
+ predicate:(((_col2 - _col1) > 600.0) and _col1 is not null)
Group By Operator [GBY_10] (rows=5 width=114)
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0
<-Map 1 [SIMPLE_EDGE] llap
@@ -2484,7 +2488,7 @@ Stage-0
Select Operator [SEL_24] (rows=1 width=110)
Output:["_col0","_col1"]
Filter Operator [FIL_41] (rows=1 width=114)
- predicate:(((_col2 - _col1) > 600.0) and (_col1 = _col1))
+ predicate:(((_col2 - _col1) > 600.0) and _col1 is not null)
Group By Operator [GBY_22] (rows=5 width=114)
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0
<-Map 1 [SIMPLE_EDGE] llap