You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/07/14 05:16:35 UTC
[12/12] hive git commit: HIVE-20090 : Extend creation of semijoin
reduction filters to be able to discover new opportunities (Jesus Camacho
Rodriguez via Deepak Jaiswal)
HIVE-20090 : Extend creation of semijoin reduction filters to be able to discover new opportunities (Jesus Camacho Rodriguez via Deepak Jaiswal)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ab9e954d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ab9e954d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ab9e954d
Branch: refs/heads/master
Commit: ab9e954d478ca0e117b04843ab645f2861e5c925
Parents: bf54424
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Jul 4 14:05:00 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Fri Jul 13 22:15:23 2018 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 4 +
.../test/resources/testconfiguration.properties | 1 +
.../hadoop/hive/ql/parse/TezCompiler.java | 444 +++++++----
.../hive/ql/ppd/SyntheticJoinPredicate.java | 174 ++++-
.../dynamic_semijoin_reduction_sw2.q | 59 ++
.../llap/dynamic_semijoin_reduction_sw2.q.out | 450 +++++++++++
.../clientpositive/llap/explainuser_1.q.out | 12 +-
.../llap/tez_fixed_bucket_pruning.q.out | 8 +-
.../clientpositive/perf/tez/query1.q.out | 76 +-
.../clientpositive/perf/tez/query16.q.out | 118 +--
.../clientpositive/perf/tez/query17.q.out | 197 ++---
.../clientpositive/perf/tez/query18.q.out | 124 +--
.../clientpositive/perf/tez/query2.q.out | 116 +--
.../clientpositive/perf/tez/query23.q.out | 444 +++++------
.../clientpositive/perf/tez/query24.q.out | 252 +++---
.../clientpositive/perf/tez/query25.q.out | 188 ++---
.../clientpositive/perf/tez/query29.q.out | 148 ++--
.../clientpositive/perf/tez/query31.q.out | 322 ++++----
.../clientpositive/perf/tez/query32.q.out | 140 ++--
.../clientpositive/perf/tez/query39.q.out | 94 +--
.../clientpositive/perf/tez/query40.q.out | 92 +--
.../clientpositive/perf/tez/query54.q.out | 246 +++---
.../clientpositive/perf/tez/query59.q.out | 134 ++--
.../clientpositive/perf/tez/query64.q.out | 760 ++++++++++---------
.../clientpositive/perf/tez/query69.q.out | 144 ++--
.../clientpositive/perf/tez/query72.q.out | 178 ++---
.../clientpositive/perf/tez/query77.q.out | 248 +++---
.../clientpositive/perf/tez/query78.q.out | 136 ++--
.../clientpositive/perf/tez/query80.q.out | 336 ++++----
.../clientpositive/perf/tez/query91.q.out | 74 +-
.../clientpositive/perf/tez/query92.q.out | 174 ++---
.../clientpositive/perf/tez/query94.q.out | 118 +--
.../clientpositive/perf/tez/query95.q.out | 241 +++---
.../spark_dynamic_partition_pruning_3.q.out | 3 +-
34 files changed, 3548 insertions(+), 2707 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 6ea68c3..41fae36 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3752,6 +3752,10 @@ public class HiveConf extends Configuration {
"When dynamic pruning is enabled, joins on partition keys will be processed by sending\n" +
"events from the processing vertices to the Tez application master. These events will be\n" +
"used to prune unnecessary partitions."),
+ TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED("hive.tez.dynamic.partition.pruning.extended", true,
+ "Whether we should try to create additional opportunities for dynamic pruning, e.g., considering\n" +
+ "siblings that may not be created by normal dynamic pruning logic.\n" +
+ "Only works when dynamic pruning is enabled."),
TEZ_DYNAMIC_PARTITION_PRUNING_MAX_EVENT_SIZE("hive.tez.dynamic.partition.pruning.max.event.size", 1*1024*1024L,
"Maximum size of events sent by processors in dynamic pruning. If this size is crossed no pruning will take place."),
http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 4001b9f..d08528f 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -894,6 +894,7 @@ minillaplocal.query.files=\
unionDistinct_3.q,\
vectorized_join46.q,\
vectorized_multi_output_select.q,\
+ dynamic_semijoin_reduction_sw2.q,\
partialdhj.q,\
stats_date.q,\
dst.q
http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index 119aa92..1b433c7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -169,44 +169,7 @@ public class TezCompiler extends TaskCompiler {
}
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run reduce sink after join algorithm selection");
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- runRemoveDynamicPruningOptimization(procCtx, inputs, outputs);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run remove dynamic pruning by size");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- markSemiJoinForDPP(procCtx);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based ");
-
- // Removing semijoin optimization when it may not be beneficial
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- removeSemijoinOptimizationByBenefit(procCtx);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove Semijoins based on cost benefits");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- // Remove any parallel edge between semijoin and mapjoin.
- removeSemijoinsParallelToMapJoin(procCtx);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run the optimizations that use stats for optimization");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- // Remove semijoin optimization if it creates a cycle with mapside joins
- removeSemiJoinCyclesDueToMapsideJoins(procCtx);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if it creates a cycle with mapside join");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- // Remove semijoin optimization if SMB join is created.
- removeSemijoinOptimizationFromSMBJoins(procCtx);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if needed");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- // Remove bloomfilter if no stats generated
- removeSemiJoinIfNoStats(procCtx);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- // after the stats phase we might have some cyclic dependencies that we need
- // to take care of.
- runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning");
+ semijoinRemovalBasedTransformations(procCtx, inputs, outputs);
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
if(procCtx.conf.getBoolVar(ConfVars.HIVE_SHARED_WORK_OPTIMIZATION)) {
@@ -230,11 +193,6 @@ public class TezCompiler extends TaskCompiler {
private void runCycleAnalysisForPartitionPruning(OptimizeTezProcContext procCtx,
Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
-
- if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING)) {
- return;
- }
-
boolean cycleFree = false;
while (!cycleFree) {
cycleFree = true;
@@ -454,6 +412,80 @@ public class TezCompiler extends TaskCompiler {
ogw.startWalking(topNodes, null);
}
+ private void semijoinRemovalBasedTransformations(OptimizeTezProcContext procCtx,
+ Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
+ PerfLogger perfLogger = SessionState.getPerfLogger();
+
+ final boolean dynamicPartitionPruningEnabled =
+ procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING);
+ final boolean semiJoinReductionEnabled = dynamicPartitionPruningEnabled &&
+ procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION);
+ final boolean extendedReductionEnabled = dynamicPartitionPruningEnabled &&
+ procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED);
+
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (dynamicPartitionPruningEnabled) {
+ runRemoveDynamicPruningOptimization(procCtx, inputs, outputs);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run remove dynamic pruning by size");
+
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (semiJoinReductionEnabled) {
+ markSemiJoinForDPP(procCtx);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based ");
+
+ // Removing semijoin optimization when it may not be beneficial
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (semiJoinReductionEnabled) {
+ removeSemijoinOptimizationByBenefit(procCtx);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove Semijoins based on cost benefits");
+
+ // Remove any parallel edge between semijoin and mapjoin.
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (semiJoinReductionEnabled) {
+ removeSemijoinsParallelToMapJoin(procCtx);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove any parallel edge between semijoin and mapjoin");
+
+ // Remove semijoin optimization if it creates a cycle with mapside joins
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) {
+ removeSemiJoinCyclesDueToMapsideJoins(procCtx);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if it creates a cycle with mapside join");
+
+ // Remove semijoin optimization if SMB join is created.
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) {
+ removeSemijoinOptimizationFromSMBJoins(procCtx);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if needed");
+
+ // Remove bloomfilter if no stats generated
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) {
+ removeSemiJoinIfNoStats(procCtx);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed");
+
+ // after the stats phase we might have some cyclic dependencies that we need
+ // to take care of.
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (dynamicPartitionPruningEnabled) {
+ runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning");
+
+ // remove redundant dpp and semijoins
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (extendedReductionEnabled) {
+ removeRedundantSemijoinAndDpp(procCtx);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove redundant semijoin reduction");
+ }
+
private void runRemoveDynamicPruningOptimization(OptimizeTezProcContext procCtx,
Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
@@ -739,11 +771,6 @@ public class TezCompiler extends TaskCompiler {
private static void removeSemijoinOptimizationFromSMBJoins(
OptimizeTezProcContext procCtx) throws SemanticException {
- if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) ||
- procCtx.parseContext.getRsToSemiJoinBranchInfo().size() == 0) {
- return;
- }
-
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(
new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%" +
@@ -825,11 +852,6 @@ public class TezCompiler extends TaskCompiler {
private static void removeSemiJoinCyclesDueToMapsideJoins(
OptimizeTezProcContext procCtx) throws SemanticException {
- if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) ||
- procCtx.parseContext.getRsToSemiJoinBranchInfo().size() == 0) {
- return;
- }
-
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(
new RuleRegExp("R1", MapJoinOperator.getOperatorName() + "%" +
@@ -914,99 +936,18 @@ public class TezCompiler extends TaskCompiler {
}
}
- private static class SemiJoinRemovalIfNoStatsProc implements NodeProcessor {
-
- @Override
- public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
- assert nd instanceof ReduceSinkOperator;
- ReduceSinkOperator rs = (ReduceSinkOperator) nd;
- ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext;
- SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs);
- if (sjInfo == null) {
- // nothing to do here.
- return null;
- }
-
- // This is a semijoin branch. The stack should look like,
- // <Parent Ops>-SEL-GB1-RS1-GB2-RS2
- GroupByOperator gbOp = (GroupByOperator) (stack.get(stack.size() - 2));
- GroupByDesc gbDesc = gbOp.getConf();
- ArrayList<AggregationDesc> aggregationDescs = gbDesc.getAggregators();
- for (AggregationDesc agg : aggregationDescs) {
- if (!"bloom_filter".equals(agg.getGenericUDAFName())) {
- continue;
- }
-
- GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator =
- (GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator();
- if (udafBloomFilterEvaluator.hasHintEntries())
- {
- return null; // Created using hint, skip it
- }
-
- long expectedEntries = udafBloomFilterEvaluator.getExpectedEntries();
- if (expectedEntries == -1 || expectedEntries >
- pCtx.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)) {
- if (sjInfo.getIsHint()) {
- throw new SemanticException("Removing hinted semijoin due to lack to stats" +
- " or exceeding max bloom filter entries");
- }
- // Remove the semijoin optimization branch along with ALL the mappings
- // The parent GB2 has all the branches. Collect them and remove them.
- for (Node node : gbOp.getChildren()) {
- ReduceSinkOperator rsFinal = (ReduceSinkOperator) node;
- TableScanOperator ts = pCtx.getRsToSemiJoinBranchInfo().
- get(rsFinal).getTsOp();
- if (LOG.isDebugEnabled()) {
- LOG.debug("expectedEntries=" + expectedEntries + ". "
- + "Either stats unavailable or expectedEntries exceeded max allowable bloomfilter size. "
- + "Removing semijoin "
- + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
- }
- GenTezUtils.removeBranch(rsFinal);
- GenTezUtils.removeSemiJoinOperator(pCtx, rsFinal, ts);
- }
- return null;
- }
- }
-
- // At this point, hinted semijoin case has been handled already
- // Check if big table is big enough that runtime filtering is
- // worth it.
- TableScanOperator ts = sjInfo.getTsOp();
- if (ts.getStatistics() != null) {
- long numRows = ts.getStatistics().getNumRows();
- if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) {
- if (sjInfo.getShouldRemove()) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Insufficient rows (" + numRows + ") to justify semijoin optimization. Removing semijoin "
- + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
- }
- GenTezUtils.removeBranch(rs);
- GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts);
- }
- }
- }
- return null;
- }
- }
-
private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx)
throws SemanticException {
- if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) {
- // Not needed without semi-join reduction
- return;
- }
-
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(
new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%" +
ReduceSinkOperator.getOperatorName() + "%" +
GroupByOperator.getOperatorName() + "%" +
ReduceSinkOperator.getOperatorName() + "%"),
- new SemiJoinRemovalIfNoStatsProc());
- Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
+ new SemiJoinRemovalProc(true, false));
+ SemiJoinRemovalContext ctx =
+ new SemiJoinRemovalContext(procCtx.parseContext);
+ Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
List<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(procCtx.parseContext.getTopOps().values());
GraphWalker ogw = new PreOrderOnceWalker(disp);
@@ -1077,6 +1018,218 @@ public class TezCompiler extends TaskCompiler {
GraphWalker ogw = new PreOrderOnceWalker(disp);
ogw.startWalking(topNodes, null);
}
+
+ private class SemiJoinRemovalProc implements NodeProcessor {
+
+ private final boolean removeBasedOnStats;
+ private final boolean removeRedundant;
+
+ private SemiJoinRemovalProc (boolean removeBasedOnStats, boolean removeRedundant) {
+ this.removeBasedOnStats = removeBasedOnStats;
+ this.removeRedundant = removeRedundant;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ ReduceSinkOperator rs = (ReduceSinkOperator) nd;
+ SemiJoinRemovalContext rCtx = (SemiJoinRemovalContext) procCtx;
+ ParseContext pCtx = rCtx.parseContext;
+ SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs);
+ if (sjInfo == null) {
+ // nothing to do here.
+ return null;
+ }
+ TableScanOperator targetTSOp = sjInfo.getTsOp();
+ ExprNodeDesc targetColExpr = pCtx.getRsToRuntimeValuesInfoMap().get(rs).getTsColExpr();
+
+ // This is a semijoin branch. The stack should look like,
+ // <Parent Ops>-SEL-GB1-RS1-GB2-RS2
+ GroupByOperator gbOp = (GroupByOperator) stack.get(stack.size() - 2);
+ GroupByDesc gbDesc = gbOp.getConf();
+ ArrayList<AggregationDesc> aggregationDescs = gbDesc.getAggregators();
+ for (AggregationDesc agg : aggregationDescs) {
+ if (!isBloomFilterAgg(agg)) {
+ continue;
+ }
+
+ GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator =
+ (GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator();
+ if (udafBloomFilterEvaluator.hasHintEntries()) {
+ return null; // Created using hint, skip it
+ }
+
+ if (removeBasedOnStats) {
+ long expectedEntries = udafBloomFilterEvaluator.getExpectedEntries();
+ if (expectedEntries == -1 || expectedEntries >
+ pCtx.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)) {
+ if (sjInfo.getIsHint()) {
+ throw new SemanticException("Removing hinted semijoin due to lack to stats" +
+ " or exceeding max bloom filter entries");
+ }
+ // Remove the semijoin optimization branch along with ALL the mappings
+ // The parent GB2 has all the branches. Collect them and remove them.
+ for (Node node : gbOp.getChildren()) {
+ ReduceSinkOperator rsFinal = (ReduceSinkOperator) node;
+ TableScanOperator ts = pCtx.getRsToSemiJoinBranchInfo().
+ get(rsFinal).getTsOp();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("expectedEntries=" + expectedEntries + ". "
+ + "Either stats unavailable or expectedEntries exceeded max allowable bloomfilter size. "
+ + "Removing semijoin "
+ + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
+ }
+ GenTezUtils.removeBranch(rsFinal);
+ GenTezUtils.removeSemiJoinOperator(pCtx, rsFinal, ts);
+ }
+ return null;
+ }
+ }
+ }
+
+ if (removeBasedOnStats) {
+ // At this point, hinted semijoin case has been handled already
+ // Check if big table is big enough that runtime filtering is
+ // worth it.
+ TableScanOperator ts = sjInfo.getTsOp();
+ if (ts.getStatistics() != null) {
+ long numRows = ts.getStatistics().getNumRows();
+ if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) {
+ if (sjInfo.getShouldRemove()) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Insufficient rows (" + numRows + ") to justify semijoin optimization. Removing semijoin "
+ + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
+ }
+ GenTezUtils.removeBranch(rs);
+ GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts);
+ }
+ }
+ }
+ }
+
+ if (removeRedundant) {
+ // Look for RS ops above the current semijoin branch
+ Set<ReduceSinkOperator> rsOps = OperatorUtils.findOperators(
+ ((Operator<?>) stack.get(stack.size() - 5)).getParentOperators().get(0),
+ ReduceSinkOperator.class);
+ for (Operator<?> otherRSOp : rsOps) {
+ SemiJoinBranchInfo otherSjInfo = pCtx.getRsToSemiJoinBranchInfo().get(otherRSOp);
+ // First conjunct prevents SJ RS from removing itself
+ if (otherRSOp != rs && otherSjInfo != null && otherSjInfo.getTsOp() == targetTSOp) {
+ if (rCtx.opsToRemove.containsKey(otherRSOp)) {
+ // This sibling is already scheduled for removal, so it cannot make the current branch redundant; keep looking
+ continue;
+ }
+ ExprNodeDesc otherColExpr = pCtx.getRsToRuntimeValuesInfoMap().get(otherRSOp).getTsColExpr();
+ if (!otherColExpr.isSame(targetColExpr)) {
+ // Filter should be on the same column, otherwise we do not proceed
+ continue;
+ }
+ rCtx.opsToRemove.put(rs, targetTSOp);
+ break;
+ }
+ }
+ }
+
+ return null;
+ }
+ }
+
+ private static boolean isBloomFilterAgg(AggregationDesc agg) {
+ return "bloom_filter".equals(agg.getGenericUDAFName());
+ }
+
+ private static class DynamicPruningRemovalRedundantProc implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ AppMasterEventOperator event = (AppMasterEventOperator) nd;
+ if (!(event.getConf() instanceof DynamicPruningEventDesc)) {
+ return null;
+ }
+
+ SemiJoinRemovalContext rCtx = (SemiJoinRemovalContext) procCtx;
+
+ DynamicPruningEventDesc desc = (DynamicPruningEventDesc) event.getConf();
+ TableScanOperator targetTSOp = desc.getTableScan();
+ String targetColumnName = desc.getTargetColumnName();
+
+ // Look for event ops above the current event op branch
+ Operator<?> op = event.getParentOperators().get(0);
+ while (op.getChildOperators().size() < 2) {
+ op = op.getParentOperators().get(0);
+ }
+ Set<AppMasterEventOperator> eventOps = OperatorUtils.findOperators(
+ op, AppMasterEventOperator.class);
+ for (AppMasterEventOperator otherEvent : eventOps) {
+ if (!(otherEvent.getConf() instanceof DynamicPruningEventDesc)) {
+ continue;
+ }
+ DynamicPruningEventDesc otherDesc = (DynamicPruningEventDesc) otherEvent.getConf();
+ if (otherEvent != event && otherDesc.getTableScan() == targetTSOp &&
+ otherDesc.getTargetColumnName().equals(targetColumnName)) {
+ if (rCtx.opsToRemove.containsKey(otherEvent)) {
+ // This sibling event is already scheduled for removal, so it cannot make the current event redundant; keep looking
+ continue;
+ }
+ rCtx.opsToRemove.put(event, targetTSOp);
+ break;
+ }
+ }
+
+ return null;
+ }
+ }
+
+ private void removeRedundantSemijoinAndDpp(OptimizeTezProcContext procCtx)
+ throws SemanticException {
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<>();
+ opRules.put(
+ new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%" +
+ ReduceSinkOperator.getOperatorName() + "%" +
+ GroupByOperator.getOperatorName() + "%" +
+ ReduceSinkOperator.getOperatorName() + "%"),
+ new SemiJoinRemovalProc(false, true));
+ opRules.put(
+ new RuleRegExp("R2",
+ AppMasterEventOperator.getOperatorName() + "%"),
+ new DynamicPruningRemovalRedundantProc());
+
+ // Gather
+ SemiJoinRemovalContext ctx =
+ new SemiJoinRemovalContext(procCtx.parseContext);
+ Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
+ List<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(procCtx.parseContext.getTopOps().values());
+ GraphWalker ogw = new PreOrderOnceWalker(disp);
+ ogw.startWalking(topNodes, null);
+
+ // Remove
+ for (Map.Entry<Operator<?>, TableScanOperator> p : ctx.opsToRemove.entrySet()) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Removing redundant " + OperatorUtils.getOpNamePretty(p.getKey()) + " - " + OperatorUtils.getOpNamePretty(p.getValue()));
+ }
+ GenTezUtils.removeBranch(p.getKey());
+ if (p.getKey() instanceof AppMasterEventOperator) {
+ GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, (AppMasterEventOperator) p.getKey(), p.getValue());
+ } else if (p.getKey() instanceof ReduceSinkOperator) {
+ GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, (ReduceSinkOperator) p.getKey(), p.getValue());
+ } else {
+ throw new SemanticException("Unexpected error - type for branch could not be recognized");
+ }
+ }
+ }
+
+ private class SemiJoinRemovalContext implements NodeProcessorCtx {
+ private final ParseContext parseContext;
+ private final Map<Operator<?>, TableScanOperator> opsToRemove;
+
+ private SemiJoinRemovalContext(final ParseContext parseContext) {
+ this.parseContext = parseContext;
+ this.opsToRemove = new HashMap<>();
+ }
+ }
private boolean findParallelSemiJoinBranch(Operator<?> mapjoin, TableScanOperator bigTableTS,
ParseContext parseContext,
@@ -1166,9 +1319,8 @@ public class TezCompiler extends TaskCompiler {
*/
private void removeSemijoinsParallelToMapJoin(OptimizeTezProcContext procCtx)
throws SemanticException {
- if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) ||
- !procCtx.conf.getBoolVar(ConfVars.HIVECONVERTJOIN) ||
- procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_MAPJOIN)) {
+ if(!procCtx.conf.getBoolVar(ConfVars.HIVECONVERTJOIN) ||
+ procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_MAPJOIN)) {
// Not needed without semi-join reduction or mapjoins or when semijoins
// are enabled for parallel mapjoins.
return;
@@ -1376,11 +1528,6 @@ public class TezCompiler extends TaskCompiler {
private void removeSemijoinOptimizationByBenefit(OptimizeTezProcContext procCtx)
throws SemanticException {
- if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) {
- // Not needed without semi-join reduction
- return;
- }
-
List<ReduceSinkOperator> semijoinRsToRemove = new ArrayList<ReduceSinkOperator>();
Map<ReduceSinkOperator, SemiJoinBranchInfo> map = procCtx.parseContext.getRsToSemiJoinBranchInfo();
double semijoinReductionThreshold = procCtx.conf.getFloatVar(
@@ -1437,11 +1584,6 @@ public class TezCompiler extends TaskCompiler {
private void markSemiJoinForDPP(OptimizeTezProcContext procCtx)
throws SemanticException {
- if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) {
- // Not needed without semi-join reduction
- return;
- }
-
// Stores the Tablescan operators processed to avoid redoing them.
Map<TableScanOperator, TableScanOperator> tsOps = new HashMap<>();
Map<ReduceSinkOperator, SemiJoinBranchInfo> map = procCtx.parseContext.getRsToSemiJoinBranchInfo();
http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java
index dec2d1e..1f533bc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java
@@ -26,6 +26,12 @@ import java.util.Map;
import java.util.Set;
import java.util.Stack;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.OperatorUtils;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -119,14 +125,20 @@ public class SyntheticJoinPredicate extends Transform {
private static class SyntheticContext implements NodeProcessorCtx {
ParseContext parseContext;
+ boolean extended;
public SyntheticContext(ParseContext pCtx) {
parseContext = pCtx;
+ extended = parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED);
}
public ParseContext getParseContext() {
return parseContext;
}
+
+ public boolean isExtended() {
+ return extended;
+ }
}
private static class JoinSynthetic implements NodeProcessor {
@@ -134,6 +146,8 @@ public class SyntheticJoinPredicate extends Transform {
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
+ SyntheticContext sCtx = (SyntheticContext) procCtx;
+
@SuppressWarnings("unchecked")
CommonJoinOperator<JoinDesc> join = (CommonJoinOperator<JoinDesc>) nd;
@@ -161,9 +175,6 @@ public class SyntheticJoinPredicate extends Transform {
continue;
}
- if (LOG.isDebugEnabled()) {
- LOG.debug("Synthetic predicate: " + srcPos + " --> " + targetPos);
- }
ReduceSinkOperator target = (ReduceSinkOperator) parents.get(targetPos);
List<ExprNodeDesc> sourceKeys = source.getConf().getKeyCols();
List<ExprNodeDesc> targetKeys = target.getConf().getKeyCols();
@@ -175,8 +186,10 @@ public class SyntheticJoinPredicate extends Transform {
ExprNodeDesc syntheticExpr = null;
for (int i = 0; i < sourceKeys.size(); ++i) {
- List<ExprNodeDesc> inArgs = new ArrayList<ExprNodeDesc>();
- inArgs.add(sourceKeys.get(i));
+ final ExprNodeDesc sourceKey = sourceKeys.get(i);
+
+ List<ExprNodeDesc> inArgs = new ArrayList<>();
+ inArgs.add(sourceKey);
ExprNodeDynamicListDesc dynamicExpr =
new ExprNodeDynamicListDesc(targetKeys.get(i).getTypeInfo(), target, i);
@@ -186,17 +199,36 @@ public class SyntheticJoinPredicate extends Transform {
ExprNodeDesc syntheticInExpr =
ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("in")
.getGenericUDF(), inArgs);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Synthetic predicate in " + join + ": " + srcPos + " --> " + targetPos + " (" + syntheticInExpr + ")");
+ }
+ List<ExprNodeDesc> andArgs = new ArrayList<>();
if (syntheticExpr != null) {
- List<ExprNodeDesc> andArgs = new ArrayList<ExprNodeDesc>();
andArgs.add(syntheticExpr);
- andArgs.add(syntheticInExpr);
+ }
+ andArgs.add(syntheticInExpr);
+
+ if(sCtx.isExtended()) {
+ // Backtrack
+ List<ExprNodeDesc> newExprs = createDerivatives(target.getParentOperators().get(0), targetKeys.get(i), sourceKey);
+ if (!newExprs.isEmpty()) {
+ if (LOG.isDebugEnabled()) {
+ for (ExprNodeDesc expr : newExprs) {
+ LOG.debug("Additional synthetic predicate in " + join + ": " + srcPos + " --> " + targetPos + " (" + expr + ")");
+ }
+ }
+ andArgs.addAll(newExprs);
+ }
+ }
+ if (andArgs.size() < 2) {
+ syntheticExpr = syntheticInExpr;
+ } else {
+ // Create AND expression
syntheticExpr =
ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("and")
.getGenericUDF(), andArgs);
- } else {
- syntheticExpr = syntheticInExpr;
}
}
@@ -241,6 +273,129 @@ public class SyntheticJoinPredicate extends Transform {
}
return result;
}
+
+ private List<ExprNodeDesc> createDerivatives(final Operator<?> currentOp,
+ final ExprNodeDesc currentNode, final ExprNodeDesc sourceKey) throws SemanticException {
+ List<ExprNodeDesc> resultExprs = new ArrayList<>();
+ return createDerivatives(resultExprs, currentOp, currentNode, sourceKey) ? resultExprs : new ArrayList<>();
+ }
+
+ private boolean createDerivatives(final List<ExprNodeDesc> resultExprs, final Operator<?> op,
+ final ExprNodeDesc currentNode, final ExprNodeDesc sourceKey) throws SemanticException {
+ // 1. Obtain join operator upstream
+ Operator<?> currentOp = op;
+ while (!(currentOp instanceof CommonJoinOperator)) {
+ if (currentOp.getParentOperators() == null || currentOp.getParentOperators().size() != 1) {
+ // Cannot backtrack
+ currentOp = null;
+ break;
+ }
+ if (!(currentOp instanceof FilterOperator) &&
+ !(currentOp instanceof SelectOperator) &&
+ !(currentOp instanceof ReduceSinkOperator) &&
+ !(currentOp instanceof GroupByOperator)) {
+ // Operator not supported
+ currentOp = null;
+ break;
+ }
+ // Move the pointer
+ currentOp = currentOp.getParentOperators().get(0);
+ }
+ if (currentOp == null) {
+ // We did not find any join, we are done
+ return true;
+ }
+ CommonJoinOperator<JoinDesc> joinOp = (CommonJoinOperator) currentOp;
+
+ // 2. Backtrack expression to join output
+ final ExprNodeDesc joinExprNode = ExprNodeDescUtils.backtrack(currentNode, op, joinOp);
+ if (joinExprNode == null || !(joinExprNode instanceof ExprNodeColumnDesc)) {
+ // TODO: We can extend to other expression types
+ // We are done
+ return true;
+ }
+ final String columnRefJoinInput = ((ExprNodeColumnDesc)joinExprNode).getColumn();
+
+ // 3. Find input position in join for expression obtained
+ String columnOutputName = null;
+ for (Map.Entry<String, ExprNodeDesc> e : joinOp.getColumnExprMap().entrySet()) {
+ if (e.getValue() == joinExprNode) {
+ columnOutputName = e.getKey();
+ break;
+ }
+ }
+ if (columnOutputName == null) {
+ // Maybe the join is pruning columns, though it should not.
+ // In any case, we are done
+ return true;
+ }
+ final int srcPos = joinOp.getConf().getReversedExprs().get(columnOutputName);
+ final int[][] targets = getTargets(joinOp);
+ final ReduceSinkOperator rsOp = (ReduceSinkOperator) joinOp.getParentOperators().get(srcPos);
+
+ // 4. Find expression in input RS operator.
+ final Operator<?> rsOpInput = rsOp.getParentOperators().get(0);
+ final ExprNodeDesc rsOpInputExprNode = rsOp.getColumnExprMap().get(columnRefJoinInput);
+ if (rsOpInputExprNode == null) {
+ // Unexpected, we just bail out and we do not infer additional predicates
+ return false;
+ }
+ int posInRSOpKeys = -1;
+ for (int i = 0; i < rsOp.getConf().getKeyCols().size(); i++) {
+ if (rsOpInputExprNode.isSame(rsOp.getConf().getKeyCols().get(i))) {
+ posInRSOpKeys = i;
+ break;
+ }
+ }
+
+ // 5. If it is part of the key, we can create a new semijoin.
+ // In addition, we can do the same for siblings
+ if (posInRSOpKeys >= 0) {
+ // We pass the tests, we add it to the args for the AND expression
+ addParentReduceSink(resultExprs, rsOp, posInRSOpKeys, sourceKey);
+ for (int targetPos: targets[srcPos]) {
+ if (srcPos == targetPos) {
+ continue;
+ }
+ final ReduceSinkOperator otherRsOp = (ReduceSinkOperator) joinOp.getParentOperators().get(targetPos);
+ final Operator<?> otherRsOpInput = otherRsOp.getParentOperators().get(0);
+ // We pass the tests, we add it to the args for the AND expression
+ addParentReduceSink(resultExprs, otherRsOp, posInRSOpKeys, sourceKey);
+ // We propagate to operator below
+ boolean success = createDerivatives(
+ resultExprs, otherRsOpInput, otherRsOp.getConf().getKeyCols().get(posInRSOpKeys), sourceKey);
+ if (!success) {
+ // Something went wrong, bail out
+ return false;
+ }
+ }
+ }
+
+ // 6. Whether it was part of the key or of the value, if we reach here, we can at least
+ // continue propagating to operators below
+ boolean success = createDerivatives(
+ resultExprs, rsOpInput, rsOpInputExprNode, sourceKey);
+ if (!success) {
+ // Something went wrong, bail out
+ return false;
+ }
+
+ // 7. We are done, success
+ return true;
+ }
+
+ private void addParentReduceSink(final List<ExprNodeDesc> andArgs, final ReduceSinkOperator rsOp,
+ final int keyIndex, final ExprNodeDesc sourceKey) throws SemanticException {
+ ExprNodeDynamicListDesc dynamicExpr =
+ new ExprNodeDynamicListDesc(rsOp.getConf().getKeyCols().get(keyIndex).getTypeInfo(), rsOp, keyIndex);
+ // Create synthetic IN expression
+ List<ExprNodeDesc> inArgs = new ArrayList<>();
+ inArgs.add(sourceKey);
+ inArgs.add(dynamicExpr);
+ ExprNodeDesc newNode = ExprNodeGenericFuncDesc.newInstance(
+ FunctionRegistry.getFunctionInfo("in").getGenericUDF(), inArgs);
+ andArgs.add(newNode);
+ }
}
private static class Vectors {
@@ -285,4 +440,5 @@ public class SyntheticJoinPredicate extends Transform {
}
}
}
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q
new file mode 100644
index 0000000..910119d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q
@@ -0,0 +1,59 @@
+--! qt:dataset:srcpart
+--! qt:dataset:alltypesorc
+set hive.compute.query.using.stats=false;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=true;
+set hive.optimize.metadataonly=false;
+set hive.optimize.index.filter=true;
+set hive.stats.autogather=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
+set hive.stats.fetch.column.stats=true;
+set hive.cbo.enable=false;
+set hive.reorder.nway.joins=false;
+set hive.merge.nway.joins=false;
+
+-- Create Tables
+create table alltypesorc_int_n0 ( cint int, cstring string ) stored as ORC;
+create table srcpart_date_n6 (key string, value string) partitioned by (ds string ) stored as ORC;
+CREATE TABLE srcpart_small_n2(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC;
+
+-- Add Partitions
+alter table srcpart_date_n6 add partition (ds = "2008-04-08");
+alter table srcpart_date_n6 add partition (ds = "2008-04-09");
+
+alter table srcpart_small_n2 add partition (ds1 = "2008-04-08");
+alter table srcpart_small_n2 add partition (ds1 = "2008-04-09");
+
+-- Load
+insert overwrite table alltypesorc_int_n0 select cint, cstring1 from alltypesorc;
+insert overwrite table srcpart_date_n6 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08";
+insert overwrite table srcpart_date_n6 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09";
+insert overwrite table srcpart_small_n2 partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20;
+
+set hive.tez.dynamic.semijoin.reduction=false;
+
+analyze table alltypesorc_int_n0 compute statistics for columns;
+analyze table srcpart_date_n6 compute statistics for columns;
+analyze table srcpart_small_n2 compute statistics for columns;
+
+set hive.tez.dynamic.semijoin.reduction=true;
+EXPLAIN
+SELECT count(*)
+ FROM (SELECT * FROM srcpart_date_n6 WHERE ds = "2008-04-09") `srcpart_date_n6`
+ JOIN (SELECT * FROM srcpart_small_n2 WHERE ds1 = "2008-04-08") `srcpart_small_n2`
+ ON (srcpart_date_n6.key = srcpart_small_n2.key1)
+ JOIN (
+ SELECT *
+ FROM (SELECT * FROM alltypesorc_int_n0 WHERE cint = 10) `alltypesorc_int_n0`
+ JOIN (SELECT * FROM srcpart_small_n2) `srcpart_small_n2`
+ ON (alltypesorc_int_n0.cstring = srcpart_small_n2.key1)) b
+ ON (srcpart_small_n2.key1 = b.cstring);
+
+drop table srcpart_date_n6;
+drop table srcpart_small_n2;
+drop table alltypesorc_int_n0;
http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out
new file mode 100644
index 0000000..883bdd7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out
@@ -0,0 +1,450 @@
+PREHOOK: query: create table alltypesorc_int_n0 ( cint int, cstring string ) stored as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alltypesorc_int_n0
+POSTHOOK: query: create table alltypesorc_int_n0 ( cint int, cstring string ) stored as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypesorc_int_n0
+PREHOOK: query: create table srcpart_date_n6 (key string, value string) partitioned by (ds string ) stored as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_date_n6
+POSTHOOK: query: create table srcpart_date_n6 (key string, value string) partitioned by (ds string ) stored as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_date_n6
+PREHOOK: query: CREATE TABLE srcpart_small_n2(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_small_n2
+POSTHOOK: query: CREATE TABLE srcpart_small_n2(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_small_n2
+PREHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_date_n6
+POSTHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_date_n6
+POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-08
+PREHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-09")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_date_n6
+POSTHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-09")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_date_n6
+POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-09
+PREHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_small_n2
+POSTHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_small_n2
+POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08
+PREHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-09")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_small_n2
+POSTHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-09")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_small_n2
+POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09
+PREHOOK: query: insert overwrite table alltypesorc_int_n0 select cint, cstring1 from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@alltypesorc_int_n0
+POSTHOOK: query: insert overwrite table alltypesorc_int_n0 select cint, cstring1 from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@alltypesorc_int_n0
+POSTHOOK: Lineage: alltypesorc_int_n0.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_int_n0.cstring SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+PREHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-08
+POSTHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-08
+POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-09
+POSTHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-09
+POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_small_n2 partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09
+POSTHOOK: query: insert overwrite table srcpart_small_n2 partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09
+POSTHOOK: Lineage: srcpart_small_n2 PARTITION(ds1=2008-04-09).key1 SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_small_n2 PARTITION(ds1=2008-04-09).value1 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: analyze table alltypesorc_int_n0 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@alltypesorc_int_n0
+PREHOOK: Output: default@alltypesorc_int_n0
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table alltypesorc_int_n0 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@alltypesorc_int_n0
+POSTHOOK: Output: default@alltypesorc_int_n0
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcpart_date_n6 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@srcpart_date_n6
+PREHOOK: Input: default@srcpart_date_n6@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date_n6@ds=2008-04-09
+PREHOOK: Output: default@srcpart_date_n6
+PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-08
+PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_date_n6 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@srcpart_date_n6
+POSTHOOK: Input: default@srcpart_date_n6@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date_n6@ds=2008-04-09
+POSTHOOK: Output: default@srcpart_date_n6
+POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-08
+POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-09
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcpart_small_n2 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@srcpart_small_n2
+PREHOOK: Input: default@srcpart_small_n2@ds1=2008-04-08
+PREHOOK: Input: default@srcpart_small_n2@ds1=2008-04-09
+PREHOOK: Output: default@srcpart_small_n2
+PREHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08
+PREHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_small_n2 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@srcpart_small_n2
+POSTHOOK: Input: default@srcpart_small_n2@ds1=2008-04-08
+POSTHOOK: Input: default@srcpart_small_n2@ds1=2008-04-09
+POSTHOOK: Output: default@srcpart_small_n2
+POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08
+POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09
+#### A masked pattern was here ####
+PREHOOK: query: EXPLAIN
+SELECT count(*)
+ FROM (SELECT * FROM srcpart_date_n6 WHERE ds = "2008-04-09") `srcpart_date_n6`
+ JOIN (SELECT * FROM srcpart_small_n2 WHERE ds1 = "2008-04-08") `srcpart_small_n2`
+ ON (srcpart_date_n6.key = srcpart_small_n2.key1)
+ JOIN (
+ SELECT *
+ FROM (SELECT * FROM alltypesorc_int_n0 WHERE cint = 10) `alltypesorc_int_n0`
+ JOIN (SELECT * FROM srcpart_small_n2) `srcpart_small_n2`
+ ON (alltypesorc_int_n0.cstring = srcpart_small_n2.key1)) b
+ ON (srcpart_small_n2.key1 = b.cstring)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT count(*)
+ FROM (SELECT * FROM srcpart_date_n6 WHERE ds = "2008-04-09") `srcpart_date_n6`
+ JOIN (SELECT * FROM srcpart_small_n2 WHERE ds1 = "2008-04-08") `srcpart_small_n2`
+ ON (srcpart_date_n6.key = srcpart_small_n2.key1)
+ JOIN (
+ SELECT *
+ FROM (SELECT * FROM alltypesorc_int_n0 WHERE cint = 10) `alltypesorc_int_n0`
+ JOIN (SELECT * FROM srcpart_small_n2) `srcpart_small_n2`
+ ON (alltypesorc_int_n0.cstring = srcpart_small_n2.key1)) b
+ ON (srcpart_small_n2.key1 = b.cstring)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 6 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE)
+ Map 11 <- Reducer 10 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE)
+ Reducer 10 <- Map 7 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE)
+ Reducer 8 <- Map 11 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: srcpart_date_n6
+ filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter))) and (key BETWEEN DynamicValue(RS_25_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_25_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key, DynamicValue(RS_25_alltypesorc_int_n0_cstring_bloom_filter)))) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((key BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter))) and (key BETWEEN DynamicValue(RS_25_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_25_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key, DynamicValue(RS_25_alltypesorc_int_n0_cstring_bloom_filter))) and key is not null) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: srcpart_small_n2
+ filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_12_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_12_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_12_alltypesorc_int_n0_cstring_bloom_filter))) and (key1 BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key1, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter)))) (type: boolean)
+ Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL
+ Filter Operator
+ predicate: ((key1 BETWEEN DynamicValue(RS_12_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_12_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_12_alltypesorc_int_n0_cstring_bloom_filter))) and (key1 BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key1, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter))) and key1 is not null) (type: boolean)
+ Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL
+ Select Operator
+ expressions: key1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: srcpart_small_n2
+ filterExpr: key1 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE
+ Filter Operator
+ predicate: key1 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc_int_n0
+ filterExpr: ((cint = 10) and cstring is not null) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((cint = 10) and cstring is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: cstring (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 10
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col3
+ Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: string)
+ Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: string)
+ 1 _col1 (type: string)
+ Statistics: Num rows: 1210 Data size: 105270 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ Reducer 8
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 22 Data size: 1914 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 22 Data size: 1914 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 22 Data size: 1914 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=22)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ Reducer 9
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=22)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: drop table srcpart_date_n6
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@srcpart_date_n6
+PREHOOK: Output: default@srcpart_date_n6
+POSTHOOK: query: drop table srcpart_date_n6
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@srcpart_date_n6
+POSTHOOK: Output: default@srcpart_date_n6
+PREHOOK: query: drop table srcpart_small_n2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@srcpart_small_n2
+PREHOOK: Output: default@srcpart_small_n2
+POSTHOOK: query: drop table srcpart_small_n2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@srcpart_small_n2
+POSTHOOK: Output: default@srcpart_small_n2
+PREHOOK: query: drop table alltypesorc_int_n0
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@alltypesorc_int_n0
+PREHOOK: Output: default@alltypesorc_int_n0
+POSTHOOK: query: drop table alltypesorc_int_n0
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@alltypesorc_int_n0
+POSTHOOK: Output: default@alltypesorc_int_n0
http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index f87fe36..6a2ae62 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -5347,8 +5347,8 @@ Stage-0
Stage-1
Map 3 llap
File Output Operator [FS_21]
- Map Join Operator [MAPJOIN_67] (rows=2 width=404)
- Conds:RS_16._col0=RS_17._col0(Inner),RS_17._col0=MAPJOIN_66._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+ Map Join Operator [MAPJOIN_71] (rows=2 width=404)
+ Conds:RS_16._col0=RS_17._col0(Inner),RS_17._col0=MAPJOIN_70._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
<-Map 1 [BROADCAST_EDGE] llap
BROADCAST [RS_16]
PartitionCols:_col0
@@ -5367,7 +5367,7 @@ Stage-0
predicate:key is not null
TableScan [TS_3] (rows=1 width=368)
default@t2_n70,b,Tbl:COMPLETE,Col:NONE,Output:["key","val"]
- <-Map Join Operator [MAPJOIN_66] (rows=1 width=404)
+ <-Map Join Operator [MAPJOIN_70] (rows=1 width=404)
Conds:SEL_8._col0=RS_13._col0(Inner),Output:["_col0","_col1","_col2","_col3"]
<-Map 4 [BROADCAST_EDGE] llap
BROADCAST [RS_13]
@@ -5408,8 +5408,8 @@ Stage-0
Stage-1
Map 3 llap
File Output Operator [FS_21]
- Map Join Operator [MAPJOIN_67] (rows=2 width=404)
- Conds:RS_16._col0=RS_17._col0(Inner),RS_17._col0=MAPJOIN_66._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+ Map Join Operator [MAPJOIN_71] (rows=2 width=404)
+ Conds:RS_16._col0=RS_17._col0(Inner),RS_17._col0=MAPJOIN_70._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
<-Map 1 [BROADCAST_EDGE] llap
BROADCAST [RS_16]
PartitionCols:_col0
@@ -5428,7 +5428,7 @@ Stage-0
predicate:key is not null
TableScan [TS_3] (rows=1 width=368)
default@t2_n70,b,Tbl:COMPLETE,Col:NONE,Output:["key","val"]
- <-Map Join Operator [MAPJOIN_66] (rows=1 width=404)
+ <-Map Join Operator [MAPJOIN_70] (rows=1 width=404)
Conds:SEL_8._col0=RS_13._col0(Inner),Output:["_col0","_col1","_col2","_col3"]
<-Map 4 [BROADCAST_EDGE] llap
BROADCAST [RS_13]
http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
index 6987a96..74fc2e8 100644
--- a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
@@ -424,7 +424,7 @@ POSTHOOK: type: ANALYZE_TABLE
POSTHOOK: Input: default@l3_monthly_dw_dimplan
POSTHOOK: Output: default@l3_monthly_dw_dimplan
#### A masked pattern was here ####
-Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: EXPLAIN EXTENDED
SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY
FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
@@ -873,7 +873,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY
FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1
@@ -915,7 +915,7 @@ POSTHOOK: Input: default@l3_monthly_dw_dimplan
7147200 NULL 27114
7147200 NULL 27114
7147200 NULL 27114
-Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: EXPLAIN EXTENDED
SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY
FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
@@ -1365,7 +1365,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY
FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1
http://git-wip-us.apache.org/repos/asf/hive/blob/ab9e954d/ql/src/test/results/clientpositive/perf/tez/query1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/query1.q.out b/ql/src/test/results/clientpositive/perf/tez/query1.q.out
index 579940c..58c422d 100644
--- a/ql/src/test/results/clientpositive/perf/tez/query1.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/query1.q.out
@@ -63,10 +63,10 @@ Stage-0
limit:100
Stage-1
Reducer 7 vectorized
- File Output Operator [FS_159]
- Limit [LIM_158] (rows=100 width=860)
+ File Output Operator [FS_161]
+ Limit [LIM_160] (rows=100 width=860)
Number of rows:100
- Select Operator [SEL_157] (rows=32266667 width=860)
+ Select Operator [SEL_159] (rows=32266667 width=860)
Output:["_col0"]
<-Reducer 6 [SIMPLE_EDGE]
SHUFFLE [RS_50]
@@ -74,96 +74,96 @@ Stage-0
Output:["_col0"]
Filter Operator [FIL_48] (rows=32266667 width=860)
predicate:(_col2 > _col7)
- Merge Join Operator [MERGEJOIN_132] (rows=96800003 width=860)
- Conds:RS_45._col1=RS_156._col1(Inner),Output:["_col2","_col6","_col7"]
+ Merge Join Operator [MERGEJOIN_134] (rows=96800003 width=860)
+ Conds:RS_45._col1=RS_158._col1(Inner),Output:["_col2","_col6","_col7"]
<-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_45]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_130] (rows=88000001 width=860)
- Conds:RS_42._col0=RS_151._col0(Inner),Output:["_col1","_col2","_col6"]
+ Merge Join Operator [MERGEJOIN_132] (rows=88000001 width=860)
+ Conds:RS_42._col0=RS_153._col0(Inner),Output:["_col1","_col2","_col6"]
<-Map 12 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_151]
+ SHUFFLE [RS_153]
PartitionCols:_col0
- Select Operator [SEL_150] (rows=80000000 width=860)
+ Select Operator [SEL_152] (rows=80000000 width=860)
Output:["_col0","_col1"]
- Filter Operator [FIL_149] (rows=80000000 width=860)
+ Filter Operator [FIL_151] (rows=80000000 width=860)
predicate:c_customer_sk is not null
TableScan [TS_17] (rows=80000000 width=860)
default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id"]
<-Reducer 4 [SIMPLE_EDGE]
SHUFFLE [RS_42]
PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_129] (rows=34842647 width=77)
- Conds:RS_145._col1=RS_148._col0(Inner),Output:["_col0","_col1","_col2"]
+ Merge Join Operator [MERGEJOIN_131] (rows=34842647 width=77)
+ Conds:RS_147._col1=RS_150._col0(Inner),Output:["_col0","_col1","_col2"]
<-Map 11 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_148]
+ SHUFFLE [RS_150]
PartitionCols:_col0
- Select Operator [SEL_147] (rows=852 width=1910)
+ Select Operator [SEL_149] (rows=852 width=1910)
Output:["_col0"]
- Filter Operator [FIL_146] (rows=852 width=1910)
+ Filter Operator [FIL_148] (rows=852 width=1910)
predicate:((s_state = 'NM') and s_store_sk is not null)
TableScan [TS_14] (rows=1704 width=1910)
default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"]
<-Reducer 3 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_145]
+ SHUFFLE [RS_147]
PartitionCols:_col1
- Select Operator [SEL_144] (rows=31675133 width=77)
+ Select Operator [SEL_146] (rows=31675133 width=77)
Output:["_col0","_col1","_col2"]
- Group By Operator [GBY_143] (rows=31675133 width=77)
+ Group By Operator [GBY_145] (rows=31675133 width=77)
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1
<-Reducer 2 [SIMPLE_EDGE]
SHUFFLE [RS_11]
PartitionCols:_col0, _col1
Group By Operator [GBY_10] (rows=63350266 width=77)
Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1
- Merge Join Operator [MERGEJOIN_128] (rows=63350266 width=77)
- Conds:RS_137._col0=RS_141._col0(Inner),Output:["_col1","_col2","_col3"]
+ Merge Join Operator [MERGEJOIN_130] (rows=63350266 width=77)
+ Conds:RS_139._col0=RS_143._col0(Inner),Output:["_col1","_col2","_col3"]
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_137]
+ SHUFFLE [RS_139]
PartitionCols:_col0
- Select Operator [SEL_135] (rows=57591150 width=77)
+ Select Operator [SEL_137] (rows=57591150 width=77)
Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_133] (rows=57591150 width=77)
+ Filter Operator [FIL_135] (rows=57591150 width=77)
predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_store_sk is not null)
TableScan [TS_0] (rows=57591150 width=77)
default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"]
<-Map 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_141]
+ SHUFFLE [RS_143]
PartitionCols:_col0
- Select Operator [SEL_140] (rows=36524 width=1119)
+ Select Operator [SEL_142] (rows=36524 width=1119)
Output:["_col0"]
- Filter Operator [FIL_139] (rows=36524 width=1119)
+ Filter Operator [FIL_141] (rows=36524 width=1119)
predicate:((d_year = 2000) and d_date_sk is not null)
TableScan [TS_3] (rows=73049 width=1119)
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
<-Reducer 9 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_156]
+ SHUFFLE [RS_158]
PartitionCols:_col1
- Select Operator [SEL_155] (rows=15837566 width=77)
+ Select Operator [SEL_157] (rows=15837566 width=77)
Output:["_col0","_col1"]
- Group By Operator [GBY_154] (rows=15837566 width=77)
+ Group By Operator [GBY_156] (rows=15837566 width=77)
Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1
- Select Operator [SEL_153] (rows=31675133 width=77)
+ Select Operator [SEL_155] (rows=31675133 width=77)
Output:["_col1","_col2"]
- Group By Operator [GBY_152] (rows=31675133 width=77)
+ Group By Operator [GBY_154] (rows=31675133 width=77)
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1
<-Reducer 8 [SIMPLE_EDGE]
SHUFFLE [RS_31]
PartitionCols:_col0
Group By Operator [GBY_30] (rows=63350266 width=77)
Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1
- Merge Join Operator [MERGEJOIN_131] (rows=63350266 width=77)
- Conds:RS_138._col0=RS_142._col0(Inner),Output:["_col1","_col2","_col3"]
+ Merge Join Operator [MERGEJOIN_133] (rows=63350266 width=77)
+ Conds:RS_140._col0=RS_144._col0(Inner),Output:["_col1","_col2","_col3"]
<-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_138]
+ SHUFFLE [RS_140]
PartitionCols:_col0
- Select Operator [SEL_136] (rows=57591150 width=77)
+ Select Operator [SEL_138] (rows=57591150 width=77)
Output:["_col0","_col1","_col2","_col3"]
- Filter Operator [FIL_134] (rows=57591150 width=77)
+ Filter Operator [FIL_136] (rows=57591150 width=77)
predicate:(sr_returned_date_sk is not null and sr_store_sk is not null)
Please refer to the previous TableScan [TS_0]
<-Map 10 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_142]
+ SHUFFLE [RS_144]
PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_140]
+ Please refer to the previous Select Operator [SEL_142]