You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2018/08/01 21:30:38 UTC
[5/5] hive git commit: HIVE-20090 : Extend creation of semijoin
reduction filters to be able to discover new opportunities (Jesus Camacho
Rodriguez via Deepak Jaiswal)
HIVE-20090 : Extend creation of semijoin reduction filters to be able to discover new opportunities (Jesus Camacho Rodriguez via Deepak Jaiswal)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/18beea6a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/18beea6a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/18beea6a
Branch: refs/heads/branch-3
Commit: 18beea6a9454b496ee406e1cb1cc7a11e4073406
Parents: ccc3c9a
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Jul 4 14:05:00 2018 -0700
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Wed Aug 1 14:30:24 2018 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 4 +
.../test/resources/testconfiguration.properties | 1 +
.../hadoop/hive/ql/parse/TezCompiler.java | 444 +++++++++++-------
.../hive/ql/ppd/SyntheticJoinPredicate.java | 174 ++++++-
.../dynamic_semijoin_reduction_sw2.q | 59 +++
.../llap/dynamic_semijoin_reduction_sw2.q.out | 450 +++++++++++++++++++
.../llap/tez_fixed_bucket_pruning.q.out | 8 +-
.../spark_dynamic_partition_pruning_3.q.out | 2 +-
8 files changed, 977 insertions(+), 165 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/18beea6a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 89171ef..6f2690e 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3731,6 +3731,10 @@ public class HiveConf extends Configuration {
"When dynamic pruning is enabled, joins on partition keys will be processed by sending\n" +
"events from the processing vertices to the Tez application master. These events will be\n" +
"used to prune unnecessary partitions."),
+ TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED("hive.tez.dynamic.partition.pruning.extended", true,
+ "Whether we should try to create additional opportunities for dynamic pruning, e.g., considering\n" +
+ "siblings that may not be created by normal dynamic pruning logic.\n" +
+ "Only works when dynamic pruning is enabled."),
TEZ_DYNAMIC_PARTITION_PRUNING_MAX_EVENT_SIZE("hive.tez.dynamic.partition.pruning.max.event.size", 1*1024*1024L,
"Maximum size of events sent by processors in dynamic pruning. If this size is crossed no pruning will take place."),
http://git-wip-us.apache.org/repos/asf/hive/blob/18beea6a/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 2d9dc35..342bcb0 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -918,6 +918,7 @@ minillaplocal.query.files=\
unionDistinct_3.q,\
vectorized_join46.q,\
vectorized_multi_output_select.q,\
+ dynamic_semijoin_reduction_sw2.q,\
partialdhj.q,\
stats_date.q,\
dst.q
http://git-wip-us.apache.org/repos/asf/hive/blob/18beea6a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index cffa176..cc0bd07 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -178,44 +178,7 @@ public class TezCompiler extends TaskCompiler {
}
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run reduce sink after join algorithm selection");
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- runRemoveDynamicPruningOptimization(procCtx, inputs, outputs);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run remove dynamic pruning by size");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- markSemiJoinForDPP(procCtx);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based ");
-
- // Removing semijoin optimization when it may not be beneficial
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- removeSemijoinOptimizationByBenefit(procCtx);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove Semijoins based on cost benefits");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- // Remove any parallel edge between semijoin and mapjoin.
- removeSemijoinsParallelToMapJoin(procCtx);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run the optimizations that use stats for optimization");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- // Remove semijoin optimization if it creates a cycle with mapside joins
- removeSemiJoinCyclesDueToMapsideJoins(procCtx);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if it creates a cycle with mapside join");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- // Remove semijoin optimization if SMB join is created.
- removeSemijoinOptimizationFromSMBJoins(procCtx);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if needed");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- // Remove bloomfilter if no stats generated
- removeSemiJoinIfNoStats(procCtx);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed");
-
- perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
- // after the stats phase we might have some cyclic dependencies that we need
- // to take care of.
- runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs);
- perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning");
+ semijoinRemovalBasedTransformations(procCtx, inputs, outputs);
perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
if(procCtx.conf.getBoolVar(ConfVars.HIVE_SHARED_WORK_OPTIMIZATION)) {
@@ -239,11 +202,6 @@ public class TezCompiler extends TaskCompiler {
private void runCycleAnalysisForPartitionPruning(OptimizeTezProcContext procCtx,
Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
-
- if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING)) {
- return;
- }
-
boolean cycleFree = false;
while (!cycleFree) {
cycleFree = true;
@@ -463,6 +421,80 @@ public class TezCompiler extends TaskCompiler {
ogw.startWalking(topNodes, null);
}
+ private void semijoinRemovalBasedTransformations(OptimizeTezProcContext procCtx,
+ Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
+ PerfLogger perfLogger = SessionState.getPerfLogger();
+
+ final boolean dynamicPartitionPruningEnabled =
+ procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING);
+ final boolean semiJoinReductionEnabled = dynamicPartitionPruningEnabled &&
+ procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION);
+ final boolean extendedReductionEnabled = dynamicPartitionPruningEnabled &&
+ procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED);
+
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (dynamicPartitionPruningEnabled) {
+ runRemoveDynamicPruningOptimization(procCtx, inputs, outputs);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run remove dynamic pruning by size");
+
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (semiJoinReductionEnabled) {
+ markSemiJoinForDPP(procCtx);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based ");
+
+ // Removing semijoin optimization when it may not be beneficial
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (semiJoinReductionEnabled) {
+ removeSemijoinOptimizationByBenefit(procCtx);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove Semijoins based on cost benefits");
+
+ // Remove any parallel edge between semijoin and mapjoin.
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (semiJoinReductionEnabled) {
+ removeSemijoinsParallelToMapJoin(procCtx);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove any parallel edge between semijoin and mapjoin");
+
+ // Remove semijoin optimization if it creates a cycle with mapside joins
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) {
+ removeSemiJoinCyclesDueToMapsideJoins(procCtx);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if it creates a cycle with mapside join");
+
+ // Remove semijoin optimization if SMB join is created.
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) {
+ removeSemijoinOptimizationFromSMBJoins(procCtx);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if needed");
+
+ // Remove bloomfilter if no stats generated
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) {
+ removeSemiJoinIfNoStats(procCtx);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed");
+
+ // after the stats phase we might have some cyclic dependencies that we need
+ // to take care of.
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (dynamicPartitionPruningEnabled) {
+ runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning");
+
+ // remove redundant dpp and semijoins
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+ if (extendedReductionEnabled) {
+ removeRedundantSemijoinAndDpp(procCtx);
+ }
+ perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove redundant semijoin reduction");
+ }
+
private void runRemoveDynamicPruningOptimization(OptimizeTezProcContext procCtx,
Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException {
@@ -748,11 +780,6 @@ public class TezCompiler extends TaskCompiler {
private static void removeSemijoinOptimizationFromSMBJoins(
OptimizeTezProcContext procCtx) throws SemanticException {
- if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) ||
- procCtx.parseContext.getRsToSemiJoinBranchInfo().size() == 0) {
- return;
- }
-
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(
new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%" +
@@ -834,11 +861,6 @@ public class TezCompiler extends TaskCompiler {
private static void removeSemiJoinCyclesDueToMapsideJoins(
OptimizeTezProcContext procCtx) throws SemanticException {
- if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) ||
- procCtx.parseContext.getRsToSemiJoinBranchInfo().size() == 0) {
- return;
- }
-
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(
new RuleRegExp("R1", MapJoinOperator.getOperatorName() + "%" +
@@ -923,99 +945,18 @@ public class TezCompiler extends TaskCompiler {
}
}
- private static class SemiJoinRemovalIfNoStatsProc implements NodeProcessor {
-
- @Override
- public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
- Object... nodeOutputs) throws SemanticException {
- assert nd instanceof ReduceSinkOperator;
- ReduceSinkOperator rs = (ReduceSinkOperator) nd;
- ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext;
- SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs);
- if (sjInfo == null) {
- // nothing to do here.
- return null;
- }
-
- // This is a semijoin branch. The stack should look like,
- // <Parent Ops>-SEL-GB1-RS1-GB2-RS2
- GroupByOperator gbOp = (GroupByOperator) (stack.get(stack.size() - 2));
- GroupByDesc gbDesc = gbOp.getConf();
- ArrayList<AggregationDesc> aggregationDescs = gbDesc.getAggregators();
- for (AggregationDesc agg : aggregationDescs) {
- if (!"bloom_filter".equals(agg.getGenericUDAFName())) {
- continue;
- }
-
- GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator =
- (GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator();
- if (udafBloomFilterEvaluator.hasHintEntries())
- {
- return null; // Created using hint, skip it
- }
-
- long expectedEntries = udafBloomFilterEvaluator.getExpectedEntries();
- if (expectedEntries == -1 || expectedEntries >
- pCtx.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)) {
- if (sjInfo.getIsHint()) {
- throw new SemanticException("Removing hinted semijoin due to lack to stats" +
- " or exceeding max bloom filter entries");
- }
- // Remove the semijoin optimization branch along with ALL the mappings
- // The parent GB2 has all the branches. Collect them and remove them.
- for (Node node : gbOp.getChildren()) {
- ReduceSinkOperator rsFinal = (ReduceSinkOperator) node;
- TableScanOperator ts = pCtx.getRsToSemiJoinBranchInfo().
- get(rsFinal).getTsOp();
- if (LOG.isDebugEnabled()) {
- LOG.debug("expectedEntries=" + expectedEntries + ". "
- + "Either stats unavailable or expectedEntries exceeded max allowable bloomfilter size. "
- + "Removing semijoin "
- + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
- }
- GenTezUtils.removeBranch(rsFinal);
- GenTezUtils.removeSemiJoinOperator(pCtx, rsFinal, ts);
- }
- return null;
- }
- }
-
- // At this point, hinted semijoin case has been handled already
- // Check if big table is big enough that runtime filtering is
- // worth it.
- TableScanOperator ts = sjInfo.getTsOp();
- if (ts.getStatistics() != null) {
- long numRows = ts.getStatistics().getNumRows();
- if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) {
- if (sjInfo.getShouldRemove()) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Insufficient rows (" + numRows + ") to justify semijoin optimization. Removing semijoin "
- + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
- }
- GenTezUtils.removeBranch(rs);
- GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts);
- }
- }
- }
- return null;
- }
- }
-
private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx)
throws SemanticException {
- if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) {
- // Not needed without semi-join reduction
- return;
- }
-
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
opRules.put(
new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%" +
ReduceSinkOperator.getOperatorName() + "%" +
GroupByOperator.getOperatorName() + "%" +
ReduceSinkOperator.getOperatorName() + "%"),
- new SemiJoinRemovalIfNoStatsProc());
- Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
+ new SemiJoinRemovalProc(true, false));
+ SemiJoinRemovalContext ctx =
+ new SemiJoinRemovalContext(procCtx.parseContext);
+ Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
List<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(procCtx.parseContext.getTopOps().values());
GraphWalker ogw = new PreOrderOnceWalker(disp);
@@ -1107,6 +1048,218 @@ public class TezCompiler extends TaskCompiler {
GraphWalker ogw = new PreOrderOnceWalker(disp);
ogw.startWalking(topNodes, null);
}
+
+ private class SemiJoinRemovalProc implements NodeProcessor {
+
+ private final boolean removeBasedOnStats;
+ private final boolean removeRedundant;
+
+ private SemiJoinRemovalProc (boolean removeBasedOnStats, boolean removeRedundant) {
+ this.removeBasedOnStats = removeBasedOnStats;
+ this.removeRedundant = removeRedundant;
+ }
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ ReduceSinkOperator rs = (ReduceSinkOperator) nd;
+ SemiJoinRemovalContext rCtx = (SemiJoinRemovalContext) procCtx;
+ ParseContext pCtx = rCtx.parseContext;
+ SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs);
+ if (sjInfo == null) {
+ // nothing to do here.
+ return null;
+ }
+ TableScanOperator targetTSOp = sjInfo.getTsOp();
+ ExprNodeDesc targetColExpr = pCtx.getRsToRuntimeValuesInfoMap().get(rs).getTsColExpr();
+
+ // This is a semijoin branch. The stack should look like,
+ // <Parent Ops>-SEL-GB1-RS1-GB2-RS2
+ GroupByOperator gbOp = (GroupByOperator) stack.get(stack.size() - 2);
+ GroupByDesc gbDesc = gbOp.getConf();
+ ArrayList<AggregationDesc> aggregationDescs = gbDesc.getAggregators();
+ for (AggregationDesc agg : aggregationDescs) {
+ if (!isBloomFilterAgg(agg)) {
+ continue;
+ }
+
+ GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator =
+ (GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator();
+ if (udafBloomFilterEvaluator.hasHintEntries()) {
+ return null; // Created using hint, skip it
+ }
+
+ if (removeBasedOnStats) {
+ long expectedEntries = udafBloomFilterEvaluator.getExpectedEntries();
+ if (expectedEntries == -1 || expectedEntries >
+ pCtx.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)) {
+ if (sjInfo.getIsHint()) {
+ throw new SemanticException("Removing hinted semijoin due to lack to stats" +
+ " or exceeding max bloom filter entries");
+ }
+ // Remove the semijoin optimization branch along with ALL the mappings
+ // The parent GB2 has all the branches. Collect them and remove them.
+ for (Node node : gbOp.getChildren()) {
+ ReduceSinkOperator rsFinal = (ReduceSinkOperator) node;
+ TableScanOperator ts = pCtx.getRsToSemiJoinBranchInfo().
+ get(rsFinal).getTsOp();
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("expectedEntries=" + expectedEntries + ". "
+ + "Either stats unavailable or expectedEntries exceeded max allowable bloomfilter size. "
+ + "Removing semijoin "
+ + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
+ }
+ GenTezUtils.removeBranch(rsFinal);
+ GenTezUtils.removeSemiJoinOperator(pCtx, rsFinal, ts);
+ }
+ return null;
+ }
+ }
+ }
+
+ if (removeBasedOnStats) {
+ // At this point, hinted semijoin case has been handled already
+ // Check if big table is big enough that runtime filtering is
+ // worth it.
+ TableScanOperator ts = sjInfo.getTsOp();
+ if (ts.getStatistics() != null) {
+ long numRows = ts.getStatistics().getNumRows();
+ if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) {
+ if (sjInfo.getShouldRemove()) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Insufficient rows (" + numRows + ") to justify semijoin optimization. Removing semijoin "
+ + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts));
+ }
+ GenTezUtils.removeBranch(rs);
+ GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts);
+ }
+ }
+ }
+ }
+
+ if (removeRedundant) {
+ // Look for RS ops above the current semijoin branch
+ Set<ReduceSinkOperator> rsOps = OperatorUtils.findOperators(
+ ((Operator<?>) stack.get(stack.size() - 5)).getParentOperators().get(0),
+ ReduceSinkOperator.class);
+ for (Operator<?> otherRSOp : rsOps) {
+ SemiJoinBranchInfo otherSjInfo = pCtx.getRsToSemiJoinBranchInfo().get(otherRSOp);
+ // First conjunct prevents SJ RS from removing itself
+ if (otherRSOp != rs && otherSjInfo != null && otherSjInfo.getTsOp() == targetTSOp) {
+ if (rCtx.opsToRemove.containsKey(otherRSOp)) {
+ // We found siblings, since we are removing the other operator, no need to remove this one
+ continue;
+ }
+ ExprNodeDesc otherColExpr = pCtx.getRsToRuntimeValuesInfoMap().get(otherRSOp).getTsColExpr();
+ if (!otherColExpr.isSame(targetColExpr)) {
+ // Filter should be on the same column, otherwise we do not proceed
+ continue;
+ }
+ rCtx.opsToRemove.put(rs, targetTSOp);
+ break;
+ }
+ }
+ }
+
+ return null;
+ }
+ }
+
+ private static boolean isBloomFilterAgg(AggregationDesc agg) {
+ return "bloom_filter".equals(agg.getGenericUDAFName());
+ }
+
+ private static class DynamicPruningRemovalRedundantProc implements NodeProcessor {
+
+ @Override
+ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+ Object... nodeOutputs) throws SemanticException {
+ AppMasterEventOperator event = (AppMasterEventOperator) nd;
+ if (!(event.getConf() instanceof DynamicPruningEventDesc)) {
+ return null;
+ }
+
+ SemiJoinRemovalContext rCtx = (SemiJoinRemovalContext) procCtx;
+
+ DynamicPruningEventDesc desc = (DynamicPruningEventDesc) event.getConf();
+ TableScanOperator targetTSOp = desc.getTableScan();
+ String targetColumnName = desc.getTargetColumnName();
+
+ // Look for event ops above the current event op branch
+ Operator<?> op = event.getParentOperators().get(0);
+ while (op.getChildOperators().size() < 2) {
+ op = op.getParentOperators().get(0);
+ }
+ Set<AppMasterEventOperator> eventOps = OperatorUtils.findOperators(
+ op, AppMasterEventOperator.class);
+ for (AppMasterEventOperator otherEvent : eventOps) {
+ if (!(otherEvent.getConf() instanceof DynamicPruningEventDesc)) {
+ continue;
+ }
+ DynamicPruningEventDesc otherDesc = (DynamicPruningEventDesc) otherEvent.getConf();
+ if (otherEvent != event && otherDesc.getTableScan() == targetTSOp &&
+ otherDesc.getTargetColumnName().equals(targetColumnName)) {
+ if (rCtx.opsToRemove.containsKey(otherEvent)) {
+ // We found siblings, since we are removing the other operator, no need to remove this one
+ continue;
+ }
+ rCtx.opsToRemove.put(event, targetTSOp);
+ break;
+ }
+ }
+
+ return null;
+ }
+ }
+
+ private void removeRedundantSemijoinAndDpp(OptimizeTezProcContext procCtx)
+ throws SemanticException {
+ Map<Rule, NodeProcessor> opRules = new LinkedHashMap<>();
+ opRules.put(
+ new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%" +
+ ReduceSinkOperator.getOperatorName() + "%" +
+ GroupByOperator.getOperatorName() + "%" +
+ ReduceSinkOperator.getOperatorName() + "%"),
+ new SemiJoinRemovalProc(false, true));
+ opRules.put(
+ new RuleRegExp("R2",
+ AppMasterEventOperator.getOperatorName() + "%"),
+ new DynamicPruningRemovalRedundantProc());
+
+ // Gather
+ SemiJoinRemovalContext ctx =
+ new SemiJoinRemovalContext(procCtx.parseContext);
+ Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx);
+ List<Node> topNodes = new ArrayList<Node>();
+ topNodes.addAll(procCtx.parseContext.getTopOps().values());
+ GraphWalker ogw = new PreOrderOnceWalker(disp);
+ ogw.startWalking(topNodes, null);
+
+ // Remove
+ for (Map.Entry<Operator<?>, TableScanOperator> p : ctx.opsToRemove.entrySet()) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Removing redundant " + OperatorUtils.getOpNamePretty(p.getKey()) + " - " + OperatorUtils.getOpNamePretty(p.getValue()));
+ }
+ GenTezUtils.removeBranch(p.getKey());
+ if (p.getKey() instanceof AppMasterEventOperator) {
+ GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, (AppMasterEventOperator) p.getKey(), p.getValue());
+ } else if (p.getKey() instanceof ReduceSinkOperator) {
+ GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, (ReduceSinkOperator) p.getKey(), p.getValue());
+ } else {
+ throw new SemanticException("Unexpected error - type for branch could not be recognized");
+ }
+ }
+ }
+
+ private class SemiJoinRemovalContext implements NodeProcessorCtx {
+ private final ParseContext parseContext;
+ private final Map<Operator<?>, TableScanOperator> opsToRemove;
+
+ private SemiJoinRemovalContext(final ParseContext parseContext) {
+ this.parseContext = parseContext;
+ this.opsToRemove = new HashMap<>();
+ }
+ }
private boolean findParallelSemiJoinBranch(Operator<?> mapjoin, TableScanOperator bigTableTS,
ParseContext parseContext,
@@ -1196,9 +1349,8 @@ public class TezCompiler extends TaskCompiler {
*/
private void removeSemijoinsParallelToMapJoin(OptimizeTezProcContext procCtx)
throws SemanticException {
- if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) ||
- !procCtx.conf.getBoolVar(ConfVars.HIVECONVERTJOIN) ||
- procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_MAPJOIN)) {
+ if(!procCtx.conf.getBoolVar(ConfVars.HIVECONVERTJOIN) ||
+ procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_MAPJOIN)) {
// Not needed without semi-join reduction or mapjoins or when semijoins
// are enabled for parallel mapjoins.
return;
@@ -1406,11 +1558,6 @@ public class TezCompiler extends TaskCompiler {
private void removeSemijoinOptimizationByBenefit(OptimizeTezProcContext procCtx)
throws SemanticException {
- if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) {
- // Not needed without semi-join reduction
- return;
- }
-
List<ReduceSinkOperator> semijoinRsToRemove = new ArrayList<ReduceSinkOperator>();
Map<ReduceSinkOperator, SemiJoinBranchInfo> map = procCtx.parseContext.getRsToSemiJoinBranchInfo();
double semijoinReductionThreshold = procCtx.conf.getFloatVar(
@@ -1467,11 +1614,6 @@ public class TezCompiler extends TaskCompiler {
private void markSemiJoinForDPP(OptimizeTezProcContext procCtx)
throws SemanticException {
- if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) {
- // Not needed without semi-join reduction
- return;
- }
-
// Stores the Tablescan operators processed to avoid redoing them.
Map<TableScanOperator, TableScanOperator> tsOps = new HashMap<>();
Map<ReduceSinkOperator, SemiJoinBranchInfo> map = procCtx.parseContext.getRsToSemiJoinBranchInfo();
http://git-wip-us.apache.org/repos/asf/hive/blob/18beea6a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java
index dec2d1e..1f533bc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java
@@ -26,6 +26,12 @@ import java.util.Map;
import java.util.Set;
import java.util.Stack;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.OperatorUtils;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -119,14 +125,20 @@ public class SyntheticJoinPredicate extends Transform {
private static class SyntheticContext implements NodeProcessorCtx {
ParseContext parseContext;
+ boolean extended;
public SyntheticContext(ParseContext pCtx) {
parseContext = pCtx;
+ extended = parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED);
}
public ParseContext getParseContext() {
return parseContext;
}
+
+ public boolean isExtended() {
+ return extended;
+ }
}
private static class JoinSynthetic implements NodeProcessor {
@@ -134,6 +146,8 @@ public class SyntheticJoinPredicate extends Transform {
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
+ SyntheticContext sCtx = (SyntheticContext) procCtx;
+
@SuppressWarnings("unchecked")
CommonJoinOperator<JoinDesc> join = (CommonJoinOperator<JoinDesc>) nd;
@@ -161,9 +175,6 @@ public class SyntheticJoinPredicate extends Transform {
continue;
}
- if (LOG.isDebugEnabled()) {
- LOG.debug("Synthetic predicate: " + srcPos + " --> " + targetPos);
- }
ReduceSinkOperator target = (ReduceSinkOperator) parents.get(targetPos);
List<ExprNodeDesc> sourceKeys = source.getConf().getKeyCols();
List<ExprNodeDesc> targetKeys = target.getConf().getKeyCols();
@@ -175,8 +186,10 @@ public class SyntheticJoinPredicate extends Transform {
ExprNodeDesc syntheticExpr = null;
for (int i = 0; i < sourceKeys.size(); ++i) {
- List<ExprNodeDesc> inArgs = new ArrayList<ExprNodeDesc>();
- inArgs.add(sourceKeys.get(i));
+ final ExprNodeDesc sourceKey = sourceKeys.get(i);
+
+ List<ExprNodeDesc> inArgs = new ArrayList<>();
+ inArgs.add(sourceKey);
ExprNodeDynamicListDesc dynamicExpr =
new ExprNodeDynamicListDesc(targetKeys.get(i).getTypeInfo(), target, i);
@@ -186,17 +199,36 @@ public class SyntheticJoinPredicate extends Transform {
ExprNodeDesc syntheticInExpr =
ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("in")
.getGenericUDF(), inArgs);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Synthetic predicate in " + join + ": " + srcPos + " --> " + targetPos + " (" + syntheticInExpr + ")");
+ }
+ List<ExprNodeDesc> andArgs = new ArrayList<>();
if (syntheticExpr != null) {
- List<ExprNodeDesc> andArgs = new ArrayList<ExprNodeDesc>();
andArgs.add(syntheticExpr);
- andArgs.add(syntheticInExpr);
+ }
+ andArgs.add(syntheticInExpr);
+
+ if(sCtx.isExtended()) {
+ // Backtrack
+ List<ExprNodeDesc> newExprs = createDerivatives(target.getParentOperators().get(0), targetKeys.get(i), sourceKey);
+ if (!newExprs.isEmpty()) {
+ if (LOG.isDebugEnabled()) {
+ for (ExprNodeDesc expr : newExprs) {
+ LOG.debug("Additional synthetic predicate in " + join + ": " + srcPos + " --> " + targetPos + " (" + expr + ")");
+ }
+ }
+ andArgs.addAll(newExprs);
+ }
+ }
+ if (andArgs.size() < 2) {
+ syntheticExpr = syntheticInExpr;
+ } else {
+ // Create AND expression
syntheticExpr =
ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("and")
.getGenericUDF(), andArgs);
- } else {
- syntheticExpr = syntheticInExpr;
}
}
@@ -241,6 +273,129 @@ public class SyntheticJoinPredicate extends Transform {
}
return result;
}
+
+ private List<ExprNodeDesc> createDerivatives(final Operator<?> currentOp,
+ final ExprNodeDesc currentNode, final ExprNodeDesc sourceKey) throws SemanticException {
+ List<ExprNodeDesc> resultExprs = new ArrayList<>();
+ return createDerivatives(resultExprs, currentOp, currentNode, sourceKey) ? resultExprs : new ArrayList<>();
+ }
+
+ private boolean createDerivatives(final List<ExprNodeDesc> resultExprs, final Operator<?> op,
+ final ExprNodeDesc currentNode, final ExprNodeDesc sourceKey) throws SemanticException {
+ // 1. Obtain join operator upstream
+ Operator<?> currentOp = op;
+ while (!(currentOp instanceof CommonJoinOperator)) {
+ if (currentOp.getParentOperators() == null || currentOp.getParentOperators().size() != 1) {
+ // Cannot backtrack
+ currentOp = null;
+ break;
+ }
+ if (!(currentOp instanceof FilterOperator) &&
+ !(currentOp instanceof SelectOperator) &&
+ !(currentOp instanceof ReduceSinkOperator) &&
+ !(currentOp instanceof GroupByOperator)) {
+ // Operator not supported
+ currentOp = null;
+ break;
+ }
+ // Move the pointer
+ currentOp = currentOp.getParentOperators().get(0);
+ }
+ if (currentOp == null) {
+ // We did not find any join, we are done
+ return true;
+ }
+ CommonJoinOperator<JoinDesc> joinOp = (CommonJoinOperator) currentOp;
+
+ // 2. Backtrack expression to join output
+ final ExprNodeDesc joinExprNode = ExprNodeDescUtils.backtrack(currentNode, op, joinOp);
+ if (joinExprNode == null || !(joinExprNode instanceof ExprNodeColumnDesc)) {
+ // TODO: We can extend to other expression types
+ // We are done
+ return true;
+ }
+ final String columnRefJoinInput = ((ExprNodeColumnDesc)joinExprNode).getColumn();
+
+ // 3. Find input position in join for expression obtained
+ String columnOutputName = null;
+ for (Map.Entry<String, ExprNodeDesc> e : joinOp.getColumnExprMap().entrySet()) {
+ if (e.getValue() == joinExprNode) {
+ columnOutputName = e.getKey();
+ break;
+ }
+ }
+ if (columnOutputName == null) {
+ // Maybe the join is pruning columns, though it should not.
+ // In any case, we are done
+ return true;
+ }
+ final int srcPos = joinOp.getConf().getReversedExprs().get(columnOutputName);
+ final int[][] targets = getTargets(joinOp);
+ final ReduceSinkOperator rsOp = (ReduceSinkOperator) joinOp.getParentOperators().get(srcPos);
+
+ // 4. Find expression in input RS operator.
+ final Operator<?> rsOpInput = rsOp.getParentOperators().get(0);
+ final ExprNodeDesc rsOpInputExprNode = rsOp.getColumnExprMap().get(columnRefJoinInput);
+ if (rsOpInputExprNode == null) {
+ // Unexpected, we just bail out and we do not infer additional predicates
+ return false;
+ }
+ int posInRSOpKeys = -1;
+ for (int i = 0; i < rsOp.getConf().getKeyCols().size(); i++) {
+ if (rsOpInputExprNode.isSame(rsOp.getConf().getKeyCols().get(i))) {
+ posInRSOpKeys = i;
+ break;
+ }
+ }
+
+ // 5. If it is part of the key, we can create a new semijoin.
+ // In addition, we can do the same for siblings
+ if (posInRSOpKeys >= 0) {
+ // We pass the tests, we add it to the args for the AND expression
+ addParentReduceSink(resultExprs, rsOp, posInRSOpKeys, sourceKey);
+ for (int targetPos: targets[srcPos]) {
+ if (srcPos == targetPos) {
+ continue;
+ }
+ final ReduceSinkOperator otherRsOp = (ReduceSinkOperator) joinOp.getParentOperators().get(targetPos);
+ final Operator<?> otherRsOpInput = otherRsOp.getParentOperators().get(0);
+ // We pass the tests, we add it to the args for the AND expression
+ addParentReduceSink(resultExprs, otherRsOp, posInRSOpKeys, sourceKey);
+ // We propagate to operator below
+ boolean success = createDerivatives(
+ resultExprs, otherRsOpInput, otherRsOp.getConf().getKeyCols().get(posInRSOpKeys), sourceKey);
+ if (!success) {
+ // Something went wrong, bail out
+ return false;
+ }
+ }
+ }
+
+ // 6. Whether it was part of the key or of the value, if we reach here, we can at least
+ // continue propagating to operators below
+ boolean success = createDerivatives(
+ resultExprs, rsOpInput, rsOpInputExprNode, sourceKey);
+ if (!success) {
+ // Something went wrong, bail out
+ return false;
+ }
+
+ // 7. We are done, success
+ return true;
+ }
+
+ private void addParentReduceSink(final List<ExprNodeDesc> andArgs, final ReduceSinkOperator rsOp,
+ final int keyIndex, final ExprNodeDesc sourceKey) throws SemanticException {
+ ExprNodeDynamicListDesc dynamicExpr =
+ new ExprNodeDynamicListDesc(rsOp.getConf().getKeyCols().get(keyIndex).getTypeInfo(), rsOp, keyIndex);
+ // Create synthetic IN expression
+ List<ExprNodeDesc> inArgs = new ArrayList<>();
+ inArgs.add(sourceKey);
+ inArgs.add(dynamicExpr);
+ ExprNodeDesc newNode = ExprNodeGenericFuncDesc.newInstance(
+ FunctionRegistry.getFunctionInfo("in").getGenericUDF(), inArgs);
+ andArgs.add(newNode);
+ }
}
private static class Vectors {
@@ -285,4 +440,5 @@ public class SyntheticJoinPredicate extends Transform {
}
}
}
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/18beea6a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q
new file mode 100644
index 0000000..910119d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q
@@ -0,0 +1,59 @@
+--! qt:dataset:srcpart
+--! qt:dataset:alltypesorc
+set hive.compute.query.using.stats=false;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=true;
+set hive.optimize.metadataonly=false;
+set hive.optimize.index.filter=true;
+set hive.stats.autogather=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
+set hive.stats.fetch.column.stats=true;
+set hive.cbo.enable=false;
+set hive.reorder.nway.joins=false;
+set hive.merge.nway.joins=false;
+
+-- Create Tables
+create table alltypesorc_int_n0 ( cint int, cstring string ) stored as ORC;
+create table srcpart_date_n6 (key string, value string) partitioned by (ds string ) stored as ORC;
+CREATE TABLE srcpart_small_n2(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC;
+
+-- Add Partitions
+alter table srcpart_date_n6 add partition (ds = "2008-04-08");
+alter table srcpart_date_n6 add partition (ds = "2008-04-09");
+
+alter table srcpart_small_n2 add partition (ds1 = "2008-04-08");
+alter table srcpart_small_n2 add partition (ds1 = "2008-04-09");
+
+-- Load
+insert overwrite table alltypesorc_int_n0 select cint, cstring1 from alltypesorc;
+insert overwrite table srcpart_date_n6 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08";
+insert overwrite table srcpart_date_n6 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09";
+insert overwrite table srcpart_small_n2 partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20;
+
+set hive.tez.dynamic.semijoin.reduction=false;
+
+analyze table alltypesorc_int_n0 compute statistics for columns;
+analyze table srcpart_date_n6 compute statistics for columns;
+analyze table srcpart_small_n2 compute statistics for columns;
+
+set hive.tez.dynamic.semijoin.reduction=true;
+EXPLAIN
+SELECT count(*)
+ FROM (SELECT * FROM srcpart_date_n6 WHERE ds = "2008-04-09") `srcpart_date_n6`
+ JOIN (SELECT * FROM srcpart_small_n2 WHERE ds1 = "2008-04-08") `srcpart_small_n2`
+ ON (srcpart_date_n6.key = srcpart_small_n2.key1)
+ JOIN (
+ SELECT *
+ FROM (SELECT * FROM alltypesorc_int_n0 WHERE cint = 10) `alltypesorc_int_n0`
+ JOIN (SELECT * FROM srcpart_small_n2) `srcpart_small_n2`
+ ON (alltypesorc_int_n0.cstring = srcpart_small_n2.key1)) b
+ ON (srcpart_small_n2.key1 = b.cstring);
+
+drop table srcpart_date_n6;
+drop table srcpart_small_n2;
+drop table alltypesorc_int_n0;
http://git-wip-us.apache.org/repos/asf/hive/blob/18beea6a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out
new file mode 100644
index 0000000..883bdd7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out
@@ -0,0 +1,450 @@
+PREHOOK: query: create table alltypesorc_int_n0 ( cint int, cstring string ) stored as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@alltypesorc_int_n0
+POSTHOOK: query: create table alltypesorc_int_n0 ( cint int, cstring string ) stored as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@alltypesorc_int_n0
+PREHOOK: query: create table srcpart_date_n6 (key string, value string) partitioned by (ds string ) stored as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_date_n6
+POSTHOOK: query: create table srcpart_date_n6 (key string, value string) partitioned by (ds string ) stored as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_date_n6
+PREHOOK: query: CREATE TABLE srcpart_small_n2(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_small_n2
+POSTHOOK: query: CREATE TABLE srcpart_small_n2(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_small_n2
+PREHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_date_n6
+POSTHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_date_n6
+POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-08
+PREHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-09")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_date_n6
+POSTHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-09")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_date_n6
+POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-09
+PREHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_small_n2
+POSTHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_small_n2
+POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08
+PREHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-09")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_small_n2
+POSTHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-09")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_small_n2
+POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09
+PREHOOK: query: insert overwrite table alltypesorc_int_n0 select cint, cstring1 from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@alltypesorc_int_n0
+POSTHOOK: query: insert overwrite table alltypesorc_int_n0 select cint, cstring1 from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@alltypesorc_int_n0
+POSTHOOK: Lineage: alltypesorc_int_n0.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: alltypesorc_int_n0.cstring SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+PREHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-08
+POSTHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-08
+POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-09
+POSTHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-09
+POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_small_n2 partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09
+POSTHOOK: query: insert overwrite table srcpart_small_n2 partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09
+POSTHOOK: Lineage: srcpart_small_n2 PARTITION(ds1=2008-04-09).key1 SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_small_n2 PARTITION(ds1=2008-04-09).value1 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: analyze table alltypesorc_int_n0 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@alltypesorc_int_n0
+PREHOOK: Output: default@alltypesorc_int_n0
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table alltypesorc_int_n0 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@alltypesorc_int_n0
+POSTHOOK: Output: default@alltypesorc_int_n0
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcpart_date_n6 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@srcpart_date_n6
+PREHOOK: Input: default@srcpart_date_n6@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date_n6@ds=2008-04-09
+PREHOOK: Output: default@srcpart_date_n6
+PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-08
+PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_date_n6 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@srcpart_date_n6
+POSTHOOK: Input: default@srcpart_date_n6@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date_n6@ds=2008-04-09
+POSTHOOK: Output: default@srcpart_date_n6
+POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-08
+POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-09
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcpart_small_n2 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@srcpart_small_n2
+PREHOOK: Input: default@srcpart_small_n2@ds1=2008-04-08
+PREHOOK: Input: default@srcpart_small_n2@ds1=2008-04-09
+PREHOOK: Output: default@srcpart_small_n2
+PREHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08
+PREHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_small_n2 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@srcpart_small_n2
+POSTHOOK: Input: default@srcpart_small_n2@ds1=2008-04-08
+POSTHOOK: Input: default@srcpart_small_n2@ds1=2008-04-09
+POSTHOOK: Output: default@srcpart_small_n2
+POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08
+POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09
+#### A masked pattern was here ####
+PREHOOK: query: EXPLAIN
+SELECT count(*)
+ FROM (SELECT * FROM srcpart_date_n6 WHERE ds = "2008-04-09") `srcpart_date_n6`
+ JOIN (SELECT * FROM srcpart_small_n2 WHERE ds1 = "2008-04-08") `srcpart_small_n2`
+ ON (srcpart_date_n6.key = srcpart_small_n2.key1)
+ JOIN (
+ SELECT *
+ FROM (SELECT * FROM alltypesorc_int_n0 WHERE cint = 10) `alltypesorc_int_n0`
+ JOIN (SELECT * FROM srcpart_small_n2) `srcpart_small_n2`
+ ON (alltypesorc_int_n0.cstring = srcpart_small_n2.key1)) b
+ ON (srcpart_small_n2.key1 = b.cstring)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT count(*)
+ FROM (SELECT * FROM srcpart_date_n6 WHERE ds = "2008-04-09") `srcpart_date_n6`
+ JOIN (SELECT * FROM srcpart_small_n2 WHERE ds1 = "2008-04-08") `srcpart_small_n2`
+ ON (srcpart_date_n6.key = srcpart_small_n2.key1)
+ JOIN (
+ SELECT *
+ FROM (SELECT * FROM alltypesorc_int_n0 WHERE cint = 10) `alltypesorc_int_n0`
+ JOIN (SELECT * FROM srcpart_small_n2) `srcpart_small_n2`
+ ON (alltypesorc_int_n0.cstring = srcpart_small_n2.key1)) b
+ ON (srcpart_small_n2.key1 = b.cstring)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Reducer 6 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE)
+ Map 11 <- Reducer 10 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE)
+ Reducer 10 <- Map 7 (CUSTOM_SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE)
+ Reducer 8 <- Map 11 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: srcpart_date_n6
+ filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter))) and (key BETWEEN DynamicValue(RS_25_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_25_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key, DynamicValue(RS_25_alltypesorc_int_n0_cstring_bloom_filter)))) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((key BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter))) and (key BETWEEN DynamicValue(RS_25_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_25_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key, DynamicValue(RS_25_alltypesorc_int_n0_cstring_bloom_filter))) and key is not null) (type: boolean)
+ Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: srcpart_small_n2
+ filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_12_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_12_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_12_alltypesorc_int_n0_cstring_bloom_filter))) and (key1 BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key1, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter)))) (type: boolean)
+ Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL
+ Filter Operator
+ predicate: ((key1 BETWEEN DynamicValue(RS_12_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_12_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_12_alltypesorc_int_n0_cstring_bloom_filter))) and (key1 BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key1, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter))) and key1 is not null) (type: boolean)
+ Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL
+ Select Operator
+ expressions: key1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: srcpart_small_n2
+ filterExpr: key1 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE
+ Filter Operator
+ predicate: key1 is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ expressions: key1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: alltypesorc_int_n0
+ filterExpr: ((cint = 10) and cstring is not null) (type: boolean)
+ Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((cint = 10) and cstring is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: cstring (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 10
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col3
+ Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col3 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col3 (type: string)
+ Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col3 (type: string)
+ 1 _col1 (type: string)
+ Statistics: Num rows: 1210 Data size: 105270 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 4
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ Reducer 8
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1
+ Statistics: Num rows: 22 Data size: 1914 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 22 Data size: 1914 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 22 Data size: 1914 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=22)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+ Reducer 9
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=22)
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: drop table srcpart_date_n6
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@srcpart_date_n6
+PREHOOK: Output: default@srcpart_date_n6
+POSTHOOK: query: drop table srcpart_date_n6
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@srcpart_date_n6
+POSTHOOK: Output: default@srcpart_date_n6
+PREHOOK: query: drop table srcpart_small_n2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@srcpart_small_n2
+PREHOOK: Output: default@srcpart_small_n2
+POSTHOOK: query: drop table srcpart_small_n2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@srcpart_small_n2
+POSTHOOK: Output: default@srcpart_small_n2
+PREHOOK: query: drop table alltypesorc_int_n0
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@alltypesorc_int_n0
+PREHOOK: Output: default@alltypesorc_int_n0
+POSTHOOK: query: drop table alltypesorc_int_n0
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@alltypesorc_int_n0
+POSTHOOK: Output: default@alltypesorc_int_n0
http://git-wip-us.apache.org/repos/asf/hive/blob/18beea6a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
index 36aff41..2c38d8c 100644
--- a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out
@@ -424,7 +424,7 @@ POSTHOOK: type: ANALYZE_TABLE
POSTHOOK: Input: default@l3_monthly_dw_dimplan
POSTHOOK: Output: default@l3_monthly_dw_dimplan
#### A masked pattern was here ####
-Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: EXPLAIN EXTENDED
SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY
FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
@@ -889,7 +889,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY
FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1
@@ -931,7 +931,7 @@ POSTHOOK: Input: default@l3_monthly_dw_dimplan
7147200 NULL 27114
7147200 NULL 27114
7147200 NULL 27114
-Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: EXPLAIN EXTENDED
SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY
FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
@@ -1397,7 +1397,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY
FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join
l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1
http://git-wip-us.apache.org/repos/asf/hive/blob/18beea6a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out
index 3b53ec7..6133ae1 100644
--- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out
@@ -365,7 +365,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Spark Partition Pruning Sink Operator
- Target Columns: [Map 4 -> [part_col:int (part_col)]]
+ Target Columns: [Map 1 -> [part_col:int (part_col)], Map 4 -> [part_col:int (part_col)]]
Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
Local Work:
Map Reduce Local Work