You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2017/05/11 04:38:08 UTC
hive git commit: HIVE-16610: Semijoin Hint : Should be able to handle
more than one hint per alias (Deepak Jaiswal, reviewed by Jason Dere)
Repository: hive
Updated Branches:
refs/heads/master fd6f8da68 -> ee91b8ec5
HIVE-16610: Semijoin Hint : Should be able to handle more than one hint per alias (Deepak Jaiswal, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ee91b8ec
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ee91b8ec
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ee91b8ec
Branch: refs/heads/master
Commit: ee91b8ec51795803c4c2817c6c2ced8f933f4b86
Parents: fd6f8da
Author: Jason Dere <jd...@hortonworks.com>
Authored: Wed May 10 21:37:34 2017 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Wed May 10 21:37:34 2017 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 2 -
.../DynamicPartitionPruningOptimization.java | 129 ++++++++-----
.../hadoop/hive/ql/parse/ParseContext.java | 6 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 88 ++++-----
.../hadoop/hive/ql/parse/SemiJoinHint.java | 8 +-
.../hadoop/hive/ql/plan/ExprNodeDescUtils.java | 7 +-
.../test/queries/clientpositive/semijoin_hint.q | 27 ++-
.../llap/dynamic_semijoin_reduction.q.out | 4 +-
.../llap/dynamic_semijoin_reduction_2.q.out | 4 +-
.../llap/dynamic_semijoin_user_level.q.out | 2 +-
.../clientpositive/llap/semijoin_hint.q.out | 185 ++++++-------------
11 files changed, 199 insertions(+), 263 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 99c26ce..73e0290 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2907,8 +2907,6 @@ public class HiveConf extends Configuration {
"Big table for runtime filteting should be of atleast this size"),
TEZ_DYNAMIC_SEMIJOIN_REDUCTION_THRESHOLD("hive.tez.dynamic.semijoin.reduction.threshold", (float) 0.50,
"Only perform semijoin optimization if the estimated benefit at or above this fraction of the target table"),
- TEZ_DYNAMIC_SEMIJOIN_REDUCTION_HINT_ONLY("hive.tez.dynamic.semijoin.reduction.hint.only", false,
- "When true, only enforce semijoin when a hint is provided"),
TEZ_SMB_NUMBER_WAVES(
"hive.tez.smb.number.waves",
(float) 0.5,
http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
index f56b9cb..8a62982 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java
@@ -27,6 +27,7 @@ import java.util.List;
import java.util.Map;
import java.util.Stack;
+import com.google.common.base.Preconditions;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.*;
@@ -212,36 +213,38 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
if (semiJoin && ts.getConf().getFilterExpr() != null) {
LOG.debug("Initiate semijoin reduction for " + column + " ("
+ ts.getConf().getFilterExpr().getExprString());
- // Get the table name from which the min-max values and bloom filter will come.
- Operator<?> op = ctx.generator;
-
- while (!(op == null || op instanceof TableScanOperator)) {
- op = op.getParentOperators().get(0);
- }
- String tableAlias = (op == null ? "" : ((TableScanOperator) op).getConf().getAlias());
StringBuilder internalColNameBuilder = new StringBuilder();
StringBuilder colNameBuilder = new StringBuilder();
- if (getColumnName(ctx, internalColNameBuilder, colNameBuilder)) {
+
+ // Make a best-effort attempt to fetch the correct table alias. If it is
+ // not found, fall back to the old logic.
+ StringBuilder tabAliasBuilder = new StringBuilder();
+ if (getColumnInfo(ctx, internalColNameBuilder, colNameBuilder, tabAliasBuilder)) {
String colName = colNameBuilder.toString();
+ String tableAlias;
+ if (tabAliasBuilder.length() > 0) {
+ tableAlias = tabAliasBuilder.toString();
+ } else {
+ Operator<?> op = ctx.generator;
+
+ while (!(op == null || op instanceof TableScanOperator)) {
+ op = op.getParentOperators().get(0);
+ }
+ tableAlias = (op == null ? "" : ((TableScanOperator) op).
+ getConf().getAlias());
+ }
+
+ // Use the tableAlias to generate keyBaseAlias
keyBaseAlias = ctx.generator.getOperatorId() + "_" + tableAlias
+ "_" + colName;
- Map<String, SemiJoinHint> hints = parseContext.getSemiJoinHints();
+ Map<String, List<SemiJoinHint>> hints = parseContext.getSemiJoinHints();
if (hints != null) {
- if (hints.size() > 0) {
- SemiJoinHint sjHint = hints.get(tableAlias);
- if (sjHint != null && sjHint.getColName() != null &&
- !colName.equals(sjHint.getColName())) {
- LOG.debug("Removed hint due to column mismatch + Col = " + colName + " hint column = " + sjHint.getColName());
- sjHint = null;
- }
- semiJoinAttempted = generateSemiJoinOperatorPlan(
- ctx, parseContext, ts, keyBaseAlias,
- internalColNameBuilder.toString(), colName, sjHint);
- if (!semiJoinAttempted && sjHint != null) {
- throw new SemanticException("The user hint to enforce semijoin failed required conditions");
- }
- }
+ // Create semijoin optimizations ONLY for hinted columns
+ semiJoinAttempted = processSemiJoinHints(
+ parseContext, ctx, hints, tableAlias,
+ internalColNameBuilder.toString(), colName, ts,
+ keyBaseAlias);
} else {
// fallback to regular logic
semiJoinAttempted = generateSemiJoinOperatorPlan(
@@ -297,16 +300,30 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
}
// Given a key, find the corresponding column name.
- private boolean getColumnName(DynamicListContext ctx, StringBuilder internalColName,
- StringBuilder colName) {
+ private boolean getColumnInfo(DynamicListContext ctx, StringBuilder internalColName,
+ StringBuilder colName, StringBuilder tabAlias) {
ExprNodeDesc exprNodeDesc = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex());
ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(exprNodeDesc);
if (colExpr == null) {
return false;
}
-
internalColName.append(colExpr.getColumn());
+
+ // fetch table alias
+ ExprNodeDescUtils.ColumnOrigin columnOrigin =
+ ExprNodeDescUtils.findColumnOrigin(exprNodeDesc, ctx.generator);
+
+ if (columnOrigin != null) {
+ // get both tableAlias and column name from columnOrigin
+ assert columnOrigin.op instanceof TableScanOperator;
+ TableScanOperator ts = (TableScanOperator) columnOrigin.op;
+ tabAlias.append(ts.getConf().getAlias());
+ colName.append(
+ ExprNodeDescUtils.getColumnExpr(columnOrigin.col).getColumn());
+ return true;
+ }
+
Operator<? extends OperatorDesc> parentOfRS = ctx.generator.getParentOperators().get(0);
if (!(parentOfRS instanceof SelectOperator)) {
colName.append(internalColName.toString());
@@ -324,6 +341,37 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
return true;
}
+ // Handle hint based semijoin
+ private boolean processSemiJoinHints(
+ ParseContext pCtx, DynamicListContext ctx,
+ Map<String, List<SemiJoinHint>> hints, String tableAlias,
+ String internalColName, String colName, TableScanOperator ts,
+ String keyBaseAlias) throws SemanticException {
+ if (hints.size() == 0) {
+ return false;
+ }
+
+ List<SemiJoinHint> hintList = hints.get(tableAlias);
+ if (hintList == null) {
+ return false;
+ }
+
+ // Iterate through the list
+ for (SemiJoinHint sjHint : hintList) {
+ if (!colName.equals(sjHint.getColName())) {
+ continue;
+ }
+ // match!
+ LOG.info("Creating runtime filter due to user hint: column = " + colName);
+ if (generateSemiJoinOperatorPlan(ctx, pCtx, ts, keyBaseAlias,
+ internalColName, colName, sjHint)) {
+ return true;
+ }
+ throw new SemanticException("The user hint to enforce semijoin failed required conditions");
+ }
+ return false;
+ }
+
private void replaceExprNode(DynamicListContext ctx, FilterDesc desc, ExprNodeDesc node) {
if (ctx.grandParent == null) {
desc.setPredicate(node);
@@ -442,12 +490,6 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
TableScanOperator ts, String keyBaseAlias, String internalColName,
String colName, SemiJoinHint sjHint) throws SemanticException {
- // If semijoin hint is enforced, make sure hint is provided
- if (parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_HINT_ONLY)
- && sjHint == null) {
- return false;
- }
-
// we will put a fork in the plan at the source of the reduce sink
Operator<? extends OperatorDesc> parentOfRS = ctx.generator.getParentOperators().get(0);
@@ -457,23 +499,18 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
assert colName != null;
// Fetch the TableScan Operator.
Operator<?> op = parentOfRS;
- while (!(op == null || op instanceof TableScanOperator)) {
+ while (!(op == null || op instanceof TableScanOperator ||
+ op instanceof ReduceSinkOperator)) {
op = op.getParentOperators().get(0);
}
- assert op != null;
-
- Table table = ((TableScanOperator) op).getConf().getTableMetadata();
- if (table.isPartitionKey(colName)) {
- // The column is partition column, skip the optimization.
- return false;
- }
+ Preconditions.checkNotNull(op);
- // If hint is provided and only hinted semijoin optimizations should be
- // created, then skip other columns on the table
- if (parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_HINT_ONLY)
- && sjHint.getColName() != null &&
- !internalColName.equals(sjHint.getColName())) {
- return false;
+ if (op instanceof TableScanOperator) {
+ Table table = ((TableScanOperator) op).getConf().getTableMetadata();
+ if (table.isPartitionKey(colName)) {
+ // The column is partition column, skip the optimization.
+ return false;
+ }
}
// Check if there already exists a semijoin branch
http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index 6de4bcd..565fbef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -123,7 +123,7 @@ public class ParseContext {
private Map<ExprNodeDesc, GroupByOperator> colExprToGBMap =
new HashMap<>();
- private Map<String, SemiJoinHint> semiJoinHints;
+ private Map<String, List<SemiJoinHint>> semiJoinHints;
public ParseContext() {
}
@@ -674,11 +674,11 @@ public class ParseContext {
return colExprToGBMap;
}
- public void setSemiJoinHints(Map<String, SemiJoinHint> hints) {
+ public void setSemiJoinHints(Map<String, List<SemiJoinHint>> hints) {
this.semiJoinHints = hints;
}
- public Map<String, SemiJoinHint> getSemiJoinHints() {
+ public Map<String, List<SemiJoinHint>> getSemiJoinHints() {
return semiJoinHints;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 5115fc8..ddf74f2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9018,14 +9018,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
/** Parses semjoin hints in the query and returns the table names mapped to filter size, or -1 if not specified.
- * Hints can be in 3 formats
+ * Hints can be in 2 formats
* 1. TableName, ColumnName, bloom filter entries
- * 2. TableName, bloom filter entries, and
- * 3. TableName, ColumnName
+ * 2. TableName, ColumnName
* */
- private Map<String, SemiJoinHint> parseSemiJoinHint(List<ASTNode> hints) throws SemanticException {
+ private Map<String, List<SemiJoinHint>> parseSemiJoinHint(List<ASTNode> hints) throws SemanticException {
if (hints == null || hints.size() == 0) return null;
- Map<String, SemiJoinHint> result = null;
+ Map<String, List<SemiJoinHint>> result = null;
for (ASTNode hintNode : hints) {
for (Node node : hintNode.getChildren()) {
ASTNode hint = (ASTNode) node;
@@ -9033,8 +9032,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
if (result == null) {
result = new HashMap<>();
}
- String alias = null;
- String colName = null;
Tree args = hint.getChild(1);
if (args.getChildCount() == 1) {
String text = args.getChild(0).getText();
@@ -9043,46 +9040,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return result;
}
}
- for (int i = 0; i < args.getChildCount(); i++) {
- // We can have table names, column names or sizes here (or incorrect hint if the user is so inclined).
- String text = args.getChild(i).getText();
- Integer number = null;
- try {
- number = Integer.parseInt(text);
- } catch (NumberFormatException ex) { // Ignore.
- }
- if (number != null) {
- if (alias == null) {
- throw new SemanticException("Invalid semijoin hint - arg " + i + " ("
- + text + ") is a number but the previous one is not an alias");
- }
- if (result.get(alias) != null) {
- // A hint with same alias already present, throw
- throw new SemanticException("A hint with alias " + alias +
- " already present. Please use unique aliases");
- }
- SemiJoinHint sjHint = new SemiJoinHint(alias, colName, number);
- result.put(alias, sjHint);
- alias = null;
- colName = null;
- } else {
- if (alias == null) {
- alias = text;
- } else if (colName == null) {
- colName = text;
- } else {
- // No bloom filter entries provided.
- if (result.get(alias) != null) {
- // A hint with same alias already present, throw
- throw new SemanticException("A hint with alias " + alias +
- " already present. Please use unique aliases");
- }
- SemiJoinHint sjHint = new SemiJoinHint(alias, colName, null);
- result.put(alias, sjHint);
- alias = text;
- colName = null;
- }
- }
+ int curIdx = 0;
+ while(curIdx < args.getChildCount()) {
+ curIdx = parseSingleSemiJoinHint(args, curIdx, result);
}
}
}
@@ -9092,6 +9052,40 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return result;
}
+ private int parseSingleSemiJoinHint(Tree args, int curIdx, Map<String, List<SemiJoinHint>> result)
+ throws SemanticException {
+ // Check if there are enough entries in the tree to constitute a hint.
+ int numEntriesLeft = args.getChildCount() - curIdx;
+ if (numEntriesLeft < 2) {
+ throw new SemanticException("User provided only 1 entry for the hint with alias "
+ + args.getChild(curIdx).getText());
+ }
+
+ String alias = args.getChild(curIdx++).getText();
+ // validate
+ if (StringUtils.isNumeric(alias)) {
+ throw new SemanticException("User provided bloom filter entries when alias is expected");
+ }
+
+ String colName = args.getChild(curIdx++).getText();
+ // validate
+ if (StringUtils.isNumeric(colName)) {
+ throw new SemanticException("User provided bloom filter entries when column name is expected");
+ }
+
+ Integer number = null;
+ if (numEntriesLeft > 2) {
+ // Check whether a bloom filter size entry exists
+ try {
+ number = Integer.parseInt(args.getChild(curIdx).getText());
+ curIdx++;
+ } catch (NumberFormatException e) { // Ignore
+ }
+ }
+ result.computeIfAbsent(alias, value -> new ArrayList<>()).add(new SemiJoinHint(colName, number));
+ return curIdx;
+ }
+
/**
* Merges node to target
*/
http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java
index 1f24e23..f7fd306 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemiJoinHint.java
@@ -19,20 +19,14 @@
package org.apache.hadoop.hive.ql.parse;
public class SemiJoinHint {
- private String tabAlias;
private String colName;
private Integer numEntries;
- public SemiJoinHint(String tabAlias, String colName, Integer numEntries) {
- this.tabAlias = tabAlias;
+ public SemiJoinHint(String colName, Integer numEntries) {
this.colName = colName;
this.numEntries = numEntries;
}
- public String getTabAlias() {
- return tabAlias;
- }
-
public String getColName() {
return colName;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index 8701b2d..df3de03 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -854,14 +854,13 @@ public class ExprNodeDescUtils {
}
private static ExprNodeDesc findParentExpr(ExprNodeColumnDesc col, Operator<?> op) {
- if (op instanceof ReduceSinkOperator) {
- return col;
- }
-
ExprNodeDesc parentExpr = col;
Map<String, ExprNodeDesc> mapping = op.getColumnExprMap();
if (mapping != null) {
parentExpr = mapping.get(col.getColumn());
+ if (parentExpr == null && op instanceof ReduceSinkOperator) {
+ return col;
+ }
}
return parentExpr;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/test/queries/clientpositive/semijoin_hint.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/semijoin_hint.q b/ql/src/test/queries/clientpositive/semijoin_hint.q
index 5fbc273..71fa445 100644
--- a/ql/src/test/queries/clientpositive/semijoin_hint.q
+++ b/ql/src/test/queries/clientpositive/semijoin_hint.q
@@ -42,7 +42,7 @@ set hive.cbo.returnpath.hiveop=true;
-- disabling this test case for returnpath true as the aliases in case of union are mangled due to which hints are not excercised.
--explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1)
-- union all
--- select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1);
+-- select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1);
-- Query which creates semijoin
explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
@@ -50,21 +50,18 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.k
explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring);
-EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
+EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
-set hive.tez.dynamic.semijoin.reduction.hint.only=true;
--- This should NOT create a semijoin as the join is on different column
+-- This should NOT create a semijoin
explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1);
-set hive.tez.dynamic.semijoin.reduction.hint.only=false;
-
set hive.cbo.returnpath.hiveop=false;
explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1)
union all
- select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1);
+ select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1);
-- Query which creates semijoin
explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
@@ -72,21 +69,20 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.k
explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring);
-EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
+EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
-set hive.tez.dynamic.semijoin.reduction.hint.only=true;
--- This should NOT create a semijoin as the join is on different column
+-- This should NOT create a semijoin
explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1);
-set hive.tez.dynamic.semijoin.reduction.hint.only=false;
+
set hive.cbo.enable=false;
explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1)
union all
- select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1);
+ select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1);
-- Query which creates semijoin
explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
@@ -94,11 +90,10 @@ explain select count(*) from srcpart_date k join srcpart_small v on (k.str = v.k
explain select /*+ semi(None)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
EXPLAIN select /*+ semi(srcpart_date, str, 5000)*/ count(*) from srcpart_date join srcpart_small v on (srcpart_date.str = v.key1) join alltypesorc_int i on (srcpart_date.value = i.cstring);
-EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
+EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring);
explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1);
-set hive.tez.dynamic.semijoin.reduction.hint.only=true;
--- This should NOT create a semijoin as the join is on different column
+-- This should NOT create a semijoin
explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small v on (k.value = v.key1);
-set hive.tez.dynamic.semijoin.reduction.hint.only=false;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out
index e3ffcfa..35822f4 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out
@@ -1402,10 +1402,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc_int
- filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date__col1_min) AND DynamicValue(RS_12_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date__col1_bloom_filter)))) (type: boolean)
+ filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_value_min) AND DynamicValue(RS_12_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_value_bloom_filter)))) (type: boolean)
Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date__col1_min) AND DynamicValue(RS_12_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date__col1_bloom_filter)))) (type: boolean)
+ predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_value_min) AND DynamicValue(RS_12_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_value_bloom_filter)))) (type: boolean)
Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cstring (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
index 650dc9f..1da1121 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out
@@ -107,10 +107,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: tt2
- filterExpr: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1__col3_min) AND DynamicValue(RS_23_t1__col3_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1__col3_bloom_filter)))) (type: boolean)
+ filterExpr: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1_timestamp_col_9_min) AND DynamicValue(RS_23_t1_timestamp_col_9_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1_timestamp_col_9_bloom_filter)))) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1__col3_min) AND DynamicValue(RS_23_t1__col3_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1__col3_bloom_filter)))) (type: boolean)
+ predicate: (timestamp_col_18 is not null and decimal1911_col_16 is not null and (timestamp_col_18 BETWEEN DynamicValue(RS_23_t1_timestamp_col_9_min) AND DynamicValue(RS_23_t1_timestamp_col_9_max) and in_bloom_filter(timestamp_col_18, DynamicValue(RS_23_t1_timestamp_col_9_bloom_filter)))) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: decimal1911_col_16 (type: decimal(19,11)), timestamp_col_18 (type: timestamp)
http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out
index 0098b89..18659b3 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_user_level.q.out
@@ -761,7 +761,7 @@ Stage-0
Select Operator [SEL_8] (rows=9174 width=70)
Output:["_col0"]
Filter Operator [FIL_28] (rows=9174 width=70)
- predicate:(cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date__col1_min) AND DynamicValue(RS_12_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date__col1_bloom_filter))))
+ predicate:(cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_value_min) AND DynamicValue(RS_12_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_value_bloom_filter))))
TableScan [TS_6] (rows=12288 width=70)
default@alltypesorc_int,alltypesorc_int,Tbl:COMPLETE,Col:COMPLETE,Output:["cstring"]
<-Reducer 5 [BROADCAST_EDGE] llap
http://git-wip-us.apache.org/repos/asf/hive/blob/ee91b8ec/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
index 3ffc235..ae9bf9b 100644
--- a/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
+++ b/ql/src/test/results/clientpositive/llap/semijoin_hint.q.out
@@ -400,23 +400,21 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Reducer 7 (BROADCAST_EDGE)
- Map 8 <- Reducer 5 (BROADCAST_EDGE)
+ Map 7 <- Reducer 5 (BROADCAST_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
- Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
- Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: i
- filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_7_srcpart_date_value_min) AND DynamicValue(RS_7_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_7_srcpart_date_value_bloom_filter)))) (type: boolean)
+ filterExpr: cstring is not null (type: boolean)
Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_7_srcpart_date_value_min) AND DynamicValue(RS_7_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_7_srcpart_date_value_bloom_filter)))) (type: boolean)
+ predicate: cstring is not null (type: boolean)
Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cstring (type: string)
@@ -448,29 +446,16 @@ STAGE PLANS:
Map-reduce partition columns: value (type: string)
Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: str (type: string)
- Select Operator
- expressions: value (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
Execution mode: llap
LLAP IO: all inputs
- Map 8
+ Map 7
Map Operator Tree:
TableScan
alias: v
- filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_str_min) AND DynamicValue(RS_9_i_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_str_bloom_filter)))) (type: boolean)
+ filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_srcpart_date_str_min) AND DynamicValue(RS_9_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_srcpart_date_str_bloom_filter)))) (type: boolean)
Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
- predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_i_str_min) AND DynamicValue(RS_9_i_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_i_str_bloom_filter)))) (type: boolean)
+ predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_9_srcpart_date_str_min) AND DynamicValue(RS_9_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_9_srcpart_date_str_bloom_filter)))) (type: boolean)
Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: key1 (type: string)
@@ -504,7 +489,7 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410)
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=5000)
mode: hash
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -552,19 +537,7 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410)
- mode: final
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
- Reducer 7
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428)
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=5000)
mode: final
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -579,9 +552,9 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -962,11 +935,11 @@ STAGE PLANS:
PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1)
union all
- select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
+ select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
PREHOOK: type: QUERY
POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1)
union all
- select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
+ select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -1430,23 +1403,21 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Reducer 6 (BROADCAST_EDGE)
- Map 5 <- Reducer 8 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+ Map 7 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
- Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE)
- Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: i
- filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_value_min) AND DynamicValue(RS_10_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_value_bloom_filter)))) (type: boolean)
+ filterExpr: cstring is not null (type: boolean)
Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_value_min) AND DynamicValue(RS_10_srcpart_date_value_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_value_bloom_filter)))) (type: boolean)
+ predicate: cstring is not null (type: boolean)
Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: cstring (type: string)
@@ -1459,14 +1430,14 @@ STAGE PLANS:
Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: all inputs
- Map 5
+ Map 6
Map Operator Tree:
TableScan
alias: srcpart_date
- filterExpr: (str is not null and value is not null and (str BETWEEN DynamicValue(RS_13_v_key1_min) AND DynamicValue(RS_13_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_13_v_key1_bloom_filter)))) (type: boolean)
+ filterExpr: (str is not null and value is not null) (type: boolean)
Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (str is not null and value is not null and (str BETWEEN DynamicValue(RS_13_v_key1_min) AND DynamicValue(RS_13_v_key1_max) and in_bloom_filter(str, DynamicValue(RS_13_v_key1_bloom_filter)))) (type: boolean)
+ predicate: (str is not null and value is not null) (type: boolean)
Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: str (type: string), value (type: string)
@@ -1478,29 +1449,16 @@ STAGE PLANS:
Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string)
- Select Operator
- expressions: _col1 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
Execution mode: llap
LLAP IO: all inputs
Map 7
Map Operator Tree:
TableScan
alias: v
- filterExpr: key1 is not null (type: boolean)
+ filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_12_srcpart_date_str_min) AND DynamicValue(RS_12_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_12_srcpart_date_str_bloom_filter)))) (type: boolean)
Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
- predicate: key1 is not null (type: boolean)
+ predicate: (key1 is not null and (key1 BETWEEN DynamicValue(RS_12_srcpart_date_str_min) AND DynamicValue(RS_12_srcpart_date_str_max) and in_bloom_filter(key1, DynamicValue(RS_12_srcpart_date_str_bloom_filter)))) (type: boolean)
Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: key1 (type: string)
@@ -1511,19 +1469,6 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
- Group By Operator
- aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=410)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
Execution mode: llap
LLAP IO: all inputs
Reducer 2
@@ -1542,6 +1487,19 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=5000)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -1576,11 +1534,11 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Reducer 6
+ Reducer 5
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428)
+ aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=5000)
mode: final
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1588,18 +1546,6 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
- Reducer 8
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=410)
- mode: final
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
Stage: Stage-0
Fetch Operator
@@ -1607,9 +1553,9 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -1984,11 +1930,11 @@ STAGE PLANS:
PREHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1)
union all
- select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
+ select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
PREHOOK: type: QUERY
POSTHOOK: query: explain select /*+ semi(k, str, 5000)*/ count(*) from srcpart_date k join srcpart_small s on (k.str = s.key1)
union all
- select /*+ semi(v, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
+ select /*+ semi(v, key1, 5000)*/ count(*) from srcpart_date d join srcpart_small v on (d.str = v.key1)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -2420,13 +2366,11 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 7 <- Reducer 6 (BROADCAST_EDGE)
- Map 8 <- Reducer 5 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
- Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Map 6 <- Reducer 5 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
+ Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
- Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
- Reducer 6 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -2459,7 +2403,7 @@ STAGE PLANS:
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
Execution mode: llap
LLAP IO: all inputs
- Map 7
+ Map 6
Map Operator Tree:
TableScan
alias: v
@@ -2475,14 +2419,14 @@ STAGE PLANS:
Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
Execution mode: llap
LLAP IO: all inputs
- Map 8
+ Map 7
Map Operator Tree:
TableScan
alias: i
- filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date__col1_min) AND DynamicValue(RS_9_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date__col1_bloom_filter)))) (type: boolean)
+ filterExpr: cstring is not null (type: boolean)
Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date__col1_min) AND DynamicValue(RS_9_srcpart_date__col1_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date__col1_bloom_filter)))) (type: boolean)
+ predicate: cstring is not null (type: boolean)
Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: cstring (type: string)
@@ -2507,19 +2451,6 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col1 (type: string)
Statistics: Num rows: 9756 Data size: 887796 Basic stats: COMPLETE Column stats: PARTIAL
- Select Operator
- expressions: _col1 (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 9756 Data size: 887796 Basic stats: COMPLETE Column stats: PARTIAL
- Group By Operator
- aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=428)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -2558,18 +2489,6 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=428)
- mode: final
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL
- value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
- Reducer 6
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=5000)
mode: final
outputColumnNames: _col0, _col1, _col2
@@ -2585,9 +2504,9 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+PREHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select /*+ semi(i, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
+POSTHOOK: query: EXPLAIN select /*+ semi(i, cstring, 3000)*/ count(*) from srcpart_date k join srcpart_small v on (k.str = v.key1) join alltypesorc_int i on (v.key1 = i.cstring)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage