You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2017/01/11 09:47:01 UTC
[5/5] hive git commit: HIVE-15539: Optimize complex multi-insert
queries in Calcite (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
HIVE-15539: Optimize complex multi-insert queries in Calcite (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/886978db
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/886978db
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/886978db
Branch: refs/heads/master
Commit: 886978db5ccb43fd0473b0f0f80643b29482a4b0
Parents: e56b60f
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Jan 4 15:45:29 2017 +0000
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Wed Jan 11 09:41:55 2017 +0000
----------------------------------------------------------------------
.../calcite/CalciteSemanticException.java | 2 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 211 ++-
.../hadoop/hive/ql/parse/QBParseInfo.java | 21 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 35 +-
.../queries/clientpositive/multi_insert_gby4.q | 26 +
.../clientpositive/multi_insert_with_join2.q | 29 +-
.../clientpositive/auto_sortmerge_join_13.q.out | 57 +-
.../clientpositive/groupby_sort_1_23.q.out | 2 +-
.../clientpositive/groupby_sort_skew_1_23.q.out | 2 +-
.../llap/auto_smb_mapjoin_14.q.out | 116 +-
.../llap/auto_sortmerge_join_13.q.out | 114 +-
.../clientpositive/llap/explainuser_2.q.out | 1319 +++++++++---------
.../results/clientpositive/llap/lineage3.q.out | 6 +-
.../llap/tez_union_multiinsert.q.out | 218 ++-
.../clientpositive/llap/unionDistinct_1.q.out | 30 +-
.../llap/vector_auto_smb_mapjoin_14.q.out | 78 +-
.../clientpositive/multi_insert_gby4.q.out | 279 ++++
.../clientpositive/multi_insert_union_src.q.out | 4 +-
.../multi_insert_with_join2.q.out | 519 ++++++-
.../spark/auto_smb_mapjoin_14.q.out | 108 +-
.../spark/auto_sortmerge_join_13.q.out | 76 +-
.../spark/groupby_sort_1_23.q.out | 2 +-
.../spark/groupby_sort_skew_1_23.q.out | 2 +-
.../spark/multi_insert_with_join2.q.out | 1006 +++++++++++++
.../results/clientpositive/spark/union17.q.out | 6 +-
.../results/clientpositive/spark/union18.q.out | 6 +-
.../results/clientpositive/spark/union19.q.out | 6 +-
.../results/clientpositive/spark/union31.q.out | 12 +-
.../clientpositive/spark/union_remove_6.q.out | 4 +-
.../spark/union_remove_6_subq.q.out | 4 +-
.../clientpositive/tez/explainanalyze_2.q.out | 972 ++++++-------
.../test/results/clientpositive/union17.q.out | 6 +-
.../test/results/clientpositive/union18.q.out | 6 +-
.../test/results/clientpositive/union19.q.out | 6 +-
.../test/results/clientpositive/union31.q.out | 12 +-
.../results/clientpositive/union_remove_6.q.out | 8 +-
.../clientpositive/union_remove_6_subq.q.out | 8 +-
37 files changed, 3659 insertions(+), 1659 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
index 0038f73..5b2c9c0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java
@@ -32,7 +32,7 @@ public class CalciteSemanticException extends SemanticException {
public enum UnsupportedFeature {
Distinct_without_an_aggreggation, Duplicates_in_RR, Filter_expression_with_non_boolean_return_type,
Having_clause_without_any_groupby, Hint, Invalid_column_reference, Invalid_decimal,
- Less_than_equal_greater_than, Multi_insert, Others, Same_name_in_multiple_expressions,
+ Less_than_equal_greater_than, Others, Same_name_in_multiple_expressions,
Schema_less_table, Select_alias_in_having_clause, Select_transform, Subquery,
Table_sample_clauses, UDTF, Union_type, Unique_join
};
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index cc357c5..9f1b9d5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -23,10 +23,11 @@ import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.UndeclaredThrowableException;
import java.math.BigDecimal;
import java.util.AbstractMap.SimpleEntry;
-import java.util.ArrayList;
import java.util.ArrayDeque;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
+import java.util.Collection;
import java.util.Collections;
import java.util.Deque;
import java.util.EnumSet;
@@ -38,9 +39,11 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.antlr.runtime.ClassicToken;
+import org.antlr.runtime.CommonToken;
import org.antlr.runtime.tree.TreeVisitor;
import org.antlr.runtime.tree.TreeVisitorAction;
import org.apache.calcite.adapter.druid.DruidQuery;
@@ -138,7 +141,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
-import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelDecorrelator;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
@@ -186,6 +188,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectOverIntersec
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsWithStatsRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelDecorrelator;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSemiJoinRule;
@@ -241,10 +244,12 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.joda.time.Interval;
import com.google.common.base.Function;
+import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
+import com.google.common.collect.Multimap;
public class CalcitePlanner extends SemanticAnalyzer {
@@ -328,9 +333,13 @@ public class CalcitePlanner extends SemanticAnalyzer {
queryForCbo = cboCtx.nodeOfInterest; // nodeOfInterest is the query
}
runCBO = canCBOHandleAst(queryForCbo, getQB(), cboCtx);
- profilesCBO = obtainCBOProfiles(queryProperties);
+ if (queryProperties.hasMultiDestQuery()) {
+ handleMultiDestQuery(ast, cboCtx);
+ }
if (runCBO) {
+ profilesCBO = obtainCBOProfiles(queryProperties);
+
disableJoinMerge = true;
boolean reAnalyzeAST = false;
final boolean materializedView = getQB().isMaterializedView();
@@ -454,6 +463,167 @@ public class CalcitePlanner extends SemanticAnalyzer {
return sinkOp;
}
+ /*
+ * Tries to optimize the FROM clause of a multi-insert query. No attempt is made to optimize the insert clauses.
+ * If the rewriting is not possible, CBO is disabled for the query (runCBO is set to false).
+ */
+ private void handleMultiDestQuery(ASTNode ast, PreCboCtx cboCtx) throws SemanticException {
+ // Not supported by CBO
+ if (!runCBO) {
+ return;
+ }
+ // Currently, we only optimize the content of the FROM clause
+ // for multi-insert queries. Thus, nodeOfInterest is the FROM clause
+ if (isJoinToken(cboCtx.nodeOfInterest)) {
+ // Join clause: rewriting is needed
+ ASTNode subq = rewriteASTForMultiInsert(ast, cboCtx.nodeOfInterest);
+ if (subq != null) {
+ // We could rewrite into a subquery
+ cboCtx.nodeOfInterest = (ASTNode) subq.getChild(0);
+ QB newQB = new QB(null, "", false);
+ Phase1Ctx ctx_1 = initPhase1Ctx();
+ doPhase1(cboCtx.nodeOfInterest, newQB, ctx_1, null);
+ setQB(newQB);
+ getMetaData(getQB());
+ } else {
+ runCBO = false;
+ }
+ } else if (cboCtx.nodeOfInterest.getToken().getType() == HiveParser.TOK_SUBQUERY) {
+ // Subquery: no rewriting needed
+ ASTNode subq = cboCtx.nodeOfInterest;
+ // First child is subquery, second child is alias
+ // We set the node of interest and QB to the subquery
+ // We do not need to generate the QB again, but rather we use it directly
+ cboCtx.nodeOfInterest = (ASTNode) subq.getChild(0);
+ String subQAlias = unescapeIdentifier(subq.getChild(1).getText());
+ final QB newQB = getQB().getSubqForAlias(subQAlias).getQB();
+ newQB.getParseInfo().setAlias("");
+ newQB.getParseInfo().setIsSubQ(false);
+ setQB(newQB);
+ } else {
+ // No need to run CBO (table ref or virtual table) or not supported
+ runCBO = false;
+ }
+ }
+
+ private ASTNode rewriteASTForMultiInsert(ASTNode query, ASTNode nodeOfInterest) {
+ // 1. gather references from original query
+ // This is a map from aliases to references.
+ // We keep all references as we will need to modify them after creating
+ // the subquery
+ final Multimap<String, Object> aliasNodes = ArrayListMultimap.create();
+ // To know if we need to bail out
+ final AtomicBoolean notSupported = new AtomicBoolean(false);
+ TreeVisitorAction action = new TreeVisitorAction() {
+ @Override
+ public Object pre(Object t) {
+ if (!notSupported.get()) {
+ if (ParseDriver.adaptor.getType(t) == HiveParser.TOK_ALLCOLREF) {
+ // TODO: this is a limitation of the AST rewriting approach that we will
+ // not be able to overcome till proper integration of full multi-insert
+ // queries with Calcite is implemented.
+ // The current rewriting gathers references from insert clauses and then
+ // updates them with the new subquery references. However, if insert
+ // clauses use * or tab.*, we cannot resolve the columns that we are
+ // referring to. Thus, we just bail out and those queries will not be
+ // currently optimized by Calcite.
+ // An example of such query is:
+ // FROM T_A a LEFT JOIN T_B b ON a.id = b.id
+ // INSERT OVERWRITE TABLE join_result_1
+ // SELECT a.*, b.*
+ // INSERT OVERWRITE TABLE join_result_3
+ // SELECT a.*, b.*;
+ notSupported.set(true);
+ } else if (ParseDriver.adaptor.getType(t) == HiveParser.DOT) {
+ Object c = ParseDriver.adaptor.getChild(t, 0);
+ if (c != null && ParseDriver.adaptor.getType(c) == HiveParser.TOK_TABLE_OR_COL) {
+ aliasNodes.put(((ASTNode) t).toStringTree(), t);
+ }
+ } else if (ParseDriver.adaptor.getType(t) == HiveParser.TOK_TABLE_OR_COL) {
+ Object p = ParseDriver.adaptor.getParent(t);
+ if (p == null || ParseDriver.adaptor.getType(p) != HiveParser.DOT) {
+ aliasNodes.put(((ASTNode) t).toStringTree(), t);
+ }
+ }
+ }
+ return t;
+ }
+ @Override
+ public Object post(Object t) {
+ return t;
+ }
+ };
+ TreeVisitor tv = new TreeVisitor(ParseDriver.adaptor);
+ // We will iterate through the children: if it is an INSERT, we will traverse
+ // the subtree to gather the references
+ for (int i = 0; i < query.getChildCount(); i++) {
+ ASTNode child = (ASTNode) query.getChild(i);
+ if (ParseDriver.adaptor.getType(child) != HiveParser.TOK_INSERT) {
+ // If it is not an INSERT, we do not need to do anything
+ continue;
+ }
+ tv.visit(child, action);
+ }
+ if (notSupported.get()) {
+ // Bail out
+ return null;
+ }
+ // 2. rewrite into query
+ // TOK_QUERY
+ // TOK_FROM
+ // join
+ // TOK_INSERT
+ // TOK_DESTINATION
+ // TOK_DIR
+ // TOK_TMP_FILE
+ // TOK_SELECT
+ // refs
+ ASTNode from = new ASTNode(new CommonToken(HiveParser.TOK_FROM, "TOK_FROM"));
+ from.addChild((ASTNode) ParseDriver.adaptor.dupTree(nodeOfInterest));
+ ASTNode destination = new ASTNode(new CommonToken(HiveParser.TOK_DESTINATION, "TOK_DESTINATION"));
+ ASTNode dir = new ASTNode(new CommonToken(HiveParser.TOK_DIR, "TOK_DIR"));
+ ASTNode tmpFile = new ASTNode(new CommonToken(HiveParser.TOK_TMP_FILE, "TOK_TMP_FILE"));
+ dir.addChild(tmpFile);
+ destination.addChild(dir);
+ ASTNode select = new ASTNode(new CommonToken(HiveParser.TOK_SELECT, "TOK_SELECT"));
+ int num = 0;
+ for (Collection<Object> selectIdentifier : aliasNodes.asMap().values()) {
+ Iterator<Object> it = selectIdentifier.iterator();
+ ASTNode node = (ASTNode) it.next();
+ // Add select expression
+ ASTNode selectExpr = new ASTNode(new CommonToken(HiveParser.TOK_SELEXPR, "TOK_SELEXPR"));
+ selectExpr.addChild((ASTNode) ParseDriver.adaptor.dupTree(node)); // Identifier
+ String colAlias = "col" + num;
+ selectExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias))); // Alias
+ select.addChild(selectExpr);
+ // Rewrite all INSERT references (all the node values for this key)
+ ASTNode colExpr = new ASTNode(new CommonToken(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL"));
+ colExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias)));
+ replaceASTChild(node, colExpr);
+ while (it.hasNext()) {
+ // Loop to rewrite rest of INSERT references
+ node = (ASTNode) it.next();
+ colExpr = new ASTNode(new CommonToken(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL"));
+ colExpr.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, colAlias)));
+ replaceASTChild(node, colExpr);
+ }
+ num++;
+ }
+ ASTNode insert = new ASTNode(new CommonToken(HiveParser.TOK_INSERT, "TOK_INSERT"));
+ insert.addChild(destination);
+ insert.addChild(select);
+ ASTNode newQuery = new ASTNode(new CommonToken(HiveParser.TOK_QUERY, "TOK_QUERY"));
+ newQuery.addChild(from);
+ newQuery.addChild(insert);
+ // 3. create subquery
+ ASTNode subq = new ASTNode(new CommonToken(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY"));
+ subq.addChild(newQuery);
+ subq.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, "subq")));
+ replaceASTChild(nodeOfInterest, subq);
+ // 4. return subquery
+ return subq;
+ }
+
/**
* Can CBO handle the given AST?
*
@@ -476,7 +646,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
|| qb.isCTAS() || qb.isMaterializedView();
// Queries without a source table currently are not supported by CBO
boolean isSupportedType = (qb.getIsQuery() && !qb.containsQueryWithoutSourceTable())
- || qb.isCTAS() || qb.isMaterializedView() || cboCtx.type == PreCboCtx.Type.INSERT;
+ || qb.isCTAS() || qb.isMaterializedView() || cboCtx.type == PreCboCtx.Type.INSERT
+ || cboCtx.type == PreCboCtx.Type.MULTI_INSERT;
boolean noBadTokens = HiveCalciteUtil.validateASTForUnsupportedTokens(ast);
boolean result = isSupportedRoot && isSupportedType
&& (getCreateViewDesc() == null || getCreateViewDesc().isMaterialized())
@@ -542,7 +713,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
if (!queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy()
&& !queryProperties.hasSortBy() && !queryProperties.hasPTF() && !queryProperties.usesScript()
- && !queryProperties.hasMultiDestQuery() && !queryProperties.hasLateralViews()) {
+ && !queryProperties.hasLateralViews()) {
// Ok to run CBO.
return null;
}
@@ -560,8 +731,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
msg += "has PTF; ";
if (queryProperties.usesScript())
msg += "uses scripts; ";
- if (queryProperties.hasMultiDestQuery())
- msg += "is a multi-destination query; ";
if (queryProperties.hasLateralViews())
msg += "has lateral views; ";
@@ -664,7 +833,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
*/
static class PreCboCtx extends PlannerContext {
enum Type {
- NONE, INSERT, CTAS_OR_MV, UNEXPECTED
+ NONE, INSERT, MULTI_INSERT, CTAS_OR_MV, UNEXPECTED
}
private ASTNode nodeOfInterest;
@@ -692,6 +861,17 @@ public class CalcitePlanner extends SemanticAnalyzer {
set(PreCboCtx.Type.INSERT, ast);
}
}
+
+ @Override
+ void setMultiInsertToken(ASTNode child) {
+ set(PreCboCtx.Type.MULTI_INSERT, child);
+ }
+
+ @Override
+ void resetToken() {
+ this.type = Type.NONE;
+ this.nodeOfInterest = null;
+ }
}
ASTNode fixUpAfterCbo(ASTNode originalAst, ASTNode newAst, PreCboCtx cboCtx)
@@ -722,6 +902,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
return newAst;
}
+ case MULTI_INSERT: {
+ // Patch the optimized query back into original FROM clause.
+ replaceASTChild(cboCtx.nodeOfInterest, newAst);
+ return originalAst;
+ }
+
default:
throw new AssertionError("Unexpected type " + cboCtx.type);
}
@@ -3803,14 +3989,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException {
- QBParseInfo qbp = qb.getParseInfo();
- if (qbp.getClauseNames().size() > 1) {
- String msg = String.format("Multi Insert is currently not supported in CBO,"
- + " turn off cbo to use Multi Insert.");
- LOG.debug(msg);
- throw new CalciteSemanticException(msg, UnsupportedFeature.Multi_insert);
- }
- return qbp;
+ return qb.getParseInfo();
}
private List<String> getTabAliases(RowResolver inputRR) {
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
index f549dff..7bf1c59 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java
@@ -40,8 +40,8 @@ import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec;
**/
public class QBParseInfo {
- private final boolean isSubQ;
- private final String alias;
+ private boolean isSubQ;
+ private String alias;
private ASTNode joinExpr;
private ASTNode hints;
private final HashMap<String, ASTNode> aliasToSrc;
@@ -66,6 +66,7 @@ public class QBParseInfo {
// insertIntoTables/insertOverwriteTables map a table's fullName to its ast;
private final Map<String, ASTNode> insertIntoTables;
private final Map<String, ASTNode> insertOverwriteTables;
+ private ASTNode queryFromExpr;
private boolean isAnalyzeCommand; // used for the analyze command (statistics)
private boolean isNoScanAnalyzeCommand; // used for the analyze command (statistics) (noscan)
@@ -235,6 +236,10 @@ public class QBParseInfo {
destToSelExpr.put(clause, ast);
}
+ public void setQueryFromExpr(ASTNode ast) {
+ queryFromExpr = ast;
+ }
+
public void setWhrExprForClause(String clause, ASTNode ast) {
destToWhereExpr.put(clause, ast);
}
@@ -354,6 +359,10 @@ public class QBParseInfo {
return destToSelExpr.get(clause);
}
+ public ASTNode getQueryFrom() {
+ return queryFromExpr;
+ }
+
/**
* Get the Cluster By AST for the clause.
*
@@ -415,10 +424,18 @@ public class QBParseInfo {
return alias;
}
+ public void setAlias(String alias) {
+ this.alias = alias;
+ }
+
public boolean getIsSubQ() {
return isSubQ;
}
+ public void setIsSubQ(boolean isSubQ) {
+ this.isSubQ = isSubQ;
+ }
+
public ASTNode getJoinExpr() {
return joinExpr;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d0131b7..f275f6a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1482,15 +1482,31 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
throw new SemanticException(ErrorMsg.NO_INSERT_INSUBQUERY.getMsg(ast));
}
- if (plannerCtx != null) {
- plannerCtx.setInsertToken(ast, isTmpFileDest);
- }
-
qbp.setDestForClause(ctx_1.dest, (ASTNode) ast.getChild(0));
handleInsertStatementSpecPhase1(ast, qbp, ctx_1);
- if (qbp.getClauseNamesForDest().size() > 1) {
+
+ if (qbp.getClauseNamesForDest().size() == 2) {
+ // From the moment that we have two destination clauses,
+ // we know that this is a multi-insert query.
+ // Thus, set property to right value.
+ // Using qbp.getClauseNamesForDest().size() >= 2 would be
+ // equivalent, but we use == to avoid setting the property
+ // multiple times
queryProperties.setMultiDestQuery(true);
}
+
+ if (plannerCtx != null && !queryProperties.hasMultiDestQuery()) {
+ plannerCtx.setInsertToken(ast, isTmpFileDest);
+ } else if (plannerCtx != null && qbp.getClauseNamesForDest().size() == 2) {
+ // For multi-insert query, currently we only optimize the FROM clause.
+ // Hence, introduce multi-insert token on top of it.
+ // However, first we need to reset existing token (insert).
+ // Using qbp.getClauseNamesForDest().size() >= 2 would be
+ // equivalent, but we use == to avoid resetting the token
+ // multiple times
+ plannerCtx.resetToken();
+ plannerCtx.setMultiInsertToken((ASTNode) qbp.getQueryFrom().getChild(0));
+ }
break;
case HiveParser.TOK_FROM:
@@ -1500,6 +1516,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
"Multiple Children " + child_count));
}
+ if (!qbp.getIsSubQ()) {
+ qbp.setQueryFromExpr(ast);
+ }
+
// Check if this is a subquery / lateral view
ASTNode frm = (ASTNode) ast.getChild(0);
if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
@@ -10662,6 +10682,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
void setInsertToken(ASTNode ast, boolean isTmpFileDest) {
}
+ void setMultiInsertToken(ASTNode child) {
+ }
+
+ void resetToken() {
+ }
}
private Table getTableObjectByName(String tableName) throws HiveException {
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/queries/clientpositive/multi_insert_gby4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multi_insert_gby4.q b/ql/src/test/queries/clientpositive/multi_insert_gby4.q
new file mode 100644
index 0000000..2e22096
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/multi_insert_gby4.q
@@ -0,0 +1,26 @@
+-- SORT_QUERY_RESULTS
+
+create table e1 (key string, count int);
+create table e2 (key string, count int);
+create table e3 (key string, count int);
+
+explain
+FROM (SELECT key, value FROM src) a
+INSERT OVERWRITE TABLE e1
+ SELECT key, COUNT(*) WHERE key>450 GROUP BY key
+INSERT OVERWRITE TABLE e2
+ SELECT key, COUNT(*) WHERE key>500 GROUP BY key
+INSERT OVERWRITE TABLE e3
+ SELECT key, COUNT(*) WHERE key>490 GROUP BY key;
+
+FROM (SELECT key, value FROM src) a
+INSERT OVERWRITE TABLE e1
+ SELECT key, COUNT(*) WHERE key>450 GROUP BY key
+INSERT OVERWRITE TABLE e2
+ SELECT key, COUNT(*) WHERE key>500 GROUP BY key
+INSERT OVERWRITE TABLE e3
+ SELECT key, COUNT(*) WHERE key>490 GROUP BY key;
+
+select * from e1;
+select * from e2;
+select * from e3;
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/queries/clientpositive/multi_insert_with_join2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multi_insert_with_join2.q b/ql/src/test/queries/clientpositive/multi_insert_with_join2.q
index 1529fa2..37d1678 100644
--- a/ql/src/test/queries/clientpositive/multi_insert_with_join2.q
+++ b/ql/src/test/queries/clientpositive/multi_insert_with_join2.q
@@ -1,4 +1,4 @@
-set hive.cbo.enable=false;
+set hive.strict.checks.cartesian.product=false;
CREATE TABLE T_A ( id STRING, val STRING );
CREATE TABLE T_B ( id STRING, val STRING );
@@ -49,3 +49,30 @@ WHERE b.id = 'Id_1' AND b.val = 'val_103'
INSERT OVERWRITE TABLE join_result_3
SELECT a.*, b.*
WHERE b.val = 'val_104' AND b.id = 'Id_2';
+
+explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT *
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT *
+WHERE b.val = 'val_104' AND b.id = 'Id_2';
+
+explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.id, a.val, b.id, b.val
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.id, a.val, b.id, b.val
+WHERE b.val = 'val_104' AND b.id = 'Id_2';
+
+explain
+FROM T_A a JOIN T_B b ON a.id = b.id
+INSERT OVERWRITE TABLE join_result_1
+SELECT a.val, a.id, b.id, b.val
+WHERE b.id = 'Id_1' AND b.val = 'val_103'
+INSERT OVERWRITE TABLE join_result_3
+SELECT a.id, b.val, b.id, a.val
+WHERE b.val = 'val_104' AND b.id = 'Id_2';
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out
index b45411c..90493ff 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out
@@ -89,15 +89,16 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col5, _col6
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Select Operator
expressions: _col0 (type: int), _col2 (type: int)
@@ -267,15 +268,16 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col5, _col6
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Select Operator
expressions: _col0 (type: int), _col2 (type: int)
@@ -445,15 +447,16 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col5, _col6
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
Select Operator
expressions: _col0 (type: int), _col2 (type: int)
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
index 6572f6c..008b796 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
@@ -5808,7 +5808,7 @@ STAGE PLANS:
alias: t2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key = 8) (type: boolean)
+ predicate: (UDFToDouble(key) = 8.0) (type: boolean)
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), val (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
index ce71354..35b38ca 100644
--- a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
+++ b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out
@@ -6296,7 +6296,7 @@ STAGE PLANS:
alias: t2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (key = 8) (type: boolean)
+ predicate: (UDFToDouble(key) = 8.0) (type: boolean)
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), val (type: string)
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out
index e999077..3083291 100644
--- a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out
@@ -1599,6 +1599,10 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Map Operator Tree:
TableScan
alias: a
@@ -1606,22 +1610,34 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col6
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string)
- outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col3
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: string)
- outputColumnNames: _col0, _col1
+ expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
File Output Operator
compressed: false
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
@@ -1629,15 +1645,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
- File Output Operator
- compressed: false
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest2
+ name: default.dest2
Execution mode: llap
Stage: Stage-3
@@ -1812,6 +1820,10 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Map Operator Tree:
TableScan
alias: a
@@ -1819,38 +1831,42 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
+ Select Operator
+ expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest1
- Select Operator
- expressions: _col0 (type: int)
- outputColumnNames: _col0
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count()
- keys: _col0 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint)
Execution mode: llap
Reducer 2
Execution mode: llap
http://git-wip-us.apache.org/repos/asf/hive/blob/886978db/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
index a539e03..633abff 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
@@ -96,19 +96,19 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col5, _col6
- input vertices:
- 1 Map 2
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 2
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col2 (type: int)
@@ -144,12 +144,16 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -310,19 +314,19 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col5, _col6
- input vertices:
- 1 Map 2
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 2
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col2 (type: int)
@@ -358,12 +362,16 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -524,19 +532,19 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col5, _col6
- input vertices:
- 1 Map 2
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 2
Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col2 (type: int)
@@ -572,12 +580,16 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: key (type: int)
- sort order: +
- Map-reduce partition columns: key (type: int)
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- value expressions: value (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs