You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ek...@apache.org on 2017/02/18 17:04:08 UTC
hive git commit: HIVE-15970 Fix merge to work in presence of AST
rewrites (Eugene Koifman, reviewed by Wei Zheng)
Repository: hive
Updated Branches:
refs/heads/master 3f986d7a2 -> 1a6902ce8
HIVE-15970 Fix merge to work in presence of AST rewrites (Eugene Koifman, reviewed by Wei Zheng)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1a6902ce
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1a6902ce
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1a6902ce
Branch: refs/heads/master
Commit: 1a6902ce81c3ac5da98ee5183fa24b98c63642fb
Parents: 3f986d7
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Sat Feb 18 08:58:07 2017 -0800
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Sat Feb 18 08:58:07 2017 -0800
----------------------------------------------------------------------
.../java/org/apache/hadoop/hive/ql/Context.java | 109 +++++++++++++++----
.../ql/parse/UpdateDeleteSemanticAnalyzer.java | 38 ++++---
2 files changed, 111 insertions(+), 36 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1a6902ce/ql/src/java/org/apache/hadoop/hive/ql/Context.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index bbdce63..fae2a12 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -25,7 +25,6 @@ import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
-import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
@@ -54,6 +53,7 @@ import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ExplainConfiguration;
import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.shims.ShimLoader;
@@ -132,7 +132,17 @@ public class Context {
* given tree but multi-insert has several and multi-insert representing MERGE must use
* different prefixes to encode the purpose of different Insert branches
*/
- private Map<ASTNode, DestClausePrefix> tree2DestNamePrefix;
+ private Map<Integer, DestClausePrefix> insertBranchToNamePrefix = new HashMap<>();
+ private Operation operation = Operation.OTHER;
+ public void setOperation(Operation operation) {
+ this.operation = operation;
+ }
+
+ /**
+ * These ops require special handling in various places
+ * (note that Insert into Acid table is in OTHER category)
+ */
+ public enum Operation {UPDATE, DELETE, MERGE, OTHER};
public enum DestClausePrefix {
INSERT("insclause-"), UPDATE("updclause-"), DELETE("delclause-");
private final String prefix;
@@ -143,36 +153,95 @@ public class Context {
return prefix;
}
}
+ private String getMatchedText(ASTNode n) {
+ return getTokenRewriteStream().toString(n.getTokenStartIndex(), n.getTokenStopIndex() + 1).trim();
+ }
/**
* The suffix is always relative to a given ASTNode
*/
public DestClausePrefix getDestNamePrefix(ASTNode curNode) {
- //if there is no mapping, we want to default to "old" naming
assert curNode != null : "must supply curNode";
- if(tree2DestNamePrefix == null || tree2DestNamePrefix.isEmpty()) {
+ if(curNode.getType() != HiveParser.TOK_INSERT_INTO) {
+ //select statement
+ assert curNode.getType() == HiveParser.TOK_DESTINATION;
+ if(operation == Operation.OTHER) {
+ //not an 'interesting' op
+ return DestClausePrefix.INSERT;
+ }
+ //if it is an 'interesting' op but it's a select it must be a sub-query or a derived table
+//it doesn't require a special Acid code path - the rest of the code here is to ensure
+ //the tree structure is what we expect
+ boolean thisIsInASubquery = false;
+ parentLoop: while(curNode.getParent() != null) {
+ curNode = (ASTNode) curNode.getParent();
+ switch (curNode.getType()) {
+ case HiveParser.TOK_SUBQUERY_EXPR:
+ //this is a real subquery (foo IN (select ...))
+ case HiveParser.TOK_SUBQUERY:
+//this is a Derived Table (Select * from (select a from ...))
+ //strictly speaking SetOps should have a TOK_SUBQUERY parent so next 6 items are redundant
+ case HiveParser.TOK_UNIONALL:
+ case HiveParser.TOK_UNIONDISTINCT:
+ case HiveParser.TOK_EXCEPTALL:
+ case HiveParser.TOK_EXCEPTDISTINCT:
+ case HiveParser.TOK_INTERSECTALL:
+ case HiveParser.TOK_INTERSECTDISTINCT:
+ thisIsInASubquery = true;
+ break parentLoop;
+ }
+ }
+ if(!thisIsInASubquery) {
+ throw new IllegalStateException("Expected '" + getMatchedText(curNode) + "' to be in sub-query or set operation.");
+ }
return DestClausePrefix.INSERT;
}
- do {
- DestClausePrefix prefix = tree2DestNamePrefix.get(curNode);
- if(prefix != null) {
- return prefix;
- }
- curNode = (ASTNode) curNode.parent;
- } while(curNode != null);
- return DestClausePrefix.INSERT;
+ switch (operation) {
+ case OTHER:
+ return DestClausePrefix.INSERT;
+ case UPDATE:
+ return DestClausePrefix.UPDATE;
+ case DELETE:
+ return DestClausePrefix.DELETE;
+ case MERGE:
+/* This is the structure expected here
+ HiveParser.TOK_QUERY;
+ HiveParser.TOK_FROM
+ HiveParser.TOK_INSERT;
+ HiveParser.TOK_INSERT_INTO;
+ HiveParser.TOK_INSERT;
+ HiveParser.TOK_INSERT_INTO;
+ .....*/
+ ASTNode insert = (ASTNode) curNode.getParent();
+ assert insert != null && insert.getType() == HiveParser.TOK_INSERT;
+ ASTNode query = (ASTNode) insert.getParent();
+ assert query != null && query.getType() == HiveParser.TOK_QUERY;
+
+ for(int childIdx = 1; childIdx < query.getChildCount(); childIdx++) {//1st child is TOK_FROM
+ assert query.getChild(childIdx).getType() == HiveParser.TOK_INSERT;
+ if(insert == query.getChild(childIdx)) {
+ DestClausePrefix prefix = insertBranchToNamePrefix.get(childIdx);
+ if(prefix == null) {
+ throw new IllegalStateException("Found a node w/o branch mapping: '" +
+ getMatchedText(insert) + "'");
+ }
+ return prefix;
+ }
+ }
+ throw new IllegalStateException("Could not locate '" + getMatchedText(insert) + "'");
+ default:
+ throw new IllegalStateException("Unexpected operation: " + operation);
+ }
}
/**
- * Will make SemanticAnalyzer.Phase1Ctx#dest in subtree rooted at 'tree' use 'prefix'
- * @param tree
+ * Will make SemanticAnalyzer.Phase1Ctx#dest in subtree rooted at 'tree' use 'prefix'. This to
+ * handle multi-insert stmt that represents Merge stmt and has insert branches representing
+ * update/delete/insert.
+ * @param pos ordinal index of specific TOK_INSERT as child of TOK_QUERY
* @return previous prefix for 'tree' or null
*/
- public DestClausePrefix addDestNamePrefix(ASTNode tree, DestClausePrefix prefix) {
- if(tree2DestNamePrefix == null) {
- tree2DestNamePrefix = new IdentityHashMap<>();
- }
- return tree2DestNamePrefix.put(tree, prefix);
+ public DestClausePrefix addDestNamePrefix(int pos, DestClausePrefix prefix) {
+ return insertBranchToNamePrefix.put(pos, prefix);
}
-
public Context(Configuration conf) throws IOException {
this(conf, generateExecutionId());
}
http://git-wip-us.apache.org/repos/asf/hive/blob/1a6902ce/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 12a43a2..725f2ce 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -58,9 +58,9 @@ import org.apache.hadoop.hive.ql.session.SessionState;
*/
public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
- boolean useSuper = false;
+ private boolean useSuper = false;
- public UpdateDeleteSemanticAnalyzer(QueryState queryState) throws SemanticException {
+ UpdateDeleteSemanticAnalyzer(QueryState queryState) throws SemanticException {
super(queryState);
}
@@ -92,19 +92,19 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
}
}
private boolean updating() {
- return currentOperation == Operation.UPDATE;
+ return currentOperation == Context.Operation.UPDATE;
}
private boolean deleting() {
- return currentOperation == Operation.DELETE;
+ return currentOperation == Context.Operation.DELETE;
}
private void analyzeUpdate(ASTNode tree) throws SemanticException {
- currentOperation = Operation.UPDATE;
+ currentOperation = Context.Operation.UPDATE;
reparseAndSuperAnalyze(tree);
}
private void analyzeDelete(ASTNode tree) throws SemanticException {
- currentOperation = Operation.DELETE;
+ currentOperation = Context.Operation.DELETE;
reparseAndSuperAnalyze(tree);
}
/**
@@ -410,10 +410,12 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
"Expected TOK_INSERT as second child of TOK_QUERY but found " + rewrittenInsert.getName();
if(updating()) {
- rewrittenCtx.addDestNamePrefix(rewrittenInsert, Context.DestClausePrefix.UPDATE);
+ rewrittenCtx.setOperation(Context.Operation.UPDATE);
+ rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.UPDATE);
}
else if(deleting()) {
- rewrittenCtx.addDestNamePrefix(rewrittenInsert, Context.DestClausePrefix.DELETE);
+ rewrittenCtx.setOperation(Context.Operation.DELETE);
+ rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.DELETE);
}
if (where != null) {
@@ -489,7 +491,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
return false;
}
private String operation() {
- if (currentOperation == Operation.NOT_ACID) {
+ if (currentOperation == Context.Operation.OTHER) {
throw new IllegalStateException("UpdateDeleteSemanticAnalyzer neither updating nor " +
"deleting, operation not known.");
}
@@ -523,8 +525,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
return colName.toLowerCase();
}
- private enum Operation {UPDATE, DELETE, MERGE, NOT_ACID};
- private Operation currentOperation = Operation.NOT_ACID;
+ private Context.Operation currentOperation = Context.Operation.OTHER;
private static final String Indent = " ";
private IdentifierQuoter quotedIdenfierHelper;
@@ -589,7 +590,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
* @throws SemanticException
*/
private void analyzeMerge(ASTNode tree) throws SemanticException {
- currentOperation = Operation.MERGE;
+ currentOperation = Context.Operation.MERGE;
quotedIdenfierHelper = new IdentifierQuoter(ctx.getTokenRewriteStream());
/*
* See org.apache.hadoop.hive.ql.parse.TestMergeStatement for some examples of the merge AST
@@ -701,8 +702,9 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd());
Context rewrittenCtx = rr.rewrittenCtx;
ASTNode rewrittenTree = rr.rewrittenTree;
+ rewrittenCtx.setOperation(Context.Operation.MERGE);
- //set dest name mapping on new context
+//set dest name mapping on new context; 1st child is TOK_FROM
for(int insClauseIdx = 1, whenClauseIdx = 0;
insClauseIdx < rewrittenTree.getChildCount() - (validating ? 1 : 0/*skip cardinality violation clause*/);
insClauseIdx++, whenClauseIdx++) {
@@ -710,18 +712,22 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
ASTNode insertClause = (ASTNode) rewrittenTree.getChild(insClauseIdx);
switch (getWhenClauseOperation(whenClauses.get(whenClauseIdx)).getType()) {
case HiveParser.TOK_INSERT:
- rewrittenCtx.addDestNamePrefix(insertClause, Context.DestClausePrefix.INSERT);
+ rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.INSERT);
break;
case HiveParser.TOK_UPDATE:
- rewrittenCtx.addDestNamePrefix(insertClause, Context.DestClausePrefix.UPDATE);
+ rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.UPDATE);
break;
case HiveParser.TOK_DELETE:
- rewrittenCtx.addDestNamePrefix(insertClause, Context.DestClausePrefix.DELETE);
+ rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.DELETE);
break;
default:
assert false;
}
}
+ if(validating) {
+ //here means the last branch of the multi-insert is Cardinality Validation
+ rewrittenCtx.addDestNamePrefix(rewrittenTree.getChildCount() - 1, Context.DestClausePrefix.INSERT);
+ }
try {
useSuper = true;
super.analyze(rewrittenTree, rewrittenCtx);