You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2019/09/26 18:44:53 UTC
[hive] branch master updated: HIVE-22079: Post order walker for
iterating over expression tree (Vineet Garg,
reviewed by Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository.
vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 4c8b374 HIVE-22079: Post order walker for iterating over expression tree (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
4c8b374 is described below
commit 4c8b374f20499bd980a59c06943a1aaca5deab8f
Author: Vineet Garg <vg...@apache.org>
AuthorDate: Thu Sep 26 11:44:12 2019 -0700
HIVE-22079: Post order walker for iterating over expression tree (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
---
.../hadoop/hive/ql/lib/ExpressionWalker.java | 100 ++++++++++++---------
...onWalker.java => SubqueryExpressionWalker.java} | 45 +---------
.../hive/ql/optimizer/pcr/PcrExprProcFactory.java | 4 +-
.../hadoop/hive/ql/parse/TypeCheckProcFactory.java | 4 +-
.../hadoop/hive/ql/ppd/ExprWalkerProcFactory.java | 4 +-
5 files changed, 65 insertions(+), 92 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java
index 41607ae..7b19314 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java
@@ -18,11 +18,17 @@
package org.apache.hadoop.hive.ql.lib;
-import org.apache.hadoop.hive.ql.parse.ASTNode;
-import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.SemanticException;
-public class ExpressionWalker extends DefaultGraphWalker {
+import java.util.ArrayDeque;
+import java.util.Deque;
+
+/**
+ * class for traversing tree
+ * This class assumes that the given node represents TREE and not GRAPH
+ * i.e. there is only single path to reach a node
+ */
+public class ExpressionWalker extends DefaultGraphWalker{
/**
* Constructor.
@@ -35,63 +41,69 @@ public class ExpressionWalker extends DefaultGraphWalker {
}
- /**
- * We should bypass subquery since we have already processed and created logical plan
- * (in genLogicalPlan) for subquery at this point.
- * SubQueryExprProcessor will use generated plan and creates appropriate ExprNodeSubQueryDesc.
- */
- private boolean shouldByPass(Node childNode, Node parentNode) {
- if(parentNode instanceof ASTNode
- && ((ASTNode)parentNode).getType() == HiveParser.TOK_SUBQUERY_EXPR) {
- ASTNode parentOp = (ASTNode)parentNode;
- //subquery either in WHERE <LHS> IN <SUBQUERY> form OR WHERE EXISTS <SUBQUERY> form
- //in first case LHS should not be bypassed
- assert(parentOp.getChildCount() == 2 || parentOp.getChildCount()==3);
- if(parentOp.getChildCount() == 3 && (ASTNode)childNode == parentOp.getChild(2)) {
- return false;
- }
- return true;
+ private static class NodeLabeled {
+ final private Node nd;
+ private int currChildIdx;
+
+ NodeLabeled(Node nd) {
+ this.nd = nd;
+ this.currChildIdx = -1;
+ }
+
+ public void incrementChildIdx() {
+ this.currChildIdx++;
+ }
+
+ public int getCurrChildIdx() {
+ return this.currChildIdx;
}
+
+ public Node getNd() {
+ return this.nd;
+ }
+ }
+
+ protected boolean shouldByPass(Node childNode, Node parentNode) {
return false;
}
+
/**
* walk the current operator and its descendants.
*
* @param nd
- * current operator in the graph
+ * current operator in the tree
* @throws SemanticException
*/
protected void walk(Node nd) throws SemanticException {
- // Push the node in the stack
+ Deque<NodeLabeled> traversalStack = new ArrayDeque<>();
+ traversalStack.push(new NodeLabeled(nd));
+
opStack.push(nd);
- // While there are still nodes to dispatch...
- while (!opStack.empty()) {
- Node node = opStack.peek();
+ while(!traversalStack.isEmpty()) {
+ NodeLabeled currLabeledNode = traversalStack.peek();
+ Node currNode = currLabeledNode.getNd();
+ int currIdx = currLabeledNode.getCurrChildIdx();
- if (node.getChildren() == null ||
- getDispatchedList().containsAll(node.getChildren())) {
- // Dispatch current node
- if (!getDispatchedList().contains(node)) {
- dispatch(node, opStack);
- opQueue.add(node);
+ if(currNode.getChildren() != null && currNode.getChildren().size() > currIdx + 1) {
+ Node nextChild = currNode.getChildren().get(currIdx+1);
+ //check if this node should be skipped and not dispatched
+ if(shouldByPass(nextChild, currNode)) {
+ retMap.put(nextChild, null);
+ currLabeledNode.incrementChildIdx();
+ continue;
}
+ traversalStack.push(new NodeLabeled(nextChild));
+ opStack.push(nextChild);
+ currLabeledNode.incrementChildIdx();
+ } else {
+ // dispatch the node
+ dispatch(currNode, opStack);
+ opQueue.add(currNode);
opStack.pop();
- continue;
+ traversalStack.pop();
}
-
- // Add a single child and restart the loop
- for (Node childNode : node.getChildren()) {
- if (!getDispatchedList().contains(childNode)) {
- if(shouldByPass(childNode, node)) {
- retMap.put(childNode, null);
- } else {
- opStack.push(childNode);
- }
- break;
- }
- }
- } // end while
+ }
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/SubqueryExpressionWalker.java
similarity index 58%
copy from ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java
copy to ql/src/java/org/apache/hadoop/hive/ql/lib/SubqueryExpressionWalker.java
index 41607ae..75f09e4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/SubqueryExpressionWalker.java
@@ -20,9 +20,8 @@ package org.apache.hadoop.hive.ql.lib;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-public class ExpressionWalker extends DefaultGraphWalker {
+public class SubqueryExpressionWalker extends ExpressionWalker{
/**
* Constructor.
@@ -30,7 +29,7 @@ public class ExpressionWalker extends DefaultGraphWalker {
* @param disp
* dispatcher to call for each op encountered
*/
- public ExpressionWalker(Dispatcher disp) {
+ public SubqueryExpressionWalker(Dispatcher disp) {
super(disp);
}
@@ -40,7 +39,7 @@ public class ExpressionWalker extends DefaultGraphWalker {
* (in genLogicalPlan) for subquery at this point.
* SubQueryExprProcessor will use generated plan and creates appropriate ExprNodeSubQueryDesc.
*/
- private boolean shouldByPass(Node childNode, Node parentNode) {
+ protected boolean shouldByPass(Node childNode, Node parentNode) {
if(parentNode instanceof ASTNode
&& ((ASTNode)parentNode).getType() == HiveParser.TOK_SUBQUERY_EXPR) {
ASTNode parentOp = (ASTNode)parentNode;
@@ -54,44 +53,6 @@ public class ExpressionWalker extends DefaultGraphWalker {
}
return false;
}
- /**
- * walk the current operator and its descendants.
- *
- * @param nd
- * current operator in the graph
- * @throws SemanticException
- */
- protected void walk(Node nd) throws SemanticException {
- // Push the node in the stack
- opStack.push(nd);
-
- // While there are still nodes to dispatch...
- while (!opStack.empty()) {
- Node node = opStack.peek();
- if (node.getChildren() == null ||
- getDispatchedList().containsAll(node.getChildren())) {
- // Dispatch current node
- if (!getDispatchedList().contains(node)) {
- dispatch(node, opStack);
- opQueue.add(node);
- }
- opStack.pop();
- continue;
- }
-
- // Add a single child and restart the loop
- for (Node childNode : node.getChildren()) {
- if (!getDispatchedList().contains(childNode)) {
- if(shouldByPass(childNode, node)) {
- retMap.put(childNode, null);
- } else {
- opStack.push(childNode);
- }
- break;
- }
- }
- } // end while
- }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
index f612cd2..a689ab5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
@@ -28,9 +28,9 @@ import java.util.Map;
import java.util.Stack;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.ExpressionWalker;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -588,7 +588,7 @@ public final class PcrExprProcFactory {
// rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(),
exprRules, pprCtx);
- GraphWalker egw = new DefaultGraphWalker(disp);
+ GraphWalker egw = new ExpressionWalker(disp);
List<Node> startNodes = new ArrayList<Node>();
startNodes.add(pred);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index 86bb537..e45276f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -49,7 +49,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.ExpressionWalker;
+import org.apache.hadoop.hive.ql.lib.SubqueryExpressionWalker;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -236,7 +236,7 @@ public class TypeCheckProcFactory {
// rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(),
opRules, tcCtx);
- GraphWalker ogw = new ExpressionWalker(disp);
+ GraphWalker ogw = new SubqueryExpressionWalker(disp);
// Create a list of top nodes
ArrayList<Node> topNodes = Lists.<Node>newArrayList(expr);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
index 1c662d7..f8b4ace 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
@@ -29,9 +29,9 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.ExpressionWalker;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -346,7 +346,7 @@ public final class ExprWalkerProcFactory {
// rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(),
exprRules, exprContext);
- GraphWalker egw = new DefaultGraphWalker(disp);
+ GraphWalker egw = new ExpressionWalker(disp);
List<Node> startNodes = new ArrayList<Node>();
List<ExprNodeDesc> clonedPreds = new ArrayList<ExprNodeDesc>();