You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2015/09/09 09:08:48 UTC
[18/50] [abbrv] hive git commit: HIVE-11652: Avoid expensive call to
removeAll in DefaultGraphWalker (Jesus Camacho Rodriguez,
reviewed by Ashutosh Chauhan/Hari Sankar Sivarama Subramaniyan)
HIVE-11652: Avoid expensive call to removeAll in DefaultGraphWalker (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan/Hari Sankar Sivarama Subramaniyan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af91308e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af91308e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af91308e
Branch: refs/heads/beeline-cli
Commit: af91308e5b6573ea6dc793912bcc628a5a40c000
Parents: 22fa921
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Sat Aug 29 11:40:03 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Sat Aug 29 11:42:59 2015 +0200
----------------------------------------------------------------------
.../hadoop/hive/ql/lib/DefaultGraphWalker.java | 80 ++++++++++++++------
.../hadoop/hive/ql/lib/ForwardWalker.java | 33 ++++----
.../hadoop/hive/ql/optimizer/ColumnPruner.java | 6 +-
.../hive/ql/optimizer/ConstantPropagate.java | 10 +--
4 files changed, 79 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
index 583c113..07d2734 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
@@ -22,7 +22,9 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.IdentityHashMap;
+import java.util.LinkedList;
import java.util.List;
+import java.util.Queue;
import java.util.Set;
import java.util.Stack;
@@ -36,7 +38,21 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
*/
public class DefaultGraphWalker implements GraphWalker {
- protected Stack<Node> opStack;
+ /**
+ * opStack keeps the nodes that have been visited, but have not been
+ * dispatched yet
+ */
+ protected final Stack<Node> opStack;
+ /**
+ * opQueue keeps the nodes in the order that the were dispatched.
+ * Then it is used to go through the processed nodes and store
+ * the results that the dispatcher has produced (if any)
+ */
+ protected final Queue<Node> opQueue;
+ /**
+ * toWalk stores the starting nodes for the graph that needs to be
+ * traversed
+ */
protected final List<Node> toWalk = new ArrayList<Node>();
protected final IdentityHashMap<Node, Object> retMap = new IdentityHashMap<Node, Object>();
protected final Dispatcher dispatcher;
@@ -50,13 +66,7 @@ public class DefaultGraphWalker implements GraphWalker {
public DefaultGraphWalker(Dispatcher disp) {
dispatcher = disp;
opStack = new Stack<Node>();
- }
-
- /**
- * @return the toWalk
- */
- public List<Node> getToWalk() {
- return toWalk;
+ opQueue = new LinkedList<Node>();
}
/**
@@ -108,10 +118,22 @@ public class DefaultGraphWalker implements GraphWalker {
while (toWalk.size() > 0) {
Node nd = toWalk.remove(0);
walk(nd);
+ // Some walkers extending DefaultGraphWalker e.g. ForwardWalker
+ // do not use opQueue and rely uniquely in the toWalk structure,
+ // thus we store the results produced by the dispatcher here
+ // TODO: rewriting the logic of those walkers to use opQueue
if (nodeOutput != null && getDispatchedList().contains(nd)) {
nodeOutput.put(nd, retMap.get(nd));
}
}
+
+ // Store the results produced by the dispatcher
+ while (!opQueue.isEmpty()) {
+ Node node = opQueue.poll();
+ if (nodeOutput != null && getDispatchedList().contains(node)) {
+ nodeOutput.put(node, retMap.get(node));
+ }
+ }
}
/**
@@ -121,23 +143,33 @@ public class DefaultGraphWalker implements GraphWalker {
* current operator in the graph
* @throws SemanticException
*/
- public void walk(Node nd) throws SemanticException {
- if (opStack.empty() || nd != opStack.peek()) {
- opStack.push(nd);
- }
+ public void walk(Node nd) throws SemanticException {
+ // Push the node in the stack
+ opStack.push(nd);
+
+ // While there are still nodes to dispatch...
+ while (!opStack.empty()) {
+ Node node = opStack.peek();
- if ((nd.getChildren() == null)
- || getDispatchedList().containsAll(nd.getChildren())) {
- // all children are done or no need to walk the children
- if (!getDispatchedList().contains(nd)) {
- dispatch(nd, opStack);
+ if (node.getChildren() == null ||
+ getDispatchedList().containsAll(node.getChildren())) {
+ // Dispatch current node
+ if (!getDispatchedList().contains(node)) {
+ dispatch(node, opStack);
+ opQueue.add(node);
+ }
+ opStack.pop();
+ continue;
}
- opStack.pop();
- return;
- }
- // add children, self to the front of the queue in that order
- getToWalk().add(0, nd);
- getToWalk().removeAll(nd.getChildren());
- getToWalk().addAll(0, nd.getChildren());
+
+ // Add a single child and restart the loop
+ for (Node childNode : node.getChildren()) {
+ if (!getDispatchedList().contains(childNode)) {
+ opStack.push(childNode);
+ break;
+ }
+ }
+ } // end while
}
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
index a2db3b5..67b4700 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
@@ -19,20 +19,17 @@
package org.apache.hadoop.hive.ql.lib;
import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
-import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
public class ForwardWalker extends DefaultGraphWalker {
/**
-* Constructor.
-*
-* @param disp
-* dispatcher to call for each op encountered
-*/
+ * Constructor.
+ *
+ * @param disp
+ * dispatcher to call for each op encountered
+ */
public ForwardWalker(Dispatcher disp) {
super(disp);
}
@@ -54,17 +51,17 @@ public class ForwardWalker extends DefaultGraphWalker {
@SuppressWarnings("unchecked")
protected void addAllParents(Node nd) {
Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
- getToWalk().removeAll(op.getParentOperators());
- getToWalk().addAll(0, op.getParentOperators());
+ toWalk.removeAll(op.getParentOperators());
+ toWalk.addAll(0, op.getParentOperators());
}
/**
-* walk the current operator and its descendants.
-*
-* @param nd
-* current operator in the graph
-* @throws SemanticException
-*/
+ * walk the current operator and its descendants.
+ *
+ * @param nd
+ * current operator in the graph
+ * @throws SemanticException
+ */
@Override
public void walk(Node nd) throws SemanticException {
if (opStack.empty() || nd != opStack.peek()) {
@@ -73,14 +70,14 @@ public class ForwardWalker extends DefaultGraphWalker {
if (allParentsDispatched(nd)) {
// all children are done or no need to walk the children
if (!getDispatchedList().contains(nd)) {
- getToWalk().addAll(nd.getChildren());
+ toWalk.addAll(nd.getChildren());
dispatch(nd, opStack);
}
opStack.pop();
return;
}
// add children, self to the front of the queue in that order
- getToWalk().add(0, nd);
+ toWalk.add(0, nd);
addAllParents(nd);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
index 9a45458..735b448 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
@@ -174,10 +174,10 @@ public class ColumnPruner implements Transform {
return;
}
// move all the children to the front of queue
- getToWalk().removeAll(nd.getChildren());
- getToWalk().addAll(0, nd.getChildren());
+ toWalk.removeAll(nd.getChildren());
+ toWalk.addAll(0, nd.getChildren());
// add self to the end of the queue
- getToWalk().add(nd);
+ toWalk.add(nd);
opStack.pop();
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
index dd53ced..b6f1f27 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
@@ -151,17 +151,17 @@ public class ConstantPropagate implements Transform {
dispatch(nd, opStack);
opStack.pop();
} else {
- getToWalk().removeAll(parents);
- getToWalk().add(0, nd);
- getToWalk().addAll(0, parents);
+ toWalk.removeAll(parents);
+ toWalk.add(0, nd);
+ toWalk.addAll(0, parents);
return;
}
// move all the children to the front of queue
List<? extends Node> children = nd.getChildren();
if (children != null) {
- getToWalk().removeAll(children);
- getToWalk().addAll(children);
+ toWalk.removeAll(children);
+ toWalk.addAll(children);
}
}
}