You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2015/09/09 09:08:48 UTC

[18/50] [abbrv] hive git commit: HIVE-11652: Avoid expensive call to removeAll in DefaultGraphWalker (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan/Hari Sankar Sivarama Subramaniyan)

HIVE-11652: Avoid expensive call to removeAll in DefaultGraphWalker (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan/Hari Sankar Sivarama Subramaniyan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/af91308e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/af91308e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/af91308e

Branch: refs/heads/beeline-cli
Commit: af91308e5b6573ea6dc793912bcc628a5a40c000
Parents: 22fa921
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Sat Aug 29 11:40:03 2015 +0200
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Sat Aug 29 11:42:59 2015 +0200

----------------------------------------------------------------------
 .../hadoop/hive/ql/lib/DefaultGraphWalker.java  | 80 ++++++++++++++------
 .../hadoop/hive/ql/lib/ForwardWalker.java       | 33 ++++----
 .../hadoop/hive/ql/optimizer/ColumnPruner.java  |  6 +-
 .../hive/ql/optimizer/ConstantPropagate.java    | 10 +--
 4 files changed, 79 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
index 583c113..07d2734 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/DefaultGraphWalker.java
@@ -22,7 +22,9 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.IdentityHashMap;
+import java.util.LinkedList;
 import java.util.List;
+import java.util.Queue;
 import java.util.Set;
 import java.util.Stack;
 
@@ -36,7 +38,21 @@ import org.apache.hadoop.hive.ql.parse.SemanticException;
  */
 public class DefaultGraphWalker implements GraphWalker {
 
-  protected Stack<Node> opStack;
+  /**
+   * opStack keeps the nodes that have been visited, but have not been
+   * dispatched yet
+   */
+  protected final Stack<Node> opStack;
+  /**
+   * opQueue keeps the nodes in the order that the were dispatched.
+   * Then it is used to go through the processed nodes and store
+   * the results that the dispatcher has produced (if any)
+   */
+  protected final Queue<Node> opQueue;
+  /**
+   * toWalk stores the starting nodes for the graph that needs to be
+   * traversed
+   */
   protected final List<Node> toWalk = new ArrayList<Node>();
   protected final IdentityHashMap<Node, Object> retMap = new  IdentityHashMap<Node, Object>();
   protected final Dispatcher dispatcher;
@@ -50,13 +66,7 @@ public class DefaultGraphWalker implements GraphWalker {
   public DefaultGraphWalker(Dispatcher disp) {
     dispatcher = disp;
     opStack = new Stack<Node>();
-  }
-
-  /**
-   * @return the toWalk
-   */
-  public List<Node> getToWalk() {
-    return toWalk;
+    opQueue = new LinkedList<Node>();
   }
 
   /**
@@ -108,10 +118,22 @@ public class DefaultGraphWalker implements GraphWalker {
     while (toWalk.size() > 0) {
       Node nd = toWalk.remove(0);
       walk(nd);
+      // Some walkers extending DefaultGraphWalker e.g. ForwardWalker
+      // do not use opQueue and rely uniquely in the toWalk structure,
+      // thus we store the results produced by the dispatcher here
+      // TODO: rewriting the logic of those walkers to use opQueue
       if (nodeOutput != null && getDispatchedList().contains(nd)) {
         nodeOutput.put(nd, retMap.get(nd));
       }
     }
+
+    // Store the results produced by the dispatcher
+    while (!opQueue.isEmpty()) {
+      Node node = opQueue.poll();
+      if (nodeOutput != null && getDispatchedList().contains(node)) {
+        nodeOutput.put(node, retMap.get(node));
+      }
+    }
   }
 
   /**
@@ -121,23 +143,33 @@ public class DefaultGraphWalker implements GraphWalker {
    *          current operator in the graph
    * @throws SemanticException
    */
-  public void walk(Node nd) throws SemanticException {
-    if (opStack.empty() || nd != opStack.peek()) {
-      opStack.push(nd);
-    }
+  public void walk(Node nd) throws SemanticException {    
+    // Push the node in the stack
+    opStack.push(nd);
+
+    // While there are still nodes to dispatch...
+    while (!opStack.empty()) {
+      Node node = opStack.peek();
 
-    if ((nd.getChildren() == null)
-        || getDispatchedList().containsAll(nd.getChildren())) {
-      // all children are done or no need to walk the children
-      if (!getDispatchedList().contains(nd)) {
-        dispatch(nd, opStack);
+      if (node.getChildren() == null ||
+              getDispatchedList().containsAll(node.getChildren())) {
+        // Dispatch current node
+        if (!getDispatchedList().contains(node)) {
+          dispatch(node, opStack);
+          opQueue.add(node);
+        }
+        opStack.pop();
+        continue;
       }
-      opStack.pop();
-      return;
-    }
-    // add children, self to the front of the queue in that order
-    getToWalk().add(0, nd);
-    getToWalk().removeAll(nd.getChildren());
-    getToWalk().addAll(0, nd.getChildren());
+
+      // Add a single child and restart the loop
+      for (Node childNode : node.getChildren()) {
+        if (!getDispatchedList().contains(childNode)) {
+          opStack.push(childNode);
+          break;
+        }
+      }
+    } // end while
   }
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
index a2db3b5..67b4700 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/ForwardWalker.java
@@ -19,20 +19,17 @@
 package org.apache.hadoop.hive.ql.lib;
 
 import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
-import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
 public class ForwardWalker extends DefaultGraphWalker {
 
   /**
-* Constructor.
-*
-* @param disp
-* dispatcher to call for each op encountered
-*/
+   * Constructor.
+   *
+   * @param disp
+   * dispatcher to call for each op encountered
+   */
   public ForwardWalker(Dispatcher disp) {
     super(disp);
   }
@@ -54,17 +51,17 @@ public class ForwardWalker extends DefaultGraphWalker {
   @SuppressWarnings("unchecked")
   protected void addAllParents(Node nd) {
     Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
-    getToWalk().removeAll(op.getParentOperators());
-    getToWalk().addAll(0, op.getParentOperators());
+    toWalk.removeAll(op.getParentOperators());
+    toWalk.addAll(0, op.getParentOperators());
   }
 
   /**
-* walk the current operator and its descendants.
-*
-* @param nd
-* current operator in the graph
-* @throws SemanticException
-*/
+   * walk the current operator and its descendants.
+   *
+   * @param nd
+   * current operator in the graph
+   * @throws SemanticException
+   */
   @Override
   public void walk(Node nd) throws SemanticException {
     if (opStack.empty() || nd != opStack.peek()) {
@@ -73,14 +70,14 @@ public class ForwardWalker extends DefaultGraphWalker {
     if (allParentsDispatched(nd)) {
       // all children are done or no need to walk the children
       if (!getDispatchedList().contains(nd)) {
-        getToWalk().addAll(nd.getChildren());
+        toWalk.addAll(nd.getChildren());
         dispatch(nd, opStack);
       }
       opStack.pop();
       return;
     }
     // add children, self to the front of the queue in that order
-    getToWalk().add(0, nd);
+    toWalk.add(0, nd);
     addAllParents(nd);
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
index 9a45458..735b448 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java
@@ -174,10 +174,10 @@ public class ColumnPruner implements Transform {
         return;
       }
       // move all the children to the front of queue
-      getToWalk().removeAll(nd.getChildren());
-      getToWalk().addAll(0, nd.getChildren());
+      toWalk.removeAll(nd.getChildren());
+      toWalk.addAll(0, nd.getChildren());
       // add self to the end of the queue
-      getToWalk().add(nd);
+      toWalk.add(nd);
       opStack.pop();
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/af91308e/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
index dd53ced..b6f1f27 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java
@@ -151,17 +151,17 @@ public class ConstantPropagate implements Transform {
         dispatch(nd, opStack);
         opStack.pop();
       } else {
-        getToWalk().removeAll(parents);
-        getToWalk().add(0, nd);
-        getToWalk().addAll(0, parents);
+        toWalk.removeAll(parents);
+        toWalk.add(0, nd);
+        toWalk.addAll(0, parents);
         return;
       }
 
       // move all the children to the front of queue
       List<? extends Node> children = nd.getChildren();
       if (children != null) {
-        getToWalk().removeAll(children);
-        getToWalk().addAll(children);
+        toWalk.removeAll(children);
+        toWalk.addAll(children);
       }
     }
   }