You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2019/09/26 18:44:53 UTC

[hive] branch master updated: HIVE-22079: Post order walker for iterating over expression tree (Vineet Garg, reviewed by Jesus Camacho Rodriguez)

This is an automated email from the ASF dual-hosted git repository.

vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 4c8b374  HIVE-22079: Post order walker for iterating over expression tree (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
4c8b374 is described below

commit 4c8b374f20499bd980a59c06943a1aaca5deab8f
Author: Vineet Garg <vg...@apache.org>
AuthorDate: Thu Sep 26 11:44:12 2019 -0700

    HIVE-22079: Post order walker for iterating over expression tree (Vineet Garg, reviewed by Jesus Camacho Rodriguez)
---
 .../hadoop/hive/ql/lib/ExpressionWalker.java       | 100 ++++++++++++---------
 ...onWalker.java => SubqueryExpressionWalker.java} |  45 +---------
 .../hive/ql/optimizer/pcr/PcrExprProcFactory.java  |   4 +-
 .../hadoop/hive/ql/parse/TypeCheckProcFactory.java |   4 +-
 .../hadoop/hive/ql/ppd/ExprWalkerProcFactory.java  |   4 +-
 5 files changed, 65 insertions(+), 92 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java
index 41607ae..7b19314 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java
@@ -18,11 +18,17 @@
 
 package org.apache.hadoop.hive.ql.lib;
 
-import org.apache.hadoop.hive.ql.parse.ASTNode;
-import org.apache.hadoop.hive.ql.parse.HiveParser;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 
-public class ExpressionWalker extends DefaultGraphWalker {
+import java.util.ArrayDeque;
+import java.util.Deque;
+
+/**
+ * class for traversing tree
+ * This class assumes that the given node represents TREE and not GRAPH
+ * i.e. there is only single path to reach a node
+ */
+public class ExpressionWalker extends DefaultGraphWalker{
 
   /**
    * Constructor.
@@ -35,63 +41,69 @@ public class ExpressionWalker extends DefaultGraphWalker {
   }
 
 
-  /**
-   * We should bypass subquery since we have already processed and created logical plan
-   * (in genLogicalPlan) for subquery at this point.
-   * SubQueryExprProcessor will use generated plan and creates appropriate ExprNodeSubQueryDesc.
-   */
-  private boolean shouldByPass(Node childNode, Node parentNode) {
-    if(parentNode instanceof ASTNode
-            && ((ASTNode)parentNode).getType() == HiveParser.TOK_SUBQUERY_EXPR) {
-      ASTNode parentOp = (ASTNode)parentNode;
-      //subquery either in WHERE <LHS> IN <SUBQUERY> form OR WHERE EXISTS <SUBQUERY> form
-      //in first case LHS should not be bypassed
-      assert(parentOp.getChildCount() == 2 || parentOp.getChildCount()==3);
-      if(parentOp.getChildCount() == 3 && (ASTNode)childNode == parentOp.getChild(2)) {
-        return false;
-      }
-      return true;
+  private static class NodeLabeled {
+    final private Node nd;
+    private int currChildIdx;
+
+    NodeLabeled(Node nd) {
+      this.nd = nd;
+      this.currChildIdx = -1;
+    }
+
+    public void incrementChildIdx() {
+      this.currChildIdx++;
+    }
+
+    public int getCurrChildIdx() {
+      return  this.currChildIdx;
     }
+
+    public Node getNd() {
+      return this.nd;
+    }
+  }
+
+  protected boolean shouldByPass(Node childNode, Node parentNode) {
     return false;
   }
+
   /**
    * walk the current operator and its descendants.
    *
    * @param nd
-   *          current operator in the graph
+   *          current operator in the tree
    * @throws SemanticException
    */
   protected void walk(Node nd) throws SemanticException {
-    // Push the node in the stack
+    Deque<NodeLabeled> traversalStack = new ArrayDeque<>();
+    traversalStack.push(new NodeLabeled(nd));
+
     opStack.push(nd);
 
-    // While there are still nodes to dispatch...
-    while (!opStack.empty()) {
-      Node node = opStack.peek();
+    while(!traversalStack.isEmpty()) {
+      NodeLabeled currLabeledNode = traversalStack.peek();
+      Node currNode = currLabeledNode.getNd();
+      int currIdx = currLabeledNode.getCurrChildIdx();
 
-      if (node.getChildren() == null ||
-              getDispatchedList().containsAll(node.getChildren())) {
-        // Dispatch current node
-        if (!getDispatchedList().contains(node)) {
-          dispatch(node, opStack);
-          opQueue.add(node);
+      if(currNode.getChildren() != null && currNode.getChildren().size() > currIdx + 1) {
+        Node nextChild = currNode.getChildren().get(currIdx+1);
+        //check if this node should be skipped and not dispatched
+        if(shouldByPass(nextChild, currNode)) {
+          retMap.put(nextChild, null);
+          currLabeledNode.incrementChildIdx();
+          continue;
         }
+        traversalStack.push(new NodeLabeled(nextChild));
+        opStack.push(nextChild);
+        currLabeledNode.incrementChildIdx();
+      } else {
+        // dispatch the node
+        dispatch(currNode, opStack);
+        opQueue.add(currNode);
         opStack.pop();
-        continue;
+        traversalStack.pop();
       }
-
-      // Add a single child and restart the loop
-      for (Node childNode : node.getChildren()) {
-        if (!getDispatchedList().contains(childNode)) {
-          if(shouldByPass(childNode, node)) {
-            retMap.put(childNode, null);
-          } else {
-            opStack.push(childNode);
-          }
-          break;
-        }
-      }
-    } // end while
+    }
   }
 }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/SubqueryExpressionWalker.java
similarity index 58%
copy from ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java
copy to ql/src/java/org/apache/hadoop/hive/ql/lib/SubqueryExpressionWalker.java
index 41607ae..75f09e4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/SubqueryExpressionWalker.java
@@ -20,9 +20,8 @@ package org.apache.hadoop.hive.ql.lib;
 
 import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.HiveParser;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
 
-public class ExpressionWalker extends DefaultGraphWalker {
+public class SubqueryExpressionWalker extends ExpressionWalker{
 
   /**
    * Constructor.
@@ -30,7 +29,7 @@ public class ExpressionWalker extends DefaultGraphWalker {
    * @param disp
    * dispatcher to call for each op encountered
    */
-  public ExpressionWalker(Dispatcher disp) {
+  public SubqueryExpressionWalker(Dispatcher disp) {
     super(disp);
   }
 
@@ -40,7 +39,7 @@ public class ExpressionWalker extends DefaultGraphWalker {
    * (in genLogicalPlan) for subquery at this point.
    * SubQueryExprProcessor will use generated plan and creates appropriate ExprNodeSubQueryDesc.
    */
-  private boolean shouldByPass(Node childNode, Node parentNode) {
+  protected boolean shouldByPass(Node childNode, Node parentNode) {
     if(parentNode instanceof ASTNode
             && ((ASTNode)parentNode).getType() == HiveParser.TOK_SUBQUERY_EXPR) {
       ASTNode parentOp = (ASTNode)parentNode;
@@ -54,44 +53,6 @@ public class ExpressionWalker extends DefaultGraphWalker {
     }
     return false;
   }
-  /**
-   * walk the current operator and its descendants.
-   *
-   * @param nd
-   *          current operator in the graph
-   * @throws SemanticException
-   */
-  protected void walk(Node nd) throws SemanticException {
-    // Push the node in the stack
-    opStack.push(nd);
-
-    // While there are still nodes to dispatch...
-    while (!opStack.empty()) {
-      Node node = opStack.peek();
 
-      if (node.getChildren() == null ||
-              getDispatchedList().containsAll(node.getChildren())) {
-        // Dispatch current node
-        if (!getDispatchedList().contains(node)) {
-          dispatch(node, opStack);
-          opQueue.add(node);
-        }
-        opStack.pop();
-        continue;
-      }
-
-      // Add a single child and restart the loop
-      for (Node childNode : node.getChildren()) {
-        if (!getDispatchedList().contains(childNode)) {
-          if(shouldByPass(childNode, node)) {
-            retMap.put(childNode, null);
-          } else {
-            opStack.push(childNode);
-          }
-          break;
-        }
-      }
-    } // end while
-  }
 }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
index f612cd2..a689ab5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java
@@ -28,9 +28,9 @@ import java.util.Map;
 import java.util.Stack;
 
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.ExpressionWalker;
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -588,7 +588,7 @@ public final class PcrExprProcFactory {
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(),
         exprRules, pprCtx);
-    GraphWalker egw = new DefaultGraphWalker(disp);
+    GraphWalker egw = new ExpressionWalker(disp);
 
     List<Node> startNodes = new ArrayList<Node>();
     startNodes.add(pred);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index 86bb537..e45276f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -49,7 +49,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.ExpressionWalker;
+import org.apache.hadoop.hive.ql.lib.SubqueryExpressionWalker;
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -236,7 +236,7 @@ public class TypeCheckProcFactory {
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(),
         opRules, tcCtx);
-    GraphWalker ogw = new ExpressionWalker(disp);
+    GraphWalker ogw = new SubqueryExpressionWalker(disp);
 
     // Create a list of top nodes
     ArrayList<Node> topNodes = Lists.<Node>newArrayList(expr);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
index 1c662d7..f8b4ace 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java
@@ -29,9 +29,9 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.ExpressionWalker;
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -346,7 +346,7 @@ public final class ExprWalkerProcFactory {
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(),
         exprRules, exprContext);
-    GraphWalker egw = new DefaultGraphWalker(disp);
+    GraphWalker egw = new ExpressionWalker(disp);
 
     List<Node> startNodes = new ArrayList<Node>();
     List<ExprNodeDesc> clonedPreds = new ArrayList<ExprNodeDesc>();