You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/10/05 06:13:07 UTC

svn commit: r1394358 - in /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer: PrunerExpressionOperatorFactory.java PrunerOperatorFactory.java PrunerUtils.java ppr/ExprProcFactory.java ppr/OpProcFactory.java ppr/PartitionPruner.java

Author: namit
Date: Fri Oct  5 04:13:06 2012
New Revision: 1394358

URL: http://svn.apache.org/viewvc?rev=1394358&view=rev
Log:
HIVE-3514 Refactor Partition Pruner so that logic can be reused.
(Gang Tim Liu via namit)


Added:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerExpressionOperatorFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerUtils.java
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerExpressionOperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerExpressionOperatorFactory.java?rev=1394358&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerExpressionOperatorFactory.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerExpressionOperatorFactory.java Fri Oct  5 04:13:06 2012
@@ -0,0 +1,220 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+
+/**
+ * Expression processor factory for pruning. Each processor tries to
+ * convert the expression subtree into a pruning expression.
+ *
+ * It can be used for partition prunner and list bucketing pruner.
+ */
+public abstract class PrunerExpressionOperatorFactory {
+
+  /**
+   * If all children are candidates and refer only to one table alias then this
+   * expr is a candidate else it is not a candidate but its children could be
+   * final candidates.
+   */
+  public static class GenericFuncExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      ExprNodeDesc newfd = null;
+      ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) nd;
+
+      boolean unknown = false;
+
+      if (FunctionRegistry.isOpAndOrNot(fd)) {
+        // do nothing because "And" and "Or" and "Not" supports null value
+        // evaluation
+        // NOTE: In the future all UDFs that treats null value as UNKNOWN (both
+        // in parameters and return
+        // values) should derive from a common base class UDFNullAsUnknown, so
+        // instead of listing the classes
+        // here we would test whether a class is derived from that base class.
+        // If All childs are null, set unknown to true
+        boolean isAllNull = true;
+        for (Object child : nodeOutputs) {
+          ExprNodeDesc child_nd = (ExprNodeDesc) child;
+          if (!(child_nd instanceof ExprNodeConstantDesc
+              && ((ExprNodeConstantDesc) child_nd).getValue() == null)) {
+            isAllNull = false;
+          }
+        }
+        unknown = isAllNull;
+      } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
+        // If it's a non-deterministic UDF, set unknown to true
+        unknown = true;
+      } else {
+        // If any child is null, set unknown to true
+        for (Object child : nodeOutputs) {
+          ExprNodeDesc child_nd = (ExprNodeDesc) child;
+          if (child_nd instanceof ExprNodeConstantDesc
+              && ((ExprNodeConstantDesc) child_nd).getValue() == null) {
+            unknown = true;
+          }
+        }
+      }
+
+      if (unknown) {
+        newfd = new ExprNodeConstantDesc(fd.getTypeInfo(), null);
+      } else {
+        // Create the list of children
+        ArrayList<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
+        for (Object child : nodeOutputs) {
+          children.add((ExprNodeDesc) child);
+        }
+        // Create a copy of the function descriptor
+        newfd = new ExprNodeGenericFuncDesc(fd.getTypeInfo(), fd.getGenericUDF(), children);
+      }
+
+      return newfd;
+    }
+
+  }
+
+  /**
+   * FieldExprProcessor.
+   *
+   */
+  public static class FieldExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      ExprNodeFieldDesc fnd = (ExprNodeFieldDesc) nd;
+      boolean unknown = false;
+      int idx = 0;
+      ExprNodeDesc left_nd = null;
+      for (Object child : nodeOutputs) {
+        ExprNodeDesc child_nd = (ExprNodeDesc) child;
+        if (child_nd instanceof ExprNodeConstantDesc
+            && ((ExprNodeConstantDesc) child_nd).getValue() == null) {
+          unknown = true;
+        }
+        left_nd = child_nd;
+      }
+
+      assert (idx == 0);
+
+      ExprNodeDesc newnd = null;
+      if (unknown) {
+        newnd = new ExprNodeConstantDesc(fnd.getTypeInfo(), null);
+      } else {
+        newnd = new ExprNodeFieldDesc(fnd.getTypeInfo(), left_nd, fnd.getFieldName(),
+            fnd.getIsList());
+      }
+      return newnd;
+    }
+
+  }
+
+
+  /**
+   * Processor for column expressions.
+   */
+  public static abstract class ColumnExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+
+      ExprNodeDesc newcd = null;
+      ExprNodeColumnDesc cd = (ExprNodeColumnDesc) nd;
+      newcd = processColumnDesc(procCtx, cd);
+
+      return newcd;
+    }
+
+    /**
+     * Process column desc. It should be done by subclass.
+     *
+     * @param procCtx
+     * @param cd
+     * @return
+     */
+    protected abstract ExprNodeDesc processColumnDesc(NodeProcessorCtx procCtx,
+        ExprNodeColumnDesc cd);
+
+  }
+
+  /**
+   * Processor for constants and null expressions. For such expressions the
+   * processor simply clones the exprNodeDesc and returns it.
+   */
+  public static class DefaultExprProcessor implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      if (nd instanceof ExprNodeConstantDesc) {
+        return ((ExprNodeConstantDesc) nd).clone();
+      } else if (nd instanceof ExprNodeNullDesc) {
+        return ((ExprNodeNullDesc) nd).clone();
+      }
+
+      assert (false);
+      return null;
+    }
+  }
+
+  /**
+   * Instantiate default expression processor.
+   * @return
+   */
+  public static final NodeProcessor getDefaultExprProcessor() {
+    return new DefaultExprProcessor();
+  }
+
+  /**
+   * Instantiate generic function processor.
+   *
+   * @return
+   */
+  public static final NodeProcessor getGenericFuncProcessor() {
+    return new GenericFuncExprProcessor();
+  }
+
+  /**
+   * Instantiate field processor.
+   *
+   * @return
+   */
+  public static final NodeProcessor getFieldProcessor() {
+    return new FieldExprProcessor();
+  }
+
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java?rev=1394358&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerOperatorFactory.java Fri Oct  5 04:13:06 2012
@@ -0,0 +1,155 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+
+/**
+ * Operator factory for pruning processing of operator graph We find
+ * all the filter operators that appear just beneath the table scan operators.
+ * We then pass the filter to the pruner to construct a pruner for
+ * that table alias and store a mapping from the table scan operator to that
+ * pruner. We call that pruner later during plan generation.
+ *
+ * Create this class from org.apache.hadoop.hive.ql.optimizer.ppr.OpProcFactory
+ * so that in addition to ppr, other pruner like list bucketin pruner can use it.
+ */
+public abstract class PrunerOperatorFactory {
+
+  /**
+   * Determines the partition pruner for the filter. This is called only when
+   * the filter follows a table scan operator.
+   */
+  public static abstract class FilterPruner implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      FilterOperator fop = (FilterOperator) nd;
+      FilterOperator fop2 = null;
+
+      // The stack contains either ... TS, Filter or
+      // ... TS, Filter, Filter with the head of the stack being the rightmost
+      // symbol. So we just pop out the two elements from the top and if the
+      // second one of them is not a table scan then the operator on the top of
+      // the stack is the Table scan operator.
+      Node tmp = stack.pop();
+      Node tmp2 = stack.pop();
+      TableScanOperator top = null;
+      if (tmp2 instanceof TableScanOperator) {
+        top = (TableScanOperator) tmp2;
+      } else {
+        top = (TableScanOperator) stack.peek();
+        fop2 = (FilterOperator) tmp2;
+      }
+      stack.push(tmp2);
+      stack.push(tmp);
+
+      // If fop2 exists (i.e this is not the top level filter and fop2 is not
+      // a sampling filter then we ignore the current filter
+      if (fop2 != null && !fop2.getConf().getIsSamplingPred()) {
+        return null;
+      }
+
+      // ignore the predicate in case it is not a sampling predicate
+      if (fop.getConf().getIsSamplingPred()) {
+        return null;
+      }
+
+      generatePredicate(procCtx, fop, top);
+
+      return null;
+    }
+
+    /**
+     * Generate predicate.
+     *
+     * Subclass should implement the function. Please refer to {@link OpProcFactory.FilterPPR}
+     *
+     * @param procCtx
+     * @param fop
+     * @param top
+     * @throws SemanticException
+     * @throws UDFArgumentException
+     */
+    protected abstract void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop,
+        TableScanOperator top) throws SemanticException, UDFArgumentException;
+
+    /**
+     * Add pruning predicate.
+     *
+     * @param opToPrunner
+     * @param top
+     * @param new_pruner_pred
+     * @throws UDFArgumentException
+     */
+    protected void addPruningPred(Map<TableScanOperator, ExprNodeDesc> opToPrunner,
+        TableScanOperator top, ExprNodeDesc new_pruner_pred) throws UDFArgumentException {
+      ExprNodeDesc old_pruner_pred = opToPrunner.get(top);
+      ExprNodeDesc pruner_pred = null;
+      if (old_pruner_pred != null) {
+        // or the old_pruner_pred and the new_ppr_pred
+        pruner_pred = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc("OR",
+            old_pruner_pred, new_pruner_pred);
+      } else {
+        pruner_pred = new_pruner_pred;
+      }
+
+      // Put the mapping from table scan operator to pruner_pred
+      opToPrunner.put(top, pruner_pred);
+
+      return;
+    }
+  }
+
+  /**
+   * Default processor which just merges its children.
+   */
+  public static class DefaultPruner implements NodeProcessor {
+
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      // Nothing needs to be done.
+      return null;
+    }
+  }
+
+  /**
+   * Instantiate default processor.
+   *
+   * It's not supposed to be overwritten.
+   *
+   * @return
+   */
+  public final static NodeProcessor getDefaultProc() {
+    return new DefaultPruner();
+  }
+
+}

Added: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerUtils.java?rev=1394358&view=auto
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerUtils.java (added)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PrunerUtils.java Fri Oct  5 04:13:06 2012
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+
+/**
+ * General utility common functions for the Pruner to do optimization.
+ *
+ */
+public final class PrunerUtils {
+  private static Log LOG;
+
+  static {
+    LOG = LogFactory.getLog("org.apache.hadoop.hive.ql.optimizer.PrunerUtils");
+  }
+
+  private PrunerUtils() {
+    //prevent instantiation
+  }
+
+  /**
+   * Walk operator tree for pruner generation.
+   *
+   * @param pctx
+   * @param opWalkerCtx
+   * @param filterProc
+   * @param defaultProc
+   * @throws SemanticException
+   */
+  public static void walkOperatorTree(ParseContext pctx, NodeProcessorCtx opWalkerCtx,
+      NodeProcessor filterProc, NodeProcessor defaultProc) throws SemanticException {
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+
+    // Build regular expression for operator rule.
+    // "(TS%FIL%)|(TS%FIL%FIL%)"
+    String tsOprName = TableScanOperator.getOperatorName();
+    String filtOprName = FilterOperator.getOperatorName();
+
+    opRules.put(new RuleRegExp("R1", new StringBuilder().append("(").append(tsOprName).append("%")
+        .append(filtOprName).append("%)|(").append(tsOprName).append("%").append(filtOprName)
+        .append("%").append(filtOprName).append("%)").toString()), filterProc);
+
+    // The dispatcher fires the processor corresponding to the closest matching
+    // rule and passes the context along
+    Dispatcher disp = new DefaultRuleDispatcher(defaultProc, opRules, opWalkerCtx);
+    GraphWalker ogw = new DefaultGraphWalker(disp);
+
+    // Create a list of topop nodes
+    ArrayList<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pctx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+  }
+
+  /**
+   * Walk expression tree for pruner generation.
+   *
+   * @param pred
+   * @param ctx
+   * @param colProc
+   * @param fieldProc
+   * @param genFuncProc
+   * @param defProc
+   * @return
+   * @throws SemanticException
+   */
+  public static Map<Node, Object> walkExprTree(ExprNodeDesc pred, NodeProcessorCtx ctx,
+      NodeProcessor colProc, NodeProcessor fieldProc, NodeProcessor genFuncProc,
+      NodeProcessor defProc)
+      throws SemanticException {
+    // create a walker which walks the tree in a DFS manner while maintaining
+    // the operator stack. The dispatcher
+    // generates the plan from the operator tree
+    Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
+    exprRules.put(new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"), colProc);
+    exprRules.put(new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"), fieldProc);
+    exprRules.put(new RuleRegExp("R5", ExprNodeGenericFuncDesc.class.getName() + "%"),
+        genFuncProc);
+
+    // The dispatcher fires the processor corresponding to the closest matching
+    // rule and passes the context along
+    Dispatcher disp = new DefaultRuleDispatcher(defProc, exprRules, ctx);
+    GraphWalker egw = new DefaultGraphWalker(disp);
+
+    List<Node> startNodes = new ArrayList<Node>();
+    startNodes.add(pred);
+
+    HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
+    egw.startWalking(startNodes, outputMap);
+    return outputMap;
+  }
+
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java?rev=1394358&r1=1394357&r2=1394358&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ExprProcFactory.java Fri Oct  5 04:13:06 2012
@@ -18,54 +18,46 @@
 
 package org.apache.hadoop.hive.ql.optimizer.ppr;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
 import java.util.Map;
-import java.util.Stack;
 
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
-import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
-import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.GraphWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
-import org.apache.hadoop.hive.ql.lib.Rule;
-import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.optimizer.PrunerExpressionOperatorFactory;
+import org.apache.hadoop.hive.ql.optimizer.PrunerUtils;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
 
 /**
  * Expression processor factory for partition pruning. Each processor tries to
  * convert the expression subtree into a partition pruning expression. This
  * expression is then used to figure out whether a particular partition should
  * be scanned or not.
+ *
+ *  * Refactor:
+ * Move main logic to PrunerExpressionOperatorFactory. ExprProcFactory extends it to reuse logic.
+ *
+ * Any other pruner can reuse it by creating a class extending from PrunerExpressionOperatorFactory.
+ *
+ * Only specific logic is in genPruner(..) which is in its own class like ExprProcFactory.
+ *
  */
-public final class ExprProcFactory {
+public final class ExprProcFactory extends PrunerExpressionOperatorFactory {
 
   private ExprProcFactory() {
     // prevent instantiation
   }
 
   /**
-   * Processor for column expressions.
+   * Processor for ppr column expressions.
    */
-  public static class ColumnExprProcessor implements NodeProcessor {
+  public static class PPRColumnExprProcessor extends ColumnExprProcessor {
 
     @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
-
-      ExprNodeDesc newcd = null;
-      ExprNodeColumnDesc cd = (ExprNodeColumnDesc) nd;
+    protected ExprNodeDesc processColumnDesc(NodeProcessorCtx procCtx, ExprNodeColumnDesc cd) {
+      ExprNodeDesc newcd;
       ExprProcCtx epc = (ExprProcCtx) procCtx;
       if (cd.getTabAlias().equalsIgnoreCase(epc.getTabAlias())
           && cd.getIsPartitionColOrVirtualCol()) {
@@ -74,154 +66,22 @@ public final class ExprProcFactory {
         newcd = new ExprNodeConstantDesc(cd.getTypeInfo(), null);
         epc.setHasNonPartCols(true);
       }
-
       return newcd;
     }
-
   }
 
   /**
-   * If all children are candidates and refer only to one table alias then this
-   * expr is a candidate else it is not a candidate but its children could be
-   * final candidates.
-   */
-  public static class GenericFuncExprProcessor implements NodeProcessor {
-
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
-
-      ExprNodeDesc newfd = null;
-      ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) nd;
-
-      boolean unknown = false;
-
-      if (FunctionRegistry.isOpAndOrNot(fd)) {
-        // do nothing because "And" and "Or" and "Not" supports null value
-        // evaluation
-        // NOTE: In the future all UDFs that treats null value as UNKNOWN (both
-        // in parameters and return
-        // values) should derive from a common base class UDFNullAsUnknown, so
-        // instead of listing the classes
-        // here we would test whether a class is derived from that base class.
-        // If All childs are null, set unknown to true
-        boolean isAllNull = true;
-        for (Object child : nodeOutputs) {
-          ExprNodeDesc child_nd = (ExprNodeDesc) child;
-          if (!(child_nd instanceof ExprNodeConstantDesc
-              && ((ExprNodeConstantDesc) child_nd).getValue() == null)) {
-            isAllNull = false;
-          }
-        }
-        unknown = isAllNull;
-      } else if (!FunctionRegistry.isDeterministic(fd.getGenericUDF())) {
-        // If it's a non-deterministic UDF, set unknown to true
-        unknown = true;
-      } else {
-        // If any child is null, set unknown to true
-        for (Object child : nodeOutputs) {
-          ExprNodeDesc child_nd = (ExprNodeDesc) child;
-          if (child_nd instanceof ExprNodeConstantDesc
-              && ((ExprNodeConstantDesc) child_nd).getValue() == null) {
-            unknown = true;
-          }
-        }
-      }
-
-      if (unknown) {
-        newfd = new ExprNodeConstantDesc(fd.getTypeInfo(), null);
-      } else {
-        // Create the list of children
-        ArrayList<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
-        for (Object child : nodeOutputs) {
-          children.add((ExprNodeDesc) child);
-        }
-        // Create a copy of the function descriptor
-        newfd = new ExprNodeGenericFuncDesc(fd.getTypeInfo(), fd
-            .getGenericUDF(), children);
-      }
-
-      return newfd;
-    }
-
-  }
-
-  /**
-   * FieldExprProcessor.
+   * Instantiate column processor.
    *
+   * @return
    */
-  public static class FieldExprProcessor implements NodeProcessor {
-
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
-
-      ExprNodeFieldDesc fnd = (ExprNodeFieldDesc) nd;
-      boolean unknown = false;
-      int idx = 0;
-      ExprNodeDesc left_nd = null;
-      for (Object child : nodeOutputs) {
-        ExprNodeDesc child_nd = (ExprNodeDesc) child;
-        if (child_nd instanceof ExprNodeConstantDesc
-            && ((ExprNodeConstantDesc) child_nd).getValue() == null) {
-          unknown = true;
-        }
-        left_nd = child_nd;
-      }
-
-      assert (idx == 0);
-
-      ExprNodeDesc newnd = null;
-      if (unknown) {
-        newnd = new ExprNodeConstantDesc(fnd.getTypeInfo(), null);
-      } else {
-        newnd = new ExprNodeFieldDesc(fnd.getTypeInfo(), left_nd, fnd
-            .getFieldName(), fnd.getIsList());
-      }
-      return newnd;
-    }
-
-  }
-
-  /**
-   * Processor for constants and null expressions. For such expressions the
-   * processor simply clones the exprNodeDesc and returns it.
-   */
-  public static class DefaultExprProcessor implements NodeProcessor {
-
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
-      if (nd instanceof ExprNodeConstantDesc) {
-        return ((ExprNodeConstantDesc) nd).clone();
-      } else if (nd instanceof ExprNodeNullDesc) {
-        return ((ExprNodeNullDesc) nd).clone();
-      }
-
-      assert (false);
-      return null;
-    }
-  }
-
-  public static NodeProcessor getDefaultExprProcessor() {
-    return new DefaultExprProcessor();
-  }
-
-  public static NodeProcessor getGenericFuncProcessor() {
-    return new GenericFuncExprProcessor();
-  }
-
-  public static NodeProcessor getFieldProcessor() {
-    return new FieldExprProcessor();
-  }
-
   public static NodeProcessor getColumnProcessor() {
-    return new ColumnExprProcessor();
+    return new PPRColumnExprProcessor();
   }
 
   /**
    * Generates the partition pruner for the expression tree.
-   * 
+   *
    * @param tabAlias
    *          The table alias of the partition table that is being considered
    *          for pruning
@@ -237,30 +97,10 @@ public final class ExprProcFactory {
     // Create the walker, the rules dispatcher and the context.
     ExprProcCtx pprCtx = new ExprProcCtx(tabAlias);
 
-    // create a walker which walks the tree in a DFS manner while maintaining
-    // the operator stack. The dispatcher
-    // generates the plan from the operator tree
-    Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
-    exprRules.put(
-        new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"),
-        getColumnProcessor());
-    exprRules.put(
-        new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"),
-        getFieldProcessor());
-    exprRules.put(new RuleRegExp("R5", ExprNodeGenericFuncDesc.class.getName()
-        + "%"), getGenericFuncProcessor());
-
-    // The dispatcher fires the processor corresponding to the closest matching
-    // rule and passes the context along
-    Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(),
-        exprRules, pprCtx);
-    GraphWalker egw = new DefaultGraphWalker(disp);
-
-    List<Node> startNodes = new ArrayList<Node>();
-    startNodes.add(pred);
+    /* Move common logic to PrunerUtils.walkExprTree(...) so that it can be reused. */
+    Map<Node, Object> outputMap = PrunerUtils.walkExprTree(pred, pprCtx, getColumnProcessor(),
+        getFieldProcessor(), getGenericFuncProcessor(), getDefaultExprProcessor());
 
-    HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
-    egw.startWalking(startNodes, outputMap);
     hasNonPartCols = pprCtx.getHasNonPartCols();
 
     // Get the exprNodeDesc corresponding to the first start node;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java?rev=1394358&r1=1394357&r2=1394358&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/OpProcFactory.java Fri Oct  5 04:13:06 2012
@@ -18,17 +18,13 @@
 
 package org.apache.hadoop.hive.ql.optimizer.ppr;
 
-import java.util.Map;
-import java.util.Stack;
-
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.optimizer.PrunerOperatorFactory;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 
 /**
@@ -37,50 +33,27 @@ import org.apache.hadoop.hive.ql.plan.Ex
  * We then pass the filter to the partition pruner to construct a pruner for
  * that table alias and store a mapping from the table scan operator to that
  * pruner. We call that pruner later during plan generation.
+ *
+ *
+ * Refactor:
+ * Move main logic to PrunerOperatorFactory. OpProcFactory extends it to reuse logic.
+ *
+ * Any other pruner can reuse it by creating a class extending from PrunerOperatorFactory.
+ *
+ * Only specific logic is in generatePredicate(..) which is in its own class like OpProcFactory.
  */
-public final class OpProcFactory {
+public final class OpProcFactory extends PrunerOperatorFactory {
 
   /**
    * Determines the partition pruner for the filter. This is called only when
    * the filter follows a table scan operator.
    */
-  public static class FilterPPR implements NodeProcessor {
+  public static class FilterPPR extends FilterPruner {
 
     @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
+    protected void generatePredicate(NodeProcessorCtx procCtx, FilterOperator fop,
+        TableScanOperator top) throws SemanticException, UDFArgumentException {
       OpWalkerCtx owc = (OpWalkerCtx) procCtx;
-      FilterOperator fop = (FilterOperator) nd;
-      FilterOperator fop2 = null;
-
-      // The stack contains either ... TS, Filter or
-      // ... TS, Filter, Filter with the head of the stack being the rightmost
-      // symbol. So we just pop out the two elements from the top and if the
-      // second one of them is not a table scan then the operator on the top of
-      // the stack is the Table scan operator.
-      Node tmp = stack.pop();
-      Node tmp2 = stack.pop();
-      TableScanOperator top = null;
-      if (tmp2 instanceof TableScanOperator) {
-        top = (TableScanOperator) tmp2;
-      } else {
-        top = (TableScanOperator) stack.peek();
-        fop2 = (FilterOperator) tmp2;
-      }
-      stack.push(tmp2);
-      stack.push(tmp);
-
-      // If fop2 exists (i.e this is not the top level filter and fop2 is not
-      // a sampling filter then we ignore the current filter
-      if (fop2 != null && !fop2.getConf().getIsSamplingPred()) {
-        return null;
-      }
-
-      // ignore the predicate in case it is not a sampling predicate
-      if (fop.getConf().getIsSamplingPred()) {
-        return null;
-      }
-
       // Otherwise this is not a sampling predicate and we need to
       ExprNodeDesc predicate = fop.getConf().getPredicate();
       String alias = top.getConf().getAlias();
@@ -93,50 +66,14 @@ public final class OpProcFactory {
 
       // Add the pruning predicate to the table scan operator
       addPruningPred(owc.getOpToPartPruner(), top, ppr_pred);
-
-      return null;
-    }
-
-    private void addPruningPred(Map<TableScanOperator, ExprNodeDesc> opToPPR,
-        TableScanOperator top, ExprNodeDesc new_ppr_pred) throws UDFArgumentException {
-      ExprNodeDesc old_ppr_pred = opToPPR.get(top);
-      ExprNodeDesc ppr_pred = null;
-      if (old_ppr_pred != null) {
-        // or the old_ppr_pred and the new_ppr_pred
-        ppr_pred = TypeCheckProcFactory.DefaultExprProcessor
-            .getFuncExprNodeDesc("OR", old_ppr_pred, new_ppr_pred);
-      } else {
-        ppr_pred = new_ppr_pred;
-      }
-
-      // Put the mapping from table scan operator to ppr_pred
-      opToPPR.put(top, ppr_pred);
-
-      return;
     }
-  }
-
-  /**
-   * Default processor which just merges its children.
-   */
-  public static class DefaultPPR implements NodeProcessor {
 
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
-      // Nothing needs to be done.
-      return null;
-    }
   }
 
   public static NodeProcessor getFilterProc() {
     return new FilterPPR();
   }
 
-  public static NodeProcessor getDefaultProc() {
-    return new DefaultPPR();
-  }
-
   private OpProcFactory() {
     // prevent instantiation
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=1394358&r1=1394357&r2=1394358&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Fri Oct  5 04:13:06 2012
@@ -37,22 +37,12 @@ import org.apache.hadoop.hive.ql.ErrorMs
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
-import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
-import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.GraphWalker;
-import org.apache.hadoop.hive.ql.lib.Node;
-import org.apache.hadoop.hive.ql.lib.NodeProcessor;
-import org.apache.hadoop.hive.ql.lib.Rule;
-import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.PrunerUtils;
 import org.apache.hadoop.hive.ql.optimizer.Transform;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
@@ -76,7 +66,7 @@ public class PartitionPruner implements 
 
   // The log
   private static final Log LOG = LogFactory
-      .getLog("hive.ql.optimizer.ppr.PartitionPruner");
+    .getLog("hive.ql.optimizer.ppr.PartitionPruner");
 
   /*
    * (non-Javadoc)
@@ -91,25 +81,9 @@ public class PartitionPruner implements 
     // create a the context for walking operators
     OpWalkerCtx opWalkerCtx = new OpWalkerCtx(pctx.getOpToPartPruner());
 
-    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
-    opRules.put(new RuleRegExp("R1",
-      "(" + TableScanOperator.getOperatorName() + "%"
-      + FilterOperator.getOperatorName() + "%)|("
-      + TableScanOperator.getOperatorName() + "%"
-      + FilterOperator.getOperatorName() + "%"
-      + FilterOperator.getOperatorName() + "%)"),
-      OpProcFactory.getFilterProc());
-
-    // The dispatcher fires the processor corresponding to the closest matching
-    // rule and passes the context along
-    Dispatcher disp = new DefaultRuleDispatcher(OpProcFactory.getDefaultProc(),
-        opRules, opWalkerCtx);
-    GraphWalker ogw = new DefaultGraphWalker(disp);
-
-    // Create a list of topop nodes
-    ArrayList<Node> topNodes = new ArrayList<Node>();
-    topNodes.addAll(pctx.getTopOps().values());
-    ogw.startWalking(topNodes, null);
+    /* Move logic to PrunerUtils.walkOperatorTree() so that it can be reused. */
+    PrunerUtils.walkOperatorTree(pctx, opWalkerCtx, OpProcFactory.getFilterProc(),
+        OpProcFactory.getDefaultProc());
     pctx.setHasNonPartCols(opWalkerCtx.getHasNonPartCols());
 
     return pctx;