You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ol...@apache.org on 2009/05/29 00:40:42 UTC

svn commit: r779788 - in /hadoop/pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/plans/ src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/ src/org/apache/pig/impl/logicalLayer/ src/org/apache/pig/impl/...

Author: olga
Date: Thu May 28 22:40:42 2009
New Revision: 779788

URL: http://svn.apache.org/viewvc?rev=779788&view=rev
Log:
PIG-818: Explain doesn't handle PODemux properly (hagleitn via olgan)

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/plans/DotMRPrinter.java
    hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/DotPOPrinter.java
    hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PlanPrinter.java
    hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/DotLOPrinter.java
    hadoop/pig/trunk/src/org/apache/pig/impl/plan/DotPlanDumper.java
    hadoop/pig/trunk/src/org/apache/pig/impl/plan/PlanDumper.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=779788&r1=779787&r2=779788&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Thu May 28 22:40:42 2009
@@ -48,6 +48,8 @@
 
 BUG FIXES
 
+PIG-818: Explain doesn't handle PODemux properly (hagleitn via olgan)
+
 PIG-819: run -param -param; is a valid grunt command (milindb via olgan)
 
 PIG-656: Use of eval or any other keyword in the package hierarchy of a UDF causes

Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/plans/DotMRPrinter.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/plans/DotMRPrinter.java?rev=779788&r1=779787&r2=779788&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/plans/DotMRPrinter.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/plans/DotMRPrinter.java Thu May 28 22:40:42 2009
@@ -49,13 +49,16 @@
     boolean isVerboseNesting = true;
 
     public DotMRPrinter(MROperPlan plan, PrintStream ps) {
-        this(plan, ps, false, new HashSet<Operator>(), new HashSet<Operator>());
+        this(plan, ps, false, new HashSet<Operator>(), new HashSet<Operator>(),
+             new HashSet<Operator>());
     }
 
     private DotMRPrinter(MROperPlan plan, PrintStream ps, boolean isSubGraph,
                          Set<Operator> subgraphs, 
-                         Set<Operator> multiInputSubgraphs) {
-        super(plan, ps, isSubGraph, subgraphs, multiInputSubgraphs);
+                         Set<Operator> multiInputSubgraphs,
+                         Set<Operator> multiOutputSubgraphs) {
+        super(plan, ps, isSubGraph, subgraphs, 
+              multiInputSubgraphs, multiOutputSubgraphs);
     }
 
     @Override
@@ -66,7 +69,8 @@
 
     @Override
     protected DotPlanDumper makeDumper(InnerPlan plan, PrintStream ps) {
-        return new InnerPrinter(plan, ps, mSubgraphs, mMultiInputSubgraphs);
+        return new InnerPrinter(plan, ps, mSubgraphs, mMultiInputSubgraphs, 
+                                mMultiOutputSubgraphs);
     }
 
     @Override
@@ -169,8 +173,10 @@
 
         public InnerPrinter(InnerPlan plan, PrintStream ps,
                             Set<Operator> subgraphs, 
-                            Set<Operator> multiInputSubgraphs) {
-            super(plan, ps, true, subgraphs, multiInputSubgraphs);
+                            Set<Operator> multiInputSubgraphs,
+                            Set<Operator> multiOutputSubgraphs) {
+            super(plan, ps, true, subgraphs, multiInputSubgraphs,
+                  multiOutputSubgraphs);
         }
 
         @Override
@@ -193,7 +199,8 @@
         protected DotPOPrinter makeDumper(PhysicalPlan plan, PrintStream ps) {
             DotPOPrinter printer = new DotPOPrinter(plan, ps, true, 
                                                     mSubgraphs, 
-                                                    mMultiInputSubgraphs);
+                                                    mMultiInputSubgraphs,
+                                                    mMultiOutputSubgraphs);
             printer.setVerbose(isVerboseNesting);
             return printer;
         }

Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/DotPOPrinter.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/DotPOPrinter.java?rev=779788&r1=779787&r2=779788&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/DotPOPrinter.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/DotPOPrinter.java Thu May 28 22:40:42 2009
@@ -40,19 +40,25 @@
                                   PhysicalOperator, PhysicalPlan> {
 
     public DotPOPrinter(PhysicalPlan plan, PrintStream ps) {
-        this(plan, ps, false, new HashSet<Operator>(), new HashSet<Operator>());
+        this(plan, ps, false, new HashSet<Operator>(), new HashSet<Operator>(),
+             new HashSet<Operator>());
     }
 
     public DotPOPrinter(PhysicalPlan plan, PrintStream ps, boolean isSubGraph,
                         Set<Operator> subgraphs, 
-                        Set<Operator> multiInputSubgraphs) {
-        super(plan, ps, isSubGraph, subgraphs, multiInputSubgraphs);
+                        Set<Operator> multiInputSubgraphs,
+                        Set<Operator> multiOutputSubgraphs) {
+        super(plan, ps, isSubGraph, subgraphs, multiInputSubgraphs,
+              multiOutputSubgraphs);
     }
 
     @Override
     protected DotPlanDumper makeDumper(PhysicalPlan plan, PrintStream ps) {
-        return new DotPOPrinter(plan, ps, true, mSubgraphs, 
-                                mMultiInputSubgraphs);
+        DotPOPrinter dumper = new DotPOPrinter(plan, ps, true, mSubgraphs, 
+                                               mMultiInputSubgraphs,
+                                               mMultiOutputSubgraphs);
+        dumper.setVerbose(this.isVerbose());
+        return dumper;
     }
 
     @Override
@@ -83,6 +89,22 @@
     }
 
     @Override
+    protected Collection<PhysicalPlan> getMultiOutputNestedPlans(PhysicalOperator op) {
+        Collection<PhysicalPlan> plans = new LinkedList<PhysicalPlan>();
+        
+        if (op instanceof POSplit) {
+            plans.addAll(((POSplit)op).getPlans());
+        }
+        else if(op instanceof PODemux) {
+            Set<PhysicalPlan> pl = new HashSet<PhysicalPlan>();
+            pl.addAll(((PODemux)op).getPlans());
+            plans.addAll(pl);
+        }
+        
+        return plans;
+    }
+
+    @Override
     protected Collection<PhysicalPlan> getNestedPlans(PhysicalOperator op) {
         Collection<PhysicalPlan> plans = new LinkedList<PhysicalPlan>();
 
@@ -107,9 +129,6 @@
                 }
             }
         }
-        else if(op instanceof POSplit) {
-            plans.addAll(((POSplit)op).getPlans());
-        }
 
         return plans;
     }

Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PlanPrinter.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PlanPrinter.java?rev=779788&r1=779787&r2=779788&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PlanPrinter.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PlanPrinter.java Thu May 28 22:40:42 2009
@@ -160,12 +160,6 @@
           else if(node instanceof POForEach){
             sb.append(planString(((POForEach)node).getInputPlans()));
           }
-          else if (node instanceof POSplit) {
-              sb.append(planString(((POSplit)node).getPlans()));
-          }
-          else if (node instanceof PODemux) {
-              sb.append(planString(((PODemux)node).getPlans()));
-          }
           else if (node instanceof POMultiQueryPackage) {
               List<POPackage> pkgs = ((POMultiQueryPackage)node).getPackages();
               for (POPackage pkg : pkgs) {
@@ -182,6 +176,17 @@
           }
         }
         
+        if (node instanceof POSplit) {
+            sb.append(planString(((POSplit)node).getPlans()));
+        }
+        else if (node instanceof PODemux) {
+            List<PhysicalPlan> plans = new ArrayList<PhysicalPlan>();
+            Set<PhysicalPlan> pl = new HashSet<PhysicalPlan>();
+            pl.addAll(((PODemux)node).getPlans());
+            plans.addAll(pl);
+            sb.append(planString(plans));
+        }
+        
         List<O> originalPredecessors = mPlan.getPredecessors(node);
         if (originalPredecessors == null)
             return sb.toString();

Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/DotLOPrinter.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/DotLOPrinter.java?rev=779788&r1=779787&r2=779788&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/DotLOPrinter.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/DotLOPrinter.java Thu May 28 22:40:42 2009
@@ -37,19 +37,23 @@
                                   LogicalOperator, LogicalPlan> {
 
     public DotLOPrinter(LogicalPlan plan, PrintStream ps) {
-        this(plan, ps, false, new HashSet<Operator>(), new HashSet<Operator>());
+        this(plan, ps, false, new HashSet<Operator>(), new HashSet<Operator>(),
+             new HashSet<Operator>());
     }
 
     private DotLOPrinter(LogicalPlan plan, PrintStream ps, boolean isSubGraph,
                          Set<Operator> subgraphs, 
-                         Set<Operator> multiInSubgraphs) {
-        super(plan, ps, isSubGraph, subgraphs, multiInSubgraphs);
+                         Set<Operator> multiInSubgraphs,
+                         Set<Operator> multiOutSubgraphs) {
+        super(plan, ps, isSubGraph, subgraphs, 
+              multiInSubgraphs, multiOutSubgraphs);
     }
 
     @Override
     protected DotPlanDumper makeDumper(LogicalPlan plan, PrintStream ps) {
         return new DotLOPrinter(plan, ps, true, mSubgraphs, 
-                                mMultiInputSubgraphs);
+                                mMultiInputSubgraphs,
+                                mMultiOutputSubgraphs);
     }
 
     @Override

Modified: hadoop/pig/trunk/src/org/apache/pig/impl/plan/DotPlanDumper.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/plan/DotPlanDumper.java?rev=779788&r1=779787&r2=779788&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/plan/DotPlanDumper.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/plan/DotPlanDumper.java Thu May 28 22:40:42 2009
@@ -36,19 +36,23 @@
 
     protected Set<Operator> mSubgraphs;
     protected Set<Operator> mMultiInputSubgraphs;    
+    protected Set<Operator> mMultiOutputSubgraphs;
     private boolean isSubGraph = false;
   
     public DotPlanDumper(P plan, PrintStream ps) {
-        this(plan, ps, false, new HashSet<Operator>(), new HashSet<Operator>());
+        this(plan, ps, false, new HashSet<Operator>(), new HashSet<Operator>(),
+             new HashSet<Operator>());
     }
 
     protected DotPlanDumper(P plan, PrintStream ps, boolean isSubGraph, 
                             Set<Operator> mSubgraphs, 
-                            Set<Operator> mMultiInputSubgraphs) {
+                            Set<Operator> mMultiInputSubgraphs,
+                            Set<Operator> mMultiOutputSubgraphs) {
         super(plan, ps);
         this.isSubGraph = isSubGraph;
         this.mSubgraphs = mSubgraphs;
         this.mMultiInputSubgraphs = mMultiInputSubgraphs;
+        this.mMultiOutputSubgraphs = mMultiOutputSubgraphs;
     }
 
     @Override
@@ -98,6 +102,18 @@
         }
     }
 
+    @Override 
+    protected void dumpMultiOutputNestedOperator(E op, Collection<S> plans) {
+        super.dumpMultiOutputNestedOperator(op, plans);
+
+        mMultiOutputSubgraphs.add(op);
+        
+        dumpInvisibleOutput(op);
+        for (S plan: plans) {
+            connectInvisibleOutput(op, plan);
+        }
+    }
+
     @Override
     protected void dumpNestedOperator(E op, Collection<S> plans) {
         dumpInvisibleOperators(op);
@@ -130,12 +146,14 @@
     }
 
     @Override
-    protected void dumpEdge(E op, E suc) {
+    protected void dumpEdge(Operator op, Operator suc) {
         String in = getID(op);
         String out = getID(suc);
         String attributes = "";
 
-        if (mMultiInputSubgraphs.contains(op) || mSubgraphs.contains(op)) {
+        if (mMultiInputSubgraphs.contains(op) 
+            || mSubgraphs.contains(op) 
+            || mMultiOutputSubgraphs.contains(op)) {
             in = getSubgraphID(op, false);
         }
 
@@ -159,7 +177,8 @@
     @Override
     protected PlanDumper makeDumper(S plan, PrintStream ps) {
         return new DotPlanDumper(plan, ps, true, 
-                                 mSubgraphs, mMultiInputSubgraphs);
+                                 mSubgraphs, mMultiInputSubgraphs, 
+                                 mMultiOutputSubgraphs);
     }
 
     /**
@@ -204,12 +223,15 @@
         }
     }
 
-    private void connectInvisibleOutput(E op, S plan) {
+    private void connectInvisibleOutput(E op, 
+                                        OperatorPlan<? extends Operator> plan) {
         String out = getSubgraphID(op, false);
 
-        for (N l: plan.getLeaves()) {
+        for (Operator l: plan.getLeaves()) {
             String in;
-            if (mSubgraphs.contains(l) || mMultiInputSubgraphs.contains(l)) {
+            if (mSubgraphs.contains(l) 
+                || mMultiInputSubgraphs.contains(l)
+                || mMultiOutputSubgraphs.contains(l)) {
                 in = getSubgraphID(l, false);
             } else {
                 in = getID(l);

Modified: hadoop/pig/trunk/src/org/apache/pig/impl/plan/PlanDumper.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/plan/PlanDumper.java?rev=779788&r1=779787&r2=779788&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/impl/plan/PlanDumper.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/plan/PlanDumper.java Thu May 28 22:40:42 2009
@@ -31,7 +31,7 @@
  */
 public class PlanDumper<E extends Operator, 
                         P extends OperatorPlan<E>, 
-                        S extends OperatorPlan> {
+                        S extends OperatorPlan<? extends Operator>> {
     
     protected PrintStream ps;
     protected P plan;
@@ -61,8 +61,14 @@
                 dumpMultiInputNestedOperator(op, map);
                 continue;
             }
+
+            Collection<S> plans = getMultiOutputNestedPlans(op);
+            if (plans.size() > 0) {
+                dumpMultiOutputNestedOperator(op, plans);
+                continue;
+            }
             
-            Collection<S> plans = getNestedPlans(op);
+            plans = getNestedPlans(op);
             if (isVerbose && plans.size() > 0) {
                 dumpNestedOperator(op, plans);
                 continue;
@@ -85,7 +91,7 @@
      * makeDumper is a factory method. Used by subclasses to specify
      * what dumper should handle the nested plan.
      * @param plan Plan that the new dumper should handle
-     * @return the dumper for S
+     * @return the dumper for plan
      */
     protected PlanDumper makeDumper(S plan, PrintStream ps) {
         return new PlanDumper(plan, ps);
@@ -109,13 +115,30 @@
         dumpOperator(op);
         for (E aop: plans.keySet()) {
             for (S plan: plans.get(aop)) {
-                PlanDumper dumper = new PlanDumper(plan, ps);
+                PlanDumper dumper = makeDumper(plan, ps);
                 dumper.dump();
             }
         }
     }
 
     /**
+     * Will be called for nested operators, where the plans represent
+     * how the output of the operator is processed. 
+     * @param op the nested operator
+     * @param plans a collection of sub plans.
+     */
+    protected void dumpMultiOutputNestedOperator(E op, Collection<S> plans) {
+        dumpOperator(op);
+        for (S plan: plans) {
+            PlanDumper  dumper = makeDumper(plan, ps);
+            dumper.dump();
+            for (Operator p: plan.getRoots()) {
+                dumpEdge(op, p);
+            }
+        }
+    }
+
+    /**
      * Will be called for nested operators. The operators are not
      * specifically connected to any input or output operators of E
      * @param op the nested operator
@@ -124,7 +147,7 @@
     protected void dumpNestedOperator(E op, Collection<S> plans) {
         dumpOperator(op);
         for (S plan: plans) {
-            PlanDumper  dumper = new PlanDumper(plan, ps);
+            PlanDumper  dumper = makeDumper(plan, ps);
             dumper.dump();
         }
     }
@@ -135,7 +158,7 @@
      * @param op tail of the edge
      * @param suc head of the edge
      */
-    protected void dumpEdge(E op, E suc) {
+    protected void dumpEdge(Operator op, Operator suc) {
         ps.println(op.name()+" -> "+suc.name());
     }
 
@@ -150,6 +173,16 @@
     }
 
     /**
+     * Used to determine if an operator has nested output plans
+     *
+     * @param op operator
+     * @return Collection of nested plans that process the operator's output (empty if there are none).
+     */
+    protected Collection<S> getMultiOutputNestedPlans(E op) {
+        return new LinkedList<S>();
+    }
+
+    /**
      * Used to determine if an operator has nested plans (without
      * connections to in- or output operators.
      * @param op operator