You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2011/05/03 03:31:33 UTC

svn commit: r1098887 - in /pig/branches/branch-0.9: ./ src/org/apache/pig/newplan/ src/org/apache/pig/newplan/logical/ src/org/apache/pig/newplan/logical/relational/ test/org/apache/pig/test/ test/org/apache/pig/test/data/DotFiles/

Author: daijy
Date: Tue May  3 01:31:33 2011
New Revision: 1098887

URL: http://svn.apache.org/viewvc?rev=1098887&view=rev
Log:
PIG-2016: -dot option does not work with explain and new logical plan

Added:
    pig/branches/branch-0.9/src/org/apache/pig/newplan/DotPlanDumper.java
    pig/branches/branch-0.9/src/org/apache/pig/newplan/PlanDumper.java
    pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/DotLOPrinter.java
    pig/branches/branch-0.9/test/org/apache/pig/test/data/DotFiles/explain1.dot
Modified:
    pig/branches/branch-0.9/CHANGES.txt
    pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
    pig/branches/branch-0.9/test/org/apache/pig/test/TestEvalPipelineLocal.java

Modified: pig/branches/branch-0.9/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/CHANGES.txt?rev=1098887&r1=1098886&r2=1098887&view=diff
==============================================================================
--- pig/branches/branch-0.9/CHANGES.txt (original)
+++ pig/branches/branch-0.9/CHANGES.txt Tue May  3 01:31:33 2011
@@ -174,6 +174,8 @@ PIG-1696: Performance: Use System.arrayc
 
 BUG FIXES
 
+PIG-2016: -dot option does not work with explain and new logical plan (daijy)
+
 PIG-2018: NPE for co-group with group-by column having complex schema and
  different load functions for each input (thejas)
 

Added: pig/branches/branch-0.9/src/org/apache/pig/newplan/DotPlanDumper.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/src/org/apache/pig/newplan/DotPlanDumper.java?rev=1098887&view=auto
==============================================================================
--- pig/branches/branch-0.9/src/org/apache/pig/newplan/DotPlanDumper.java (added)
+++ pig/branches/branch-0.9/src/org/apache/pig/newplan/DotPlanDumper.java Tue May  3 01:31:33 2011
@@ -0,0 +1,345 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.newplan;
+
+import java.io.PrintStream;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+import java.util.HashSet;
+
+import org.apache.pig.impl.util.MultiMap;
+
+/**
+ * This class puts everything that is needed to dump a plan in a
+ * format readable by graphviz's dot algorithm. Out of the box it does
+ * not print any nested plans.
+ */
+public class DotPlanDumper extends PlanDumper {
+
+    protected Set<Operator> mSubgraphs;
+    protected Set<Operator> mMultiInputSubgraphs;    
+    protected Set<Operator> mMultiOutputSubgraphs;
+    private boolean isSubGraph = false;
+  
+    public DotPlanDumper(BaseOperatorPlan plan, PrintStream ps) {
+        this(plan, ps, false, new HashSet<Operator>(), new HashSet<Operator>(),
+             new HashSet<Operator>());
+    }
+
+    protected DotPlanDumper(BaseOperatorPlan plan, PrintStream ps, boolean isSubGraph, 
+                            Set<Operator> mSubgraphs, 
+                            Set<Operator> mMultiInputSubgraphs,
+                            Set<Operator> mMultiOutputSubgraphs) {
+        super(plan, ps);
+        this.isSubGraph = isSubGraph;
+        this.mSubgraphs = mSubgraphs;
+        this.mMultiInputSubgraphs = mMultiInputSubgraphs;
+        this.mMultiOutputSubgraphs = mMultiOutputSubgraphs;
+    }
+
+    @Override
+    public void dump() {
+        if (!isSubGraph) {
+            ps.println("digraph plan {");
+            ps.println("compound=true;");
+            ps.println("node [shape=rect];");
+        }
+        super.dump();
+        if (!isSubGraph) {
+            ps.println("}");
+        }
+    }
+
+    @Override
+    protected void dumpMultiInputNestedOperator(Operator op, MultiMap<Operator, BaseOperatorPlan> plans) {
+        dumpInvisibleOutput(op);
+
+        ps.print("subgraph ");
+        ps.print(getClusterID(op));
+        ps.println(" {");
+        join("; ", getAttributes(op));
+        ps.println("labelloc=b;");
+        
+        mMultiInputSubgraphs.add(op);
+
+        for (Operator o: plans.keySet()) {
+            ps.print("subgraph ");
+            ps.print(getClusterID(op, o));
+            ps.println(" {");
+            ps.println("label=\"\";");
+            dumpInvisibleInput(op, o);
+            for (BaseOperatorPlan plan : plans.get(o)) {
+                PlanDumper dumper = makeDumper(plan, ps);
+                dumper.dump();
+                connectInvisibleInput(op, o, plan);
+            }
+            ps.println("};");
+        }
+        ps.println("};");
+        
+        for (Operator o: plans.keySet()) {
+            for (BaseOperatorPlan plan: plans.get(o)) {
+                connectInvisibleOutput(op, plan);
+            }
+        }
+    }
+
+    @Override 
+    protected void dumpMultiOutputNestedOperator(Operator op, Collection<BaseOperatorPlan> plans) {
+        super.dumpMultiOutputNestedOperator(op, plans);
+
+        mMultiOutputSubgraphs.add(op);
+        
+        dumpInvisibleOutput(op);
+        for (BaseOperatorPlan plan: plans) {
+            connectInvisibleOutput(op, plan);
+        }
+    }
+
+    @Override
+    protected void dumpNestedOperator(Operator op, Collection<BaseOperatorPlan> plans) {
+        dumpInvisibleOperators(op);
+        ps.print("subgraph ");
+        ps.print(getClusterID(op));
+        ps.println(" {");
+        join("; ", getAttributes(op));
+        ps.println("labelloc=b;");
+
+        mSubgraphs.add(op);
+        
+        for (BaseOperatorPlan plan: plans) {
+            PlanDumper dumper = makeDumper(plan, ps);
+            dumper.dump();
+            connectInvisibleInput(op, plan);
+        }
+        ps.println("};");
+
+        for (BaseOperatorPlan plan: plans) {
+            connectInvisibleOutput(op, plan);
+        }
+    }
+
+    @Override
+    protected void dumpOperator(Operator op) {
+        ps.print(getID(op));
+        ps.print(" [");
+        join(", ", getAttributes(op));
+        ps.println("];");
+    }
+
+    @Override
+    protected void dumpEdge(Operator op, Operator suc) {
+        String in = getID(op);
+        String out = getID(suc);
+        String attributes = "";
+
+        if (mMultiInputSubgraphs.contains(op) 
+            || mSubgraphs.contains(op) 
+            || mMultiOutputSubgraphs.contains(op)) {
+            in = getSubgraphID(op, false);
+        }
+
+        if (mMultiInputSubgraphs.contains(suc)) {
+            out = getSubgraphID(suc, op, true);
+            attributes = " [lhead="+getClusterID(suc,op)+"]";
+        }
+
+        if (mSubgraphs.contains(suc)) {
+            out = getSubgraphID(suc, true);
+            attributes = " [lhead="+getClusterID(suc)+"]";
+        }
+        
+        if (reverse(plan)) {
+            ps.print(out);
+            ps.print(" -> ");
+            ps.print(in);
+        } else {
+            ps.print(in);
+            ps.print(" -> ");
+            ps.print(out);
+        }
+        ps.println(attributes);
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    protected PlanDumper makeDumper(BaseOperatorPlan plan, PrintStream ps) {
+        return new DotPlanDumper(plan, ps, true, 
+                                 mSubgraphs, mMultiInputSubgraphs, 
+                                 mMultiOutputSubgraphs);
+    }
+
+    /**
+     * Used to generate the label for an operator.
+     * @param op operator to dump
+     */
+    protected String getName(Operator op) {
+        return op.getName();
+    }
+    
+    /**
+     * Used to generate the attributes of a node
+     * @param op operator
+     */
+    protected String[] getAttributes(Operator op) {
+        String[] attributes = new String[1];
+        attributes[0] =  "label=\""+getName(op)+"\"";
+        return attributes;
+    }
+
+
+    private void connectInvisibleInput(Operator op1, Operator op2, BaseOperatorPlan plan) {
+        String in = getSubgraphID(op1, op2, true);
+        
+        List<Operator> sources;
+        if (reverse(plan))
+            sources = plan.getSinks();
+        else
+            sources = plan.getSources();
+        
+        for (Operator l: sources) {
+            dumpInvisibleEdge(in, getID(l));
+        }
+    }
+
+    private void connectInvisibleInput(Operator op, BaseOperatorPlan plan) {
+        String in = getSubgraphID(op, true);
+
+        List<Operator> sources;
+        if (reverse(plan))
+            sources = plan.getSinks();
+        else
+            sources = plan.getSources();
+        
+        for (Operator l: sources) {
+            String out;
+            if (mSubgraphs.contains(l) || mMultiInputSubgraphs.contains(l)) {
+                out = getSubgraphID(l, true);
+            } else {
+                out = getID(l);
+            }
+
+            dumpInvisibleEdge(in, out);
+        }
+    }
+
+    private void connectInvisibleOutput(Operator op, 
+                                        BaseOperatorPlan plan) {
+        String out = getSubgraphID(op, false);
+
+        List<Operator> sinks;
+        if (reverse(plan))
+            sinks = plan.getSources();
+        else
+            sinks = plan.getSinks();
+        
+        for (Operator l: sinks) {
+            String in;
+            if (mSubgraphs.contains(l) 
+                || mMultiInputSubgraphs.contains(l)
+                || mMultiOutputSubgraphs.contains(l)) {
+                in = getSubgraphID(l, false);
+            } else {
+                in = getID(l);
+            }
+
+            dumpInvisibleEdge(in, out);
+        }
+    }
+
+    private void connectInvisible(Operator op, BaseOperatorPlan plan) {
+        connectInvisibleInput(op, plan);
+        connectInvisibleOutput(op, plan);
+    }        
+
+    private void dumpInvisibleInput(Operator op1, Operator op2) {
+        ps.print(getSubgraphID(op1, op2, true));
+        ps.print(" ");
+        ps.print(getInvisibleAttributes(op1));
+        ps.println(";");
+    }
+    
+    private void dumpInvisibleInput(Operator op) {
+        ps.print(getSubgraphID(op, true));
+        ps.print(" ");
+        ps.print(getInvisibleAttributes(op));
+        ps.println(";");
+    }
+
+    private void dumpInvisibleOutput(Operator op) {
+        ps.print(getSubgraphID(op, false));
+        ps.print(" ");
+        ps.print(getInvisibleAttributes(op));
+        ps.println(";");
+    }
+
+    protected void dumpInvisibleOperators(Operator op) {
+        dumpInvisibleInput(op);
+        dumpInvisibleOutput(op);
+    }
+
+    private String getClusterID(Operator op1, Operator op2) {
+        return getClusterID(op1)+"_"+getID(op2);
+    }
+
+    private String getClusterID(Operator op) {
+        return "cluster_"+getID(op);
+    }
+
+    private String getSubgraphID(Operator op1, Operator op2, boolean in) {
+        String id = "s"+getID(op1)+"_"+getID(op2);
+        if (in) {
+            id += "_in";
+        }
+        else {
+            id += "_out";
+        }
+        return id;
+    }
+
+    private String getSubgraphID(Operator op, boolean in) {
+        String id =  "s"+getID(op);
+        if (in) {
+            id += "_in";
+        }
+        else {
+            id += "_out";
+        }
+        return id;
+    }
+
+    private String getID(Operator op) { // DOT node id derived from op.hashCode()
+        return Long.toString(Math.abs((long) op.hashCode())); // widen first: Math.abs(Integer.MIN_VALUE) is still negative
+    }
+
+    private String getInvisibleAttributes(Operator op) {
+        return "[label=\"\", style=invis, height=0, width=0]";
+    }
+    
+    private void dumpInvisibleEdge(String op, String suc) {
+        ps.print(op);
+        ps.print(" -> ");
+        ps.print(suc);
+        ps.println(" [style=invis];");
+    }
+    
+    protected boolean reverse(BaseOperatorPlan plan) {
+        return false;
+    }
+}

Added: pig/branches/branch-0.9/src/org/apache/pig/newplan/PlanDumper.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/src/org/apache/pig/newplan/PlanDumper.java?rev=1098887&view=auto
==============================================================================
--- pig/branches/branch-0.9/src/org/apache/pig/newplan/PlanDumper.java (added)
+++ pig/branches/branch-0.9/src/org/apache/pig/newplan/PlanDumper.java Tue May  3 01:31:33 2011
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.newplan;
+
+import java.io.PrintStream;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Collection;
+import org.apache.pig.impl.util.MultiMap;
+
+/**
+ * This class dumps a nested plan to a print stream. It does not walk
+ * the graph in any particular fashion; it merely iterates over all
+ * operators and edges and calls a corresponding dump function. If a
+ * node of the plan has nested plans, these will be dumped when the
+ * node is handled.
+ */
+public class PlanDumper {
+    
+    protected PrintStream ps;
+    protected BaseOperatorPlan plan;
+    protected boolean isVerbose = true;
+  
+    public PlanDumper(BaseOperatorPlan plan, PrintStream ps) {
+        this.plan = plan;
+        this.ps = ps;
+    }
+
+    public void setVerbose(boolean verbose) {
+        this.isVerbose = verbose;
+    }
+
+    public boolean isVerbose() {
+        return isVerbose;
+    }
+
+    /**
+     * This is the public interface. Dump writes the plan and nested
+     * plans to the stream.
+     */
+    public void dump() {
+        Iterator<Operator> iter = plan.getOperators();
+        while (iter.hasNext()) {
+            Operator op = iter.next();
+            MultiMap<Operator,BaseOperatorPlan> map = getMultiInputNestedPlans(op);
+            if (isVerbose && !map.isEmpty()) {
+                dumpMultiInputNestedOperator(op, map);
+                continue;
+            }
+
+            Collection<BaseOperatorPlan> plans = getMultiOutputNestedPlans(op);
+            if (plans.size() > 0) {
+                dumpMultiOutputNestedOperator(op, plans);
+                continue;
+            }
+            
+            plans = getNestedPlans(op);
+            if (isVerbose && plans.size() > 0) {
+                dumpNestedOperator(op, plans);
+                continue;
+            }
+
+            dumpOperator(op);
+        }
+
+        iter = plan.getOperators();
+        while (iter.hasNext()) {
+            Operator op = iter.next();
+            Collection<Operator> successors = plan.getSuccessors(op);
+            if (successors != null) {
+                for (Operator suc: successors) {
+                    dumpEdge(op, suc);
+                }
+            }
+        }
+    }
+
+    /**
+     * makeDumper is a factory method. Used by subclasses to specify
+     * what dumper should handle the nested plan.
+     * @param plan Plan that the new dumper should handle
+     * @return the dumper for plan
+     */
+    @SuppressWarnings("unchecked")
+    protected PlanDumper makeDumper(BaseOperatorPlan plan, PrintStream ps) {
+        return new PlanDumper(plan, ps);
+    }
+
+    /**
+     * Will be called to dump a simple operator
+     * @param op the operator to be dumped
+     */
+    protected void dumpOperator(Operator op) {
+        ps.println(op.getName().replace(" ","_"));
+    }
+
+    /**
+     * Will be called when an operator has nested plans, which are
+     * connected to one of the multiple inputs.
+     * @param op the nested operator
+     * @param plans a map of input operator to connected nested plan
+     */
+    protected void dumpMultiInputNestedOperator(Operator op, MultiMap<Operator,BaseOperatorPlan> plans) {
+        dumpOperator(op);
+        for (Operator aop: plans.keySet()) {
+            for (BaseOperatorPlan plan: plans.get(aop)) {
+                PlanDumper dumper = makeDumper(plan, ps);
+                dumper.dump();
+            }
+        }
+    }
+
+    /**
+     * Will be called for nested operators, where the plans represent
+     * how the output of the operator is processed. 
+     * @param op the nested operator
+     * @param plans a collection of sub plans.
+     */
+    protected void dumpMultiOutputNestedOperator(Operator op, Collection<BaseOperatorPlan> plans) {
+        dumpOperator(op);
+        for (BaseOperatorPlan plan: plans) {
+            PlanDumper  dumper = makeDumper(plan, ps);
+            dumper.dump();
+            for (Operator p: plan.getSources()) {
+                dumpEdge(op, p);
+            }
+        }
+    }
+
+    /**
+     * Will be called for nested operators. The nested plans are not
+     * specifically connected to any input or output operators.
+     * @param op the nested operator
+     * @param plans a collection of sub plans.
+     */
+    protected void dumpNestedOperator(Operator op, Collection<BaseOperatorPlan> plans) {
+        dumpOperator(op);
+        for (BaseOperatorPlan plan: plans) {
+            PlanDumper  dumper = makeDumper(plan, ps);
+            dumper.dump();
+        }
+    }
+
+    /**
+     * Will be called to dump the edges of the plan. Each edge results
+     * in one call.
+     * @param op tail of the edge
+     * @param suc head of the edge
+     */
+    protected void dumpEdge(Operator op, Operator suc) {
+        ps.println(op.getName()+" -> "+suc.getName());
+    }
+
+    /**
+     * Used to determine if an operator has nested plans, which are
+     * connected to specific input operators.
+     * @param op operator
+     * @return Map describing the input to nested plan relationship.
+     */
+    protected MultiMap<Operator, BaseOperatorPlan> getMultiInputNestedPlans(Operator op) {
+        return new MultiMap<Operator, BaseOperatorPlan>();
+    }
+
+    /**
+     * Used to determine if an operator has nested output plans
+     *
+     * @param op operator
+     * @return Collection of nested output plans.
+     */
+    protected Collection<BaseOperatorPlan> getMultiOutputNestedPlans(Operator op) {
+        return new LinkedList<BaseOperatorPlan>();
+    }
+
+    /**
+     * Used to determine if an operator has nested plans (without
+     * connections to in- or output operators).
+     * @param op operator
+     * @return Collection of nested plans.
+     */
+    protected Collection<BaseOperatorPlan> getNestedPlans(Operator op) {
+        return new LinkedList<BaseOperatorPlan>();
+    }
+
+    /**
+     * Helper function to print a string array.
+     * @param sep Separator
+     * @param strings Array to print
+     */
+    protected void join(String sep, String[] strings) {
+        if (strings == null) {
+            return;
+        }
+        
+        for (int i = 0; i < strings.length; ++i) {
+            if (i != 0) {
+                ps.print(sep);
+            }
+            ps.print(strings[i]);
+        }
+    }
+}

Added: pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/DotLOPrinter.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/DotLOPrinter.java?rev=1098887&view=auto
==============================================================================
--- pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/DotLOPrinter.java (added)
+++ pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/DotLOPrinter.java Tue May  3 01:31:33 2011
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.newplan.logical;
+
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.LinkedList;
+import java.util.Collection;
+import org.apache.pig.impl.util.MultiMap;
+import org.apache.pig.newplan.BaseOperatorPlan;
+import org.apache.pig.newplan.DotPlanDumper;
+import org.apache.pig.newplan.Operator;
+import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan;
+import org.apache.pig.newplan.logical.expression.ProjectExpression;
+import org.apache.pig.newplan.logical.relational.LOCogroup;
+import org.apache.pig.newplan.logical.relational.LOFilter;
+import org.apache.pig.newplan.logical.relational.LOForEach;
+import org.apache.pig.newplan.logical.relational.LOGenerate;
+import org.apache.pig.newplan.logical.relational.LOJoin;
+import org.apache.pig.newplan.logical.relational.LOLoad;
+import org.apache.pig.newplan.logical.relational.LOSort;
+import org.apache.pig.newplan.logical.relational.LOSplitOutput;
+import org.apache.pig.newplan.logical.relational.LOStore;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * This class can print a logical plan in the DOT format. It uses
+ * clusters to illustrate nesting. If "verbose" is off, it will skip
+ * any nesting.
+ */
+public class DotLOPrinter extends DotPlanDumper {
+
+    public DotLOPrinter(BaseOperatorPlan plan, PrintStream ps) {
+        this(plan, ps, false, new HashSet<Operator>(), new HashSet<Operator>(),
+             new HashSet<Operator>());
+    }
+
+    private DotLOPrinter(BaseOperatorPlan plan, PrintStream ps, boolean isSubGraph,
+                         Set<Operator> subgraphs, 
+                         Set<Operator> multiInSubgraphs,
+                         Set<Operator> multiOutSubgraphs) {
+        super(plan, ps, isSubGraph, subgraphs, 
+              multiInSubgraphs, multiOutSubgraphs);
+    }
+
+    @Override
+    protected DotPlanDumper makeDumper(BaseOperatorPlan plan, PrintStream ps) {
+        return new DotLOPrinter(plan, ps, true, mSubgraphs, 
+                                mMultiInputSubgraphs,
+                                mMultiOutputSubgraphs);
+    }
+
+    @Override
+    protected String getName(Operator op) {
+        StringBuffer info = new StringBuffer(op.getName());
+        if (op instanceof ProjectExpression) {
+            ProjectExpression pr = (ProjectExpression)op;
+            info.append(pr.getInputNum());
+            info.append(":");
+            if (pr.isProjectStar())
+                info.append("(*)");
+            else if (pr.isRangeProject())
+                info.append("[").append(pr.getStartCol()).append(" .. ").append(pr.getEndCol()).append("]");
+            else
+                info.append(pr.getColNum());
+        }
+        return info.toString();
+    }
+
+    @Override
+    protected String[] getAttributes(Operator op) {
+        if (op instanceof LOStore || op instanceof LOLoad) {
+            String[] attributes = new String[3];
+            attributes[0] = "label=\""+getName(op).replace(":",",\\n")+"\"";
+            attributes[1] = "style=\"filled\"";
+            attributes[2] = "fillcolor=\"gray\"";
+            return attributes;
+        }
+        else {
+            return super.getAttributes(op);
+        }
+    }
+
+    @Override
+    protected MultiMap<Operator, BaseOperatorPlan> 
+        getMultiInputNestedPlans(Operator op) {
+        
+        if(op instanceof LOCogroup){
+            MultiMap<Operator, BaseOperatorPlan> planMap = new MultiMap<Operator, BaseOperatorPlan>();
+            for (Integer i : ((LOCogroup)op).getExpressionPlans().keySet()) {
+                List<BaseOperatorPlan> plans = new ArrayList<BaseOperatorPlan>();
+                plans.addAll(((LOCogroup)op).getExpressionPlans().get(i));
+                Operator pred = plan.getPredecessors(op).get(i);
+                planMap.put(pred, plans);
+            }
+            return  planMap;
+        }
+        else if(op instanceof LOJoin){
+            MultiMap<Operator, BaseOperatorPlan> planMap = new MultiMap<Operator, BaseOperatorPlan>();
+            for (Integer i : ((LOJoin)op).getExpressionPlans().keySet()) {
+                List<BaseOperatorPlan> plans = new ArrayList<BaseOperatorPlan>();
+                plans.addAll(((LOJoin)op).getExpressionPlans().get(i));
+                Operator pred = plan.getPredecessors(op).get(i);
+                planMap.put(pred, plans);
+            }
+            return  planMap;
+        }
+        return new MultiMap<Operator, BaseOperatorPlan>();
+    }
+
+    @Override
+    protected Collection<BaseOperatorPlan> getNestedPlans(Operator op) {
+        Collection<BaseOperatorPlan> plans = new LinkedList<BaseOperatorPlan>();
+
+        if(op instanceof LOFilter){
+            plans.add(((LOFilter)op).getFilterPlan());
+        }
+        else if(op instanceof LOForEach){
+            plans.add(((LOForEach)op).getInnerPlan());
+        }
+        else if(op instanceof LOGenerate){
+            plans.addAll(((LOGenerate)op).getOutputPlans());
+        }
+        else if(op instanceof LOSort){
+            plans.addAll(((LOSort)op).getSortColPlans()); 
+        }
+        else if(op instanceof LOSplitOutput){
+            plans.add(((LOSplitOutput)op).getFilterPlan());
+        }
+        
+        return plans;
+    }
+    
+    @Override
+    protected boolean reverse(BaseOperatorPlan plan) {
+        if (plan instanceof LogicalExpressionPlan)
+            return true;
+        return false;
+    }
+}

Modified: pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java?rev=1098887&r1=1098886&r2=1098887&view=diff
==============================================================================
--- pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java (original)
+++ pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java Tue May  3 01:31:33 2011
@@ -25,9 +25,11 @@ import java.util.Iterator;
 import java.util.List;
 
 import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.logicalLayer.LOPrinter;
 import org.apache.pig.newplan.BaseOperatorPlan;
 import org.apache.pig.newplan.Operator;
 import org.apache.pig.newplan.OperatorPlan;
+import org.apache.pig.newplan.logical.DotLOPrinter;
 import org.apache.pig.newplan.logical.optimizer.LogicalPlanPrinter;
 
 /**
@@ -76,8 +78,14 @@ public class LogicalPlan extends BaseOpe
         ps.println("# New Logical Plan:");
         ps.println("#-----------------------------------------------");
 
-        LogicalPlanPrinter npp = new LogicalPlanPrinter(this, ps);
-        npp.visit();
+        if (format.equals("text")) {
+            LogicalPlanPrinter npp = new LogicalPlanPrinter(this, ps);
+            npp.visit();
+        } else if (format.equals("dot")) {
+            DotLOPrinter lpp = new DotLOPrinter(this, ps);
+            lpp.dump();
+            ps.println("");
+        }
     }
 
     public Operator findByAlias(String alias) {

Modified: pig/branches/branch-0.9/test/org/apache/pig/test/TestEvalPipelineLocal.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/test/org/apache/pig/test/TestEvalPipelineLocal.java?rev=1098887&r1=1098886&r2=1098887&view=diff
==============================================================================
--- pig/branches/branch-0.9/test/org/apache/pig/test/TestEvalPipelineLocal.java (original)
+++ pig/branches/branch-0.9/test/org/apache/pig/test/TestEvalPipelineLocal.java Tue May  3 01:31:33 2011
@@ -18,6 +18,7 @@
 package org.apache.pig.test;
 
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.PrintStream;
@@ -61,6 +62,8 @@ public class TestEvalPipelineLocal {
     
     private PigServer pigServer;
 
+    static final int MAX_SIZE = 100000;
+    
     TupleFactory mTf = TupleFactory.getInstance();
     
     @Before
@@ -1020,5 +1023,40 @@ public class TestEvalPipelineLocal {
 
         Assert.assertEquals((LOOP_COUNT * LOOP_COUNT)/2, numRows);
     }
-
+    
+    // Regression test for PIG-2016: DOT explain output for alias i must match explain1.dot once hash-derived ids are stripped.
+    @Test
+    public void testExplainInDotGraph() throws Exception{
+        pigServer.registerQuery("a = load 'student' using " + PigStorage.class.getName() + "(':') as (name, age, gpa);");
+        pigServer.registerQuery("b = load 'voter' using " + PigStorage.class.getName() + "(',') as (name, age, registration, contributions);");
+        pigServer.registerQuery("c = filter a by age < 50;");
+        pigServer.registerQuery("d = filter b by age < 50;");
+        pigServer.registerQuery("e = cogroup c by (name, age), d by (name, age);");
+        pigServer.registerQuery("f = foreach e generate flatten(c), flatten(d);");
+        pigServer.registerQuery("g = group f by registration;");
+        pigServer.registerQuery("h = foreach g generate (chararray)group, SUM(f.d::contributions);");
+        pigServer.registerQuery("i = order h by $1;");
+        
+        File tmpFile = File.createTempFile("test", "txt");
+        PrintStream ps = new PrintStream(new FileOutputStream(tmpFile));
+        pigServer.explain("i", "dot", true, true, ps, System.out, System.out);
+        ps.close();
+        
+        FileInputStream fis1 = new FileInputStream("test/org/apache/pig/test/data/DotFiles/explain1.dot");
+        FileInputStream fis2 = new FileInputStream(tmpFile);
+        byte[] b1 = new byte[MAX_SIZE];
+        byte[] b2 = new byte[MAX_SIZE];
+        try {
+            fis1.read(b1);
+            fis2.read(b2);
+        } finally {
+            fis1.close();
+            fis2.close();
+        }
+        // Filter out the random number generated on hash; trim() also drops the unused zero bytes of the buffers
+        String goldenPlan = new String(b1).trim().replaceAll("\\d{3,}", "");
+        String realPlan = new String(b2).trim().replaceAll("\\d{3,}", "");
+        // JUnit's assertEquals takes (expected, actual): the golden file is the expected value
+        Assert.assertEquals(goldenPlan, realPlan);
+    }
 }

Added: pig/branches/branch-0.9/test/org/apache/pig/test/data/DotFiles/explain1.dot
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/test/org/apache/pig/test/data/DotFiles/explain1.dot?rev=1098887&view=auto
==============================================================================
--- pig/branches/branch-0.9/test/org/apache/pig/test/data/DotFiles/explain1.dot (added)
+++ pig/branches/branch-0.9/test/org/apache/pig/test/data/DotFiles/explain1.dot Tue May  3 01:31:33 2011
@@ -0,0 +1,211 @@
+#-----------------------------------------------
+# New Logical Plan:
+#-----------------------------------------------
+digraph plan {
+compound=true;
+node [shape=rect];
+s20945327_in [label="", style=invis, height=0, width=0];
+s20945327_out [label="", style=invis, height=0, width=0];
+subgraph cluster_20945327 {
+label="LOSort"labelloc=b;
+8569405 [label="Project0:1"];
+s20945327_in -> 8569405 [style=invis];
+};
+8569405 -> s20945327_out [style=invis];
+s22487327_in [label="", style=invis, height=0, width=0];
+s22487327_out [label="", style=invis, height=0, width=0];
+subgraph cluster_22487327 {
+label="LOForEach"labelloc=b;
+24869286 [label="LOInnerLoad"];
+24086580 [label="LOInnerLoad"];
+s8819824_in [label="", style=invis, height=0, width=0];
+s8819824_out [label="", style=invis, height=0, width=0];
+subgraph cluster_8819824 {
+label="LOGenerate"labelloc=b;
+25512760 [label="Project0:(*)"];
+885172 [label="Cast"];
+25512760 -> 885172
+s8819824_in -> 25512760 [style=invis];
+12482151 [label="Project1:(*)"];
+5210968 [label="Dereference"];
+3687978 [label="UserFunc"];
+12482151 -> 5210968
+5210968 -> 3687978
+s8819824_in -> 12482151 [style=invis];
+};
+885172 -> s8819824_out [style=invis];
+3687978 -> s8819824_out [style=invis];
+24869286 -> s8819824_in [lhead=cluster_8819824]
+24086580 -> s8819824_in [lhead=cluster_8819824]
+s22487327_in -> 24869286 [style=invis];
+s22487327_in -> 24086580 [style=invis];
+};
+s8819824_out -> s22487327_out [style=invis];
+s18259890_out [label="", style=invis, height=0, width=0];
+subgraph cluster_18259890 {
+label="LOCogroup"labelloc=b;
+subgraph cluster_18259890_4750048 {
+label="";
+s18259890_4750048_in [label="", style=invis, height=0, width=0];
+5148380 [label="Project0:5"];
+s18259890_4750048_in -> 5148380 [style=invis];
+};
+};
+5148380 -> s18259890_out [style=invis];
+s4750048_in [label="", style=invis, height=0, width=0];
+s4750048_out [label="", style=invis, height=0, width=0];
+subgraph cluster_4750048 {
+label="LOForEach"labelloc=b;
+2915013 [label="LOInnerLoad"];
+3160672 [label="LOInnerLoad"];
+s23258883_in [label="", style=invis, height=0, width=0];
+s23258883_out [label="", style=invis, height=0, width=0];
+subgraph cluster_23258883 {
+label="LOGenerate"labelloc=b;
+8069610 [label="Project0:(*)"];
+s23258883_in -> 8069610 [style=invis];
+32482448 [label="Project1:(*)"];
+s23258883_in -> 32482448 [style=invis];
+};
+8069610 -> s23258883_out [style=invis];
+32482448 -> s23258883_out [style=invis];
+2915013 -> s23258883_in [lhead=cluster_23258883]
+3160672 -> s23258883_in [lhead=cluster_23258883]
+s4750048_in -> 2915013 [style=invis];
+s4750048_in -> 3160672 [style=invis];
+};
+s23258883_out -> s4750048_out [style=invis];
+s8814509_out [label="", style=invis, height=0, width=0];
+subgraph cluster_8814509 {
+label="LOCogroup"labelloc=b;
+subgraph cluster_8814509_1726320 {
+label="";
+s8814509_1726320_in [label="", style=invis, height=0, width=0];
+2645268 [label="Project0:0"];
+s8814509_1726320_in -> 2645268 [style=invis];
+32960257 [label="Project0:1"];
+s8814509_1726320_in -> 32960257 [style=invis];
+};
+subgraph cluster_8814509_4359463 {
+label="";
+s8814509_4359463_in [label="", style=invis, height=0, width=0];
+12928596 [label="Project1:0"];
+s8814509_4359463_in -> 12928596 [style=invis];
+25979266 [label="Project1:1"];
+s8814509_4359463_in -> 25979266 [style=invis];
+};
+};
+2645268 -> s8814509_out [style=invis];
+32960257 -> s8814509_out [style=invis];
+12928596 -> s8814509_out [style=invis];
+25979266 -> s8814509_out [style=invis];
+s22811631_in [label="", style=invis, height=0, width=0];
+s22811631_out [label="", style=invis, height=0, width=0];
+subgraph cluster_22811631 {
+label="LOFilter"labelloc=b;
+17796836 [label="Project0:1"];
+28488784 [label="Constant"];
+7224872 [label="LessThan"];
+20319379 [label="Cast"];
+20319379 -> 7224872
+28488784 -> 7224872
+17796836 -> 20319379
+s22811631_in -> 17796836 [style=invis];
+s22811631_in -> 28488784 [style=invis];
+};
+7224872 -> s22811631_out [style=invis];
+s4968819_in [label="", style=invis, height=0, width=0];
+s4968819_out [label="", style=invis, height=0, width=0];
+subgraph cluster_4968819 {
+label="LOFilter"labelloc=b;
+24814248 [label="Project0:1"];
+1491648 [label="Constant"];
+5039143 [label="LessThan"];
+24356426 [label="Cast"];
+24356426 -> 5039143
+1491648 -> 5039143
+24814248 -> 24356426
+s4968819_in -> 24814248 [style=invis];
+s4968819_in -> 1491648 [style=invis];
+};
+5039143 -> s4968819_out [style=invis];
+2153655 [label="LOLoad", style="filled", fillcolor="gray"];
+27519670 [label="LOLoad", style="filled", fillcolor="gray"];
+16412781 [label="LOStore", style="filled", fillcolor="gray"];
+s1726320_in [label="", style=invis, height=0, width=0];
+s1726320_out [label="", style=invis, height=0, width=0];
+subgraph cluster_1726320 {
+label="LOForEach"labelloc=b;
+s23235469_in [label="", style=invis, height=0, width=0];
+s23235469_out [label="", style=invis, height=0, width=0];
+subgraph cluster_23235469 {
+label="LOGenerate"labelloc=b;
+8172621 [label="Project0:(*)"];
+s23235469_in -> 8172621 [style=invis];
+18957862 [label="Project1:(*)"];
+s23235469_in -> 18957862 [style=invis];
+25468335 [label="Project2:(*)"];
+s23235469_in -> 25468335 [style=invis];
+};
+8172621 -> s23235469_out [style=invis];
+18957862 -> s23235469_out [style=invis];
+25468335 -> s23235469_out [style=invis];
+12062492 [label="LOInnerLoad"];
+31985466 [label="LOInnerLoad"];
+4179068 [label="LOInnerLoad"];
+12062492 -> s23235469_in [lhead=cluster_23235469]
+31985466 -> s23235469_in [lhead=cluster_23235469]
+4179068 -> s23235469_in [lhead=cluster_23235469]
+s1726320_in -> 12062492 [style=invis];
+s1726320_in -> 31985466 [style=invis];
+s1726320_in -> 4179068 [style=invis];
+};
+s23235469_out -> s1726320_out [style=invis];
+s4359463_in [label="", style=invis, height=0, width=0];
+s4359463_out [label="", style=invis, height=0, width=0];
+subgraph cluster_4359463 {
+label="LOForEach"labelloc=b;
+s20182749_in [label="", style=invis, height=0, width=0];
+s20182749_out [label="", style=invis, height=0, width=0];
+subgraph cluster_20182749 {
+label="LOGenerate"labelloc=b;
+3745812 [label="Project0:(*)"];
+s20182749_in -> 3745812 [style=invis];
+7762850 [label="Project1:(*)"];
+s20182749_in -> 7762850 [style=invis];
+10589182 [label="Project2:(*)"];
+s20182749_in -> 10589182 [style=invis];
+33238777 [label="Project3:(*)"];
+s20182749_in -> 33238777 [style=invis];
+};
+3745812 -> s20182749_out [style=invis];
+7762850 -> s20182749_out [style=invis];
+10589182 -> s20182749_out [style=invis];
+33238777 -> s20182749_out [style=invis];
+9719229 [label="LOInnerLoad"];
+8115306 [label="LOInnerLoad"];
+28745811 [label="LOInnerLoad"];
+14141119 [label="LOInnerLoad"];
+9719229 -> s20182749_in [lhead=cluster_20182749]
+8115306 -> s20182749_in [lhead=cluster_20182749]
+28745811 -> s20182749_in [lhead=cluster_20182749]
+14141119 -> s20182749_in [lhead=cluster_20182749]
+s4359463_in -> 9719229 [style=invis];
+s4359463_in -> 8115306 [style=invis];
+s4359463_in -> 28745811 [style=invis];
+s4359463_in -> 14141119 [style=invis];
+};
+s20182749_out -> s4359463_out [style=invis];
+s20945327_out -> 16412781
+s22487327_out -> s20945327_in [lhead=cluster_20945327]
+s18259890_out -> s22487327_in [lhead=cluster_22487327]
+s4750048_out -> s18259890_4750048_in [lhead=cluster_18259890_4750048]
+s8814509_out -> s4750048_in [lhead=cluster_4750048]
+s22811631_out -> s1726320_in [lhead=cluster_1726320]
+s4968819_out -> s4359463_in [lhead=cluster_4359463]
+2153655 -> s22811631_in [lhead=cluster_22811631]
+27519670 -> s4968819_in [lhead=cluster_4968819]
+s1726320_out -> s8814509_1726320_in [lhead=cluster_8814509_1726320]
+s4359463_out -> s8814509_4359463_in [lhead=cluster_8814509_4359463]
+}
+