You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2011/05/03 03:31:33 UTC
svn commit: r1098887 - in /pig/branches/branch-0.9: ./
src/org/apache/pig/newplan/ src/org/apache/pig/newplan/logical/
src/org/apache/pig/newplan/logical/relational/ test/org/apache/pig/test/
test/org/apache/pig/test/data/DotFiles/
Author: daijy
Date: Tue May 3 01:31:33 2011
New Revision: 1098887
URL: http://svn.apache.org/viewvc?rev=1098887&view=rev
Log:
PIG-2016: -dot option does not work with explain and new logical plan
Added:
pig/branches/branch-0.9/src/org/apache/pig/newplan/DotPlanDumper.java
pig/branches/branch-0.9/src/org/apache/pig/newplan/PlanDumper.java
pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/DotLOPrinter.java
pig/branches/branch-0.9/test/org/apache/pig/test/data/DotFiles/explain1.dot
Modified:
pig/branches/branch-0.9/CHANGES.txt
pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
pig/branches/branch-0.9/test/org/apache/pig/test/TestEvalPipelineLocal.java
Modified: pig/branches/branch-0.9/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/CHANGES.txt?rev=1098887&r1=1098886&r2=1098887&view=diff
==============================================================================
--- pig/branches/branch-0.9/CHANGES.txt (original)
+++ pig/branches/branch-0.9/CHANGES.txt Tue May 3 01:31:33 2011
@@ -174,6 +174,8 @@ PIG-1696: Performance: Use System.arrayc
BUG FIXES
+PIG-2016: -dot option does not work with explain and new logical plan (daijy)
+
PIG-2018: NPE for co-group with group-by column having complex schema and
different load functions for each input (thejas)
Added: pig/branches/branch-0.9/src/org/apache/pig/newplan/DotPlanDumper.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/src/org/apache/pig/newplan/DotPlanDumper.java?rev=1098887&view=auto
==============================================================================
--- pig/branches/branch-0.9/src/org/apache/pig/newplan/DotPlanDumper.java (added)
+++ pig/branches/branch-0.9/src/org/apache/pig/newplan/DotPlanDumper.java Tue May 3 01:31:33 2011
@@ -0,0 +1,345 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.newplan;
+
+import java.io.PrintStream;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+import java.util.HashSet;
+
+import org.apache.pig.impl.util.MultiMap;
+
+/**
+ * This class puts everything that is needed to dump a plan in a
+ * format readable by graphviz's dot algorithm. Out of the box it does
+ * not print any nested plans.
+ */
+public class DotPlanDumper extends PlanDumper {
+
+ protected Set<Operator> mSubgraphs;
+ protected Set<Operator> mMultiInputSubgraphs;
+ protected Set<Operator> mMultiOutputSubgraphs;
+ private boolean isSubGraph = false;
+
+ public DotPlanDumper(BaseOperatorPlan plan, PrintStream ps) {
+ this(plan, ps, false, new HashSet<Operator>(), new HashSet<Operator>(),
+ new HashSet<Operator>());
+ }
+
+ protected DotPlanDumper(BaseOperatorPlan plan, PrintStream ps, boolean isSubGraph,
+ Set<Operator> mSubgraphs,
+ Set<Operator> mMultiInputSubgraphs,
+ Set<Operator> mMultiOutputSubgraphs) {
+ super(plan, ps);
+ this.isSubGraph = isSubGraph;
+ this.mSubgraphs = mSubgraphs;
+ this.mMultiInputSubgraphs = mMultiInputSubgraphs;
+ this.mMultiOutputSubgraphs = mMultiOutputSubgraphs;
+ }
+
+ @Override
+ public void dump() {
+ if (!isSubGraph) {
+ ps.println("digraph plan {");
+ ps.println("compound=true;");
+ ps.println("node [shape=rect];");
+ }
+ super.dump();
+ if (!isSubGraph) {
+ ps.println("}");
+ }
+ }
+
+ @Override
+ protected void dumpMultiInputNestedOperator(Operator op, MultiMap<Operator, BaseOperatorPlan> plans) {
+ dumpInvisibleOutput(op);
+
+ ps.print("subgraph ");
+ ps.print(getClusterID(op));
+ ps.println(" {");
+ join("; ", getAttributes(op));
+ ps.println("labelloc=b;");
+
+ mMultiInputSubgraphs.add(op);
+
+ for (Operator o: plans.keySet()) {
+ ps.print("subgraph ");
+ ps.print(getClusterID(op, o));
+ ps.println(" {");
+ ps.println("label=\"\";");
+ dumpInvisibleInput(op, o);
+ for (BaseOperatorPlan plan : plans.get(o)) {
+ PlanDumper dumper = makeDumper(plan, ps);
+ dumper.dump();
+ connectInvisibleInput(op, o, plan);
+ }
+ ps.println("};");
+ }
+ ps.println("};");
+
+ for (Operator o: plans.keySet()) {
+ for (BaseOperatorPlan plan: plans.get(o)) {
+ connectInvisibleOutput(op, plan);
+ }
+ }
+ }
+
+ @Override
+ protected void dumpMultiOutputNestedOperator(Operator op, Collection<BaseOperatorPlan> plans) {
+ super.dumpMultiOutputNestedOperator(op, plans);
+
+ mMultiOutputSubgraphs.add(op);
+
+ dumpInvisibleOutput(op);
+ for (BaseOperatorPlan plan: plans) {
+ connectInvisibleOutput(op, plan);
+ }
+ }
+
+ @Override
+ protected void dumpNestedOperator(Operator op, Collection<BaseOperatorPlan> plans) {
+ dumpInvisibleOperators(op);
+ ps.print("subgraph ");
+ ps.print(getClusterID(op));
+ ps.println(" {");
+ join("; ", getAttributes(op));
+ ps.println("labelloc=b;");
+
+ mSubgraphs.add(op);
+
+ for (BaseOperatorPlan plan: plans) {
+ PlanDumper dumper = makeDumper(plan, ps);
+ dumper.dump();
+ connectInvisibleInput(op, plan);
+ }
+ ps.println("};");
+
+ for (BaseOperatorPlan plan: plans) {
+ connectInvisibleOutput(op, plan);
+ }
+ }
+
+ @Override
+ protected void dumpOperator(Operator op) {
+ ps.print(getID(op));
+ ps.print(" [");
+ join(", ", getAttributes(op));
+ ps.println("];");
+ }
+
+ @Override
+ protected void dumpEdge(Operator op, Operator suc) {
+ String in = getID(op);
+ String out = getID(suc);
+ String attributes = "";
+
+ if (mMultiInputSubgraphs.contains(op)
+ || mSubgraphs.contains(op)
+ || mMultiOutputSubgraphs.contains(op)) {
+ in = getSubgraphID(op, false);
+ }
+
+ if (mMultiInputSubgraphs.contains(suc)) {
+ out = getSubgraphID(suc, op, true);
+ attributes = " [lhead="+getClusterID(suc,op)+"]";
+ }
+
+ if (mSubgraphs.contains(suc)) {
+ out = getSubgraphID(suc, true);
+ attributes = " [lhead="+getClusterID(suc)+"]";
+ }
+
+ if (reverse(plan)) {
+ ps.print(out);
+ ps.print(" -> ");
+ ps.print(in);
+ } else {
+ ps.print(in);
+ ps.print(" -> ");
+ ps.print(out);
+ }
+ ps.println(attributes);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ protected PlanDumper makeDumper(BaseOperatorPlan plan, PrintStream ps) {
+ return new DotPlanDumper(plan, ps, true,
+ mSubgraphs, mMultiInputSubgraphs,
+ mMultiOutputSubgraphs);
+ }
+
+ /**
+ * Used to generate the label for an operator.
+ * @param op operator to dump
+ */
+ protected String getName(Operator op) {
+ return op.getName();
+ }
+
+ /**
+ * Used to generate the the attributes of a node
+ * @param op operator
+ */
+ protected String[] getAttributes(Operator op) {
+ String[] attributes = new String[1];
+ attributes[0] = "label=\""+getName(op)+"\"";
+ return attributes;
+ }
+
+
+ private void connectInvisibleInput(Operator op1, Operator op2, BaseOperatorPlan plan) {
+ String in = getSubgraphID(op1, op2, true);
+
+ List<Operator> sources;
+ if (reverse(plan))
+ sources = plan.getSinks();
+ else
+ sources = plan.getSources();
+
+ for (Operator l: sources) {
+ dumpInvisibleEdge(in, getID(l));
+ }
+ }
+
+ private void connectInvisibleInput(Operator op, BaseOperatorPlan plan) {
+ String in = getSubgraphID(op, true);
+
+ List<Operator> sources;
+ if (reverse(plan))
+ sources = plan.getSinks();
+ else
+ sources = plan.getSources();
+
+ for (Operator l: sources) {
+ String out;
+ if (mSubgraphs.contains(l) || mMultiInputSubgraphs.contains(l)) {
+ out = getSubgraphID(l, true);
+ } else {
+ out = getID(l);
+ }
+
+ dumpInvisibleEdge(in, out);
+ }
+ }
+
+ private void connectInvisibleOutput(Operator op,
+ BaseOperatorPlan plan) {
+ String out = getSubgraphID(op, false);
+
+ List<Operator> sinks;
+ if (reverse(plan))
+ sinks = plan.getSources();
+ else
+ sinks = plan.getSinks();
+
+ for (Operator l: sinks) {
+ String in;
+ if (mSubgraphs.contains(l)
+ || mMultiInputSubgraphs.contains(l)
+ || mMultiOutputSubgraphs.contains(l)) {
+ in = getSubgraphID(l, false);
+ } else {
+ in = getID(l);
+ }
+
+ dumpInvisibleEdge(in, out);
+ }
+ }
+
+ private void connectInvisible(Operator op, BaseOperatorPlan plan) {
+ connectInvisibleInput(op, plan);
+ connectInvisibleOutput(op, plan);
+ }
+
+ private void dumpInvisibleInput(Operator op1, Operator op2) {
+ ps.print(getSubgraphID(op1, op2, true));
+ ps.print(" ");
+ ps.print(getInvisibleAttributes(op1));
+ ps.println(";");
+ }
+
+ private void dumpInvisibleInput(Operator op) {
+ ps.print(getSubgraphID(op, true));
+ ps.print(" ");
+ ps.print(getInvisibleAttributes(op));
+ ps.println(";");
+ }
+
+ private void dumpInvisibleOutput(Operator op) {
+ ps.print(getSubgraphID(op, false));
+ ps.print(" ");
+ ps.print(getInvisibleAttributes(op));
+ ps.println(";");
+ }
+
+ protected void dumpInvisibleOperators(Operator op) {
+ dumpInvisibleInput(op);
+ dumpInvisibleOutput(op);
+ }
+
+ private String getClusterID(Operator op1, Operator op2) {
+ return getClusterID(op1)+"_"+getID(op2);
+ }
+
+ private String getClusterID(Operator op) {
+ return "cluster_"+getID(op);
+ }
+
+ private String getSubgraphID(Operator op1, Operator op2, boolean in) {
+ String id = "s"+getID(op1)+"_"+getID(op2);
+ if (in) {
+ id += "_in";
+ }
+ else {
+ id += "_out";
+ }
+ return id;
+ }
+
+ private String getSubgraphID(Operator op, boolean in) {
+ String id = "s"+getID(op);
+ if (in) {
+ id += "_in";
+ }
+ else {
+ id += "_out";
+ }
+ return id;
+ }
+
+ private String getID(Operator op) {
+ return ""+Math.abs(op.hashCode());
+ }
+
+ private String getInvisibleAttributes(Operator op) {
+ return "[label=\"\", style=invis, height=0, width=0]";
+ }
+
+ private void dumpInvisibleEdge(String op, String suc) {
+ ps.print(op);
+ ps.print(" -> ");
+ ps.print(suc);
+ ps.println(" [style=invis];");
+ }
+
+ protected boolean reverse(BaseOperatorPlan plan) {
+ return false;
+ }
+}
Added: pig/branches/branch-0.9/src/org/apache/pig/newplan/PlanDumper.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/src/org/apache/pig/newplan/PlanDumper.java?rev=1098887&view=auto
==============================================================================
--- pig/branches/branch-0.9/src/org/apache/pig/newplan/PlanDumper.java (added)
+++ pig/branches/branch-0.9/src/org/apache/pig/newplan/PlanDumper.java Tue May 3 01:31:33 2011
@@ -0,0 +1,216 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.newplan;
+
+import java.io.PrintStream;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Collection;
+import org.apache.pig.impl.util.MultiMap;
+
+/**
+ * This class dumps a nested plan to a print stream. It does not walk
+ * the graph in any particular fashion; it merely iterates over all
+ * operators and edges and calls a corresponding dump function. If a
+ * node of the plan has nested plans these will be dumped when the
+ * node is handled.
+ */
+public class PlanDumper {
+
+    /** Stream everything is written to. */
+    protected PrintStream ps;
+    /** Plan handled by this dumper. */
+    protected BaseOperatorPlan plan;
+    /** When false, multi input and plain nested plans are not dumped. */
+    protected boolean isVerbose = true;
+
+    /**
+     * @param plan plan to dump
+     * @param ps stream the plan is written to
+     */
+    public PlanDumper(BaseOperatorPlan plan, PrintStream ps) {
+        this.plan = plan;
+        this.ps = ps;
+    }
+
+    /**
+     * @param verbose true to include nested plans in the output
+     */
+    public void setVerbose(boolean verbose) {
+        this.isVerbose = verbose;
+    }
+
+    /**
+     * @return true if nested plans are included in the output
+     */
+    public boolean isVerbose() {
+        return isVerbose;
+    }
+
+    /**
+     * This is the public interface. Dump writes the plan and nested
+     * plans to the stream: first every operator, then every edge.
+     */
+    public void dump() {
+        Iterator<Operator> iter = plan.getOperators();
+        while (iter.hasNext()) {
+            Operator op = iter.next();
+            MultiMap<Operator,BaseOperatorPlan> inputPlans = getMultiInputNestedPlans(op);
+            if (isVerbose && !inputPlans.isEmpty()) {
+                dumpMultiInputNestedOperator(op, inputPlans);
+                continue;
+            }
+
+            // Unlike the other two kinds, nested output plans are
+            // dumped regardless of the verbose flag.
+            Collection<BaseOperatorPlan> nested = getMultiOutputNestedPlans(op);
+            if (!nested.isEmpty()) {
+                dumpMultiOutputNestedOperator(op, nested);
+                continue;
+            }
+
+            nested = getNestedPlans(op);
+            if (isVerbose && !nested.isEmpty()) {
+                dumpNestedOperator(op, nested);
+                continue;
+            }
+
+            dumpOperator(op);
+        }
+
+        iter = plan.getOperators();
+        while (iter.hasNext()) {
+            Operator op = iter.next();
+            Collection<Operator> successors = plan.getSuccessors(op);
+            if (successors != null) {
+                for (Operator suc: successors) {
+                    dumpEdge(op, suc);
+                }
+            }
+        }
+    }
+
+    /**
+     * makeDumper is a factory method. Used by subclasses to specify
+     * what dumper should handle the nested plan. The dumper created
+     * here inherits the verbosity of this dumper.
+     * @param plan Plan that the new dumper should handle
+     * @param ps stream the new dumper writes to
+     * @return the dumper for plan
+     */
+    protected PlanDumper makeDumper(BaseOperatorPlan plan, PrintStream ps) {
+        PlanDumper dumper = new PlanDumper(plan, ps);
+        dumper.setVerbose(isVerbose);
+        return dumper;
+    }
+
+    /**
+     * Will be called to dump a simple operator
+     * @param op the operator to be dumped
+     */
+    protected void dumpOperator(Operator op) {
+        ps.println(plainName(op));
+    }
+
+    /**
+     * Will be called when an operator has nested plans, which are
+     * connected to one of the multiple inputs.
+     * @param op the nested operator
+     * @param plans a map of input operator to connected nested plan
+     */
+    protected void dumpMultiInputNestedOperator(Operator op, MultiMap<Operator,BaseOperatorPlan> plans) {
+        dumpOperator(op);
+        for (Operator input: plans.keySet()) {
+            for (BaseOperatorPlan nested: plans.get(input)) {
+                makeDumper(nested, ps).dump();
+            }
+        }
+    }
+
+    /**
+     * Will be called for nested operators, where the plans represent
+     * how the output of the operator is processed.
+     * @param op the nested operator
+     * @param plans a collection of sub plans.
+     */
+    protected void dumpMultiOutputNestedOperator(Operator op, Collection<BaseOperatorPlan> plans) {
+        dumpOperator(op);
+        for (BaseOperatorPlan nested: plans) {
+            makeDumper(nested, ps).dump();
+            // Tie the operator to every source of its nested plan.
+            for (Operator source: nested.getSources()) {
+                dumpEdge(op, source);
+            }
+        }
+    }
+
+    /**
+     * Will be called for nested operators. The nested plans are not
+     * specifically connected to any input or output operators.
+     * @param op the nested operator
+     * @param plans a collection of sub plans.
+     */
+    protected void dumpNestedOperator(Operator op, Collection<BaseOperatorPlan> plans) {
+        dumpOperator(op);
+        for (BaseOperatorPlan nested: plans) {
+            makeDumper(nested, ps).dump();
+        }
+    }
+
+    /**
+     * Will be called to dump the edges of the plan. Each edge results
+     * in one call. Names are written the same way dumpOperator writes
+     * them, so an edge always refers to nodes as they were dumped.
+     * @param op tail of the edge
+     * @param suc head of the edge
+     */
+    protected void dumpEdge(Operator op, Operator suc) {
+        ps.println(plainName(op)+" -> "+plainName(suc));
+    }
+
+    /**
+     * Used to determine if an operator has nested plans, which are
+     * connected to specific input operators.
+     * @param op operator
+     * @return Map describing the input to nested plan relationship.
+     */
+    protected MultiMap<Operator, BaseOperatorPlan> getMultiInputNestedPlans(Operator op) {
+        return new MultiMap<Operator, BaseOperatorPlan>();
+    }
+
+    /**
+     * Used to determine if an operator has nested output plans
+     *
+     * @param op operator
+     * @return Collection of nested output plans.
+     */
+    protected Collection<BaseOperatorPlan> getMultiOutputNestedPlans(Operator op) {
+        return new LinkedList<BaseOperatorPlan>();
+    }
+
+    /**
+     * Used to determine if an operator has nested plans (without
+     * connections to in- or output operators).
+     * @param op operator
+     * @return Collection of nested plans.
+     */
+    protected Collection<BaseOperatorPlan> getNestedPlans(Operator op) {
+        return new LinkedList<BaseOperatorPlan>();
+    }
+
+    /**
+     * Helper function to print a string array. Nothing is printed for
+     * a null array and no separator follows the last element.
+     * @param sep Separator
+     * @param strings Array to print
+     */
+    protected void join(String sep, String[] strings) {
+        if (strings == null) {
+            return;
+        }
+
+        for (int i = 0; i < strings.length; ++i) {
+            if (i != 0) {
+                ps.print(sep);
+            }
+            ps.print(strings[i]);
+        }
+    }
+
+    /** Name of an operator with blanks replaced, as used in the plain text dump. */
+    private static String plainName(Operator op) {
+        return op.getName().replace(" ","_");
+    }
+}
Added: pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/DotLOPrinter.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/DotLOPrinter.java?rev=1098887&view=auto
==============================================================================
--- pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/DotLOPrinter.java (added)
+++ pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/DotLOPrinter.java Tue May 3 01:31:33 2011
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.newplan.logical;
+
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.LinkedList;
+import java.util.Collection;
+import org.apache.pig.impl.util.MultiMap;
+import org.apache.pig.newplan.BaseOperatorPlan;
+import org.apache.pig.newplan.DotPlanDumper;
+import org.apache.pig.newplan.Operator;
+import org.apache.pig.newplan.logical.expression.LogicalExpressionPlan;
+import org.apache.pig.newplan.logical.expression.ProjectExpression;
+import org.apache.pig.newplan.logical.relational.LOCogroup;
+import org.apache.pig.newplan.logical.relational.LOFilter;
+import org.apache.pig.newplan.logical.relational.LOForEach;
+import org.apache.pig.newplan.logical.relational.LOGenerate;
+import org.apache.pig.newplan.logical.relational.LOJoin;
+import org.apache.pig.newplan.logical.relational.LOLoad;
+import org.apache.pig.newplan.logical.relational.LOSort;
+import org.apache.pig.newplan.logical.relational.LOSplitOutput;
+import org.apache.pig.newplan.logical.relational.LOStore;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * This class can print a logical plan in the DOT format. It uses
+ * clusters to illustrate nesting. If "verbose" is off, it will skip
+ * any nesting.
+ */
+public class DotLOPrinter extends DotPlanDumper {
+
+    /**
+     * Creates a printer for a top level plan.
+     * @param plan plan to print
+     * @param ps stream the DOT text is written to
+     */
+    public DotLOPrinter(BaseOperatorPlan plan, PrintStream ps) {
+        this(plan, ps, false, new HashSet<Operator>(), new HashSet<Operator>(),
+             new HashSet<Operator>());
+    }
+
+    /** Used for nested plans, which share the bookkeeping sets of their parent. */
+    private DotLOPrinter(BaseOperatorPlan plan, PrintStream ps, boolean isSubGraph,
+                         Set<Operator> subgraphs,
+                         Set<Operator> multiInSubgraphs,
+                         Set<Operator> multiOutSubgraphs) {
+        super(plan, ps, isSubGraph, subgraphs,
+              multiInSubgraphs, multiOutSubgraphs);
+    }
+
+    /**
+     * Creates the printer for a nested plan. It shares the bookkeeping
+     * sets of this printer and inherits its verbosity.
+     * @param plan nested plan the new printer should handle
+     * @param ps stream the new printer writes to
+     * @return the printer for plan
+     */
+    @Override
+    protected DotPlanDumper makeDumper(BaseOperatorPlan plan, PrintStream ps) {
+        DotLOPrinter printer = new DotLOPrinter(plan, ps, true, mSubgraphs,
+                                                mMultiInputSubgraphs,
+                                                mMultiOutputSubgraphs);
+        printer.setVerbose(isVerbose());
+        return printer;
+    }
+
+    /**
+     * Builds the node label. Projections are extended with their input
+     * number and the projected column, range or star.
+     * @param op operator to label
+     * @return the label text
+     */
+    @Override
+    protected String getName(Operator op) {
+        StringBuilder info = new StringBuilder(op.getName());
+        if (op instanceof ProjectExpression) {
+            ProjectExpression pr = (ProjectExpression)op;
+            info.append(pr.getInputNum());
+            info.append(":");
+            if (pr.isProjectStar()) {
+                info.append("(*)");
+            } else if (pr.isRangeProject()) {
+                info.append("[").append(pr.getStartCol()).append(" .. ").append(pr.getEndCol()).append("]");
+            } else {
+                info.append(pr.getColNum());
+            }
+        }
+        return info.toString();
+    }
+
+    /**
+     * Loads and stores are drawn as gray filled boxes; every colon in
+     * their label is replaced by a comma and a DOT line break. All other
+     * operators use the default attributes.
+     * @param op operator
+     * @return the attributes, each already in "key=value" form
+     */
+    @Override
+    protected String[] getAttributes(Operator op) {
+        if (!(op instanceof LOStore || op instanceof LOLoad)) {
+            return super.getAttributes(op);
+        }
+        return new String[] {
+            "label=\""+getName(op).replace(":",",\\n")+"\"",
+            "style=\"filled\"",
+            "fillcolor=\"gray\""
+        };
+    }
+
+    /**
+     * Cogroups and joins carry expression plans per input; these are
+     * reported keyed by the predecessor that feeds the input.
+     * @param op operator
+     * @return map of input operator to its expression plans, empty for
+     *         any other operator
+     */
+    @Override
+    protected MultiMap<Operator, BaseOperatorPlan>
+        getMultiInputNestedPlans(Operator op) {
+
+        if (op instanceof LOCogroup) {
+            return plansByInput(op, ((LOCogroup)op).getExpressionPlans());
+        }
+        if (op instanceof LOJoin) {
+            return plansByInput(op, ((LOJoin)op).getExpressionPlans());
+        }
+        return new MultiMap<Operator, BaseOperatorPlan>();
+    }
+
+    /** Re-keys expression plans from input index to the predecessor at that index. */
+    private MultiMap<Operator, BaseOperatorPlan> plansByInput(
+            Operator op, MultiMap<Integer, LogicalExpressionPlan> exprPlans) {
+        MultiMap<Operator, BaseOperatorPlan> planMap = new MultiMap<Operator, BaseOperatorPlan>();
+        for (Integer i : exprPlans.keySet()) {
+            List<BaseOperatorPlan> plans = new ArrayList<BaseOperatorPlan>();
+            plans.addAll(exprPlans.get(i));
+            Operator pred = plan.getPredecessors(op).get(i);
+            planMap.put(pred, plans);
+        }
+        return planMap;
+    }
+
+    /**
+     * Reports the inner plans of filter, foreach, generate, sort and
+     * split output operators.
+     * @param op operator
+     * @return the nested plans, empty for any other operator
+     */
+    @Override
+    protected Collection<BaseOperatorPlan> getNestedPlans(Operator op) {
+        Collection<BaseOperatorPlan> plans = new LinkedList<BaseOperatorPlan>();
+
+        if (op instanceof LOFilter) {
+            plans.add(((LOFilter)op).getFilterPlan());
+        } else if (op instanceof LOForEach) {
+            plans.add(((LOForEach)op).getInnerPlan());
+        } else if (op instanceof LOGenerate) {
+            plans.addAll(((LOGenerate)op).getOutputPlans());
+        } else if (op instanceof LOSort) {
+            plans.addAll(((LOSort)op).getSortColPlans());
+        } else if (op instanceof LOSplitOutput) {
+            plans.add(((LOSplitOutput)op).getFilterPlan());
+        }
+
+        return plans;
+    }
+
+    /**
+     * Expression plans are drawn with their edges reversed.
+     * @param plan plan about to be dumped
+     * @return true if plan is a LogicalExpressionPlan
+     */
+    @Override
+    protected boolean reverse(BaseOperatorPlan plan) {
+        return plan instanceof LogicalExpressionPlan;
+    }
+}
Modified: pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java?rev=1098887&r1=1098886&r2=1098887&view=diff
==============================================================================
--- pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java (original)
+++ pig/branches/branch-0.9/src/org/apache/pig/newplan/logical/relational/LogicalPlan.java Tue May 3 01:31:33 2011
@@ -25,9 +25,11 @@ import java.util.Iterator;
import java.util.List;
import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.logicalLayer.LOPrinter;
import org.apache.pig.newplan.BaseOperatorPlan;
import org.apache.pig.newplan.Operator;
import org.apache.pig.newplan.OperatorPlan;
+import org.apache.pig.newplan.logical.DotLOPrinter;
import org.apache.pig.newplan.logical.optimizer.LogicalPlanPrinter;
/**
@@ -76,8 +78,14 @@ public class LogicalPlan extends BaseOpe
ps.println("# New Logical Plan:");
ps.println("#-----------------------------------------------");
- LogicalPlanPrinter npp = new LogicalPlanPrinter(this, ps);
- npp.visit();
+ if (format.equals("text")) {
+ LogicalPlanPrinter npp = new LogicalPlanPrinter(this, ps);
+ npp.visit();
+ } else if (format.equals("dot")) {
+ DotLOPrinter lpp = new DotLOPrinter(this, ps);
+ lpp.dump();
+ ps.println("");
+ }
}
public Operator findByAlias(String alias) {
Modified: pig/branches/branch-0.9/test/org/apache/pig/test/TestEvalPipelineLocal.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/test/org/apache/pig/test/TestEvalPipelineLocal.java?rev=1098887&r1=1098886&r2=1098887&view=diff
==============================================================================
--- pig/branches/branch-0.9/test/org/apache/pig/test/TestEvalPipelineLocal.java (original)
+++ pig/branches/branch-0.9/test/org/apache/pig/test/TestEvalPipelineLocal.java Tue May 3 01:31:33 2011
@@ -18,6 +18,7 @@
package org.apache.pig.test;
import java.io.File;
+import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
@@ -61,6 +62,8 @@ public class TestEvalPipelineLocal {
private PigServer pigServer;
+ static final int MAX_SIZE = 100000;
+
TupleFactory mTf = TupleFactory.getInstance();
@Before
@@ -1020,5 +1023,40 @@ public class TestEvalPipelineLocal {
Assert.assertEquals((LOOP_COUNT * LOOP_COUNT)/2, numRows);
}
-
+
+    /**
+     * Explains a script in dot format and compares the result with the
+     * golden file explain1.dot. Operator ids are derived from hash codes
+     * and differ between runs, so they are stripped before comparing.
+     */
+    @Test
+    public void testExplainInDotGraph() throws Exception{
+        pigServer.registerQuery("a = load 'student' using " + PigStorage.class.getName() + "(':') as (name, age, gpa);");
+        pigServer.registerQuery("b = load 'voter' using " + PigStorage.class.getName() + "(',') as (name, age, registration, contributions);");
+        pigServer.registerQuery("c = filter a by age < 50;");
+        pigServer.registerQuery("d = filter b by age < 50;");
+        pigServer.registerQuery("e = cogroup c by (name, age), d by (name, age);");
+        pigServer.registerQuery("f = foreach e generate flatten(c), flatten(d);");
+        pigServer.registerQuery("g = group f by registration;");
+        pigServer.registerQuery("h = foreach g generate (chararray)group, SUM(f.d::contributions);");
+        pigServer.registerQuery("i = order h by $1;");
+
+        File tmpFile = File.createTempFile("test", ".txt");
+        try {
+            PrintStream ps = new PrintStream(new FileOutputStream(tmpFile));
+            try {
+                pigServer.explain("i", "dot", true, true, ps, System.out, System.out);
+            } finally {
+                ps.close();
+            }
+
+            String goldenPlan = readDotPlanIgnoringIds(
+                    new File("test/org/apache/pig/test/data/DotFiles/explain1.dot"));
+            String realPlan = readDotPlanIgnoringIds(tmpFile);
+
+            Assert.assertEquals(goldenPlan, realPlan);
+        } finally {
+            // Best effort clean up; a left over temp file is not a test failure.
+            tmpFile.delete();
+        }
+    }
+
+    /**
+     * Reads a whole dot file, trims it and removes every run of three or
+     * more digits, which filters out the ids generated from hash codes.
+     */
+    private static String readDotPlanIgnoringIds(File file) throws IOException {
+        byte[] content = new byte[(int) file.length()];
+        int filled = 0;
+        FileInputStream in = new FileInputStream(file);
+        try {
+            // A single read() may return fewer bytes than requested.
+            while (filled < content.length) {
+                int n = in.read(content, filled, content.length - filled);
+                if (n == -1) {
+                    break;
+                }
+                filled += n;
+            }
+        } finally {
+            in.close();
+        }
+        return new String(content, 0, filled).trim().replaceAll("\\d{3,}", "");
+    }
}
Added: pig/branches/branch-0.9/test/org/apache/pig/test/data/DotFiles/explain1.dot
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.9/test/org/apache/pig/test/data/DotFiles/explain1.dot?rev=1098887&view=auto
==============================================================================
--- pig/branches/branch-0.9/test/org/apache/pig/test/data/DotFiles/explain1.dot (added)
+++ pig/branches/branch-0.9/test/org/apache/pig/test/data/DotFiles/explain1.dot Tue May 3 01:31:33 2011
@@ -0,0 +1,211 @@
+#-----------------------------------------------
+# New Logical Plan:
+#-----------------------------------------------
+digraph plan {
+compound=true;
+node [shape=rect];
+s20945327_in [label="", style=invis, height=0, width=0];
+s20945327_out [label="", style=invis, height=0, width=0];
+subgraph cluster_20945327 {
+label="LOSort"labelloc=b;
+8569405 [label="Project0:1"];
+s20945327_in -> 8569405 [style=invis];
+};
+8569405 -> s20945327_out [style=invis];
+s22487327_in [label="", style=invis, height=0, width=0];
+s22487327_out [label="", style=invis, height=0, width=0];
+subgraph cluster_22487327 {
+label="LOForEach"labelloc=b;
+24869286 [label="LOInnerLoad"];
+24086580 [label="LOInnerLoad"];
+s8819824_in [label="", style=invis, height=0, width=0];
+s8819824_out [label="", style=invis, height=0, width=0];
+subgraph cluster_8819824 {
+label="LOGenerate"labelloc=b;
+25512760 [label="Project0:(*)"];
+885172 [label="Cast"];
+25512760 -> 885172
+s8819824_in -> 25512760 [style=invis];
+12482151 [label="Project1:(*)"];
+5210968 [label="Dereference"];
+3687978 [label="UserFunc"];
+12482151 -> 5210968
+5210968 -> 3687978
+s8819824_in -> 12482151 [style=invis];
+};
+885172 -> s8819824_out [style=invis];
+3687978 -> s8819824_out [style=invis];
+24869286 -> s8819824_in [lhead=cluster_8819824]
+24086580 -> s8819824_in [lhead=cluster_8819824]
+s22487327_in -> 24869286 [style=invis];
+s22487327_in -> 24086580 [style=invis];
+};
+s8819824_out -> s22487327_out [style=invis];
+s18259890_out [label="", style=invis, height=0, width=0];
+subgraph cluster_18259890 {
+label="LOCogroup"labelloc=b;
+subgraph cluster_18259890_4750048 {
+label="";
+s18259890_4750048_in [label="", style=invis, height=0, width=0];
+5148380 [label="Project0:5"];
+s18259890_4750048_in -> 5148380 [style=invis];
+};
+};
+5148380 -> s18259890_out [style=invis];
+s4750048_in [label="", style=invis, height=0, width=0];
+s4750048_out [label="", style=invis, height=0, width=0];
+subgraph cluster_4750048 {
+label="LOForEach"labelloc=b;
+2915013 [label="LOInnerLoad"];
+3160672 [label="LOInnerLoad"];
+s23258883_in [label="", style=invis, height=0, width=0];
+s23258883_out [label="", style=invis, height=0, width=0];
+subgraph cluster_23258883 {
+label="LOGenerate"labelloc=b;
+8069610 [label="Project0:(*)"];
+s23258883_in -> 8069610 [style=invis];
+32482448 [label="Project1:(*)"];
+s23258883_in -> 32482448 [style=invis];
+};
+8069610 -> s23258883_out [style=invis];
+32482448 -> s23258883_out [style=invis];
+2915013 -> s23258883_in [lhead=cluster_23258883]
+3160672 -> s23258883_in [lhead=cluster_23258883]
+s4750048_in -> 2915013 [style=invis];
+s4750048_in -> 3160672 [style=invis];
+};
+s23258883_out -> s4750048_out [style=invis];
+s8814509_out [label="", style=invis, height=0, width=0];
+subgraph cluster_8814509 {
+label="LOCogroup"labelloc=b;
+subgraph cluster_8814509_1726320 {
+label="";
+s8814509_1726320_in [label="", style=invis, height=0, width=0];
+2645268 [label="Project0:0"];
+s8814509_1726320_in -> 2645268 [style=invis];
+32960257 [label="Project0:1"];
+s8814509_1726320_in -> 32960257 [style=invis];
+};
+subgraph cluster_8814509_4359463 {
+label="";
+s8814509_4359463_in [label="", style=invis, height=0, width=0];
+12928596 [label="Project1:0"];
+s8814509_4359463_in -> 12928596 [style=invis];
+25979266 [label="Project1:1"];
+s8814509_4359463_in -> 25979266 [style=invis];
+};
+};
+2645268 -> s8814509_out [style=invis];
+32960257 -> s8814509_out [style=invis];
+12928596 -> s8814509_out [style=invis];
+25979266 -> s8814509_out [style=invis];
+s22811631_in [label="", style=invis, height=0, width=0];
+s22811631_out [label="", style=invis, height=0, width=0];
+subgraph cluster_22811631 {
+label="LOFilter"labelloc=b;
+17796836 [label="Project0:1"];
+28488784 [label="Constant"];
+7224872 [label="LessThan"];
+20319379 [label="Cast"];
+20319379 -> 7224872
+28488784 -> 7224872
+17796836 -> 20319379
+s22811631_in -> 17796836 [style=invis];
+s22811631_in -> 28488784 [style=invis];
+};
+7224872 -> s22811631_out [style=invis];
+s4968819_in [label="", style=invis, height=0, width=0];
+s4968819_out [label="", style=invis, height=0, width=0];
+subgraph cluster_4968819 {
+label="LOFilter"labelloc=b;
+24814248 [label="Project0:1"];
+1491648 [label="Constant"];
+5039143 [label="LessThan"];
+24356426 [label="Cast"];
+24356426 -> 5039143
+1491648 -> 5039143
+24814248 -> 24356426
+s4968819_in -> 24814248 [style=invis];
+s4968819_in -> 1491648 [style=invis];
+};
+5039143 -> s4968819_out [style=invis];
+2153655 [label="LOLoad", style="filled", fillcolor="gray"];
+27519670 [label="LOLoad", style="filled", fillcolor="gray"];
+16412781 [label="LOStore", style="filled", fillcolor="gray"];
+s1726320_in [label="", style=invis, height=0, width=0];
+s1726320_out [label="", style=invis, height=0, width=0];
+subgraph cluster_1726320 {
+label="LOForEach"labelloc=b;
+s23235469_in [label="", style=invis, height=0, width=0];
+s23235469_out [label="", style=invis, height=0, width=0];
+subgraph cluster_23235469 {
+label="LOGenerate"labelloc=b;
+8172621 [label="Project0:(*)"];
+s23235469_in -> 8172621 [style=invis];
+18957862 [label="Project1:(*)"];
+s23235469_in -> 18957862 [style=invis];
+25468335 [label="Project2:(*)"];
+s23235469_in -> 25468335 [style=invis];
+};
+8172621 -> s23235469_out [style=invis];
+18957862 -> s23235469_out [style=invis];
+25468335 -> s23235469_out [style=invis];
+12062492 [label="LOInnerLoad"];
+31985466 [label="LOInnerLoad"];
+4179068 [label="LOInnerLoad"];
+12062492 -> s23235469_in [lhead=cluster_23235469]
+31985466 -> s23235469_in [lhead=cluster_23235469]
+4179068 -> s23235469_in [lhead=cluster_23235469]
+s1726320_in -> 12062492 [style=invis];
+s1726320_in -> 31985466 [style=invis];
+s1726320_in -> 4179068 [style=invis];
+};
+s23235469_out -> s1726320_out [style=invis];
+s4359463_in [label="", style=invis, height=0, width=0];
+s4359463_out [label="", style=invis, height=0, width=0];
+subgraph cluster_4359463 {
+label="LOForEach"labelloc=b;
+s20182749_in [label="", style=invis, height=0, width=0];
+s20182749_out [label="", style=invis, height=0, width=0];
+subgraph cluster_20182749 {
+label="LOGenerate"labelloc=b;
+3745812 [label="Project0:(*)"];
+s20182749_in -> 3745812 [style=invis];
+7762850 [label="Project1:(*)"];
+s20182749_in -> 7762850 [style=invis];
+10589182 [label="Project2:(*)"];
+s20182749_in -> 10589182 [style=invis];
+33238777 [label="Project3:(*)"];
+s20182749_in -> 33238777 [style=invis];
+};
+3745812 -> s20182749_out [style=invis];
+7762850 -> s20182749_out [style=invis];
+10589182 -> s20182749_out [style=invis];
+33238777 -> s20182749_out [style=invis];
+9719229 [label="LOInnerLoad"];
+8115306 [label="LOInnerLoad"];
+28745811 [label="LOInnerLoad"];
+14141119 [label="LOInnerLoad"];
+9719229 -> s20182749_in [lhead=cluster_20182749]
+8115306 -> s20182749_in [lhead=cluster_20182749]
+28745811 -> s20182749_in [lhead=cluster_20182749]
+14141119 -> s20182749_in [lhead=cluster_20182749]
+s4359463_in -> 9719229 [style=invis];
+s4359463_in -> 8115306 [style=invis];
+s4359463_in -> 28745811 [style=invis];
+s4359463_in -> 14141119 [style=invis];
+};
+s20182749_out -> s4359463_out [style=invis];
+s20945327_out -> 16412781
+s22487327_out -> s20945327_in [lhead=cluster_20945327]
+s18259890_out -> s22487327_in [lhead=cluster_22487327]
+s4750048_out -> s18259890_4750048_in [lhead=cluster_18259890_4750048]
+s8814509_out -> s4750048_in [lhead=cluster_4750048]
+s22811631_out -> s1726320_in [lhead=cluster_1726320]
+s4968819_out -> s4359463_in [lhead=cluster_4359463]
+2153655 -> s22811631_in [lhead=cluster_22811631]
+27519670 -> s4968819_in [lhead=cluster_4968819]
+s1726320_out -> s8814509_1726320_in [lhead=cluster_8814509_1726320]
+s4359463_out -> s8814509_4359463_in [lhead=cluster_8814509_4359463]
+}
+