You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2012/11/28 23:40:48 UTC

svn commit: r1414976 - in /pig/branches/branch-0.11: ./ contrib/penny/ src/docs/src/documentation/content/xdocs/ src/docs/src/documentation/content/xdocs/images/

Author: cheolsoo
Date: Wed Nov 28 22:40:47 2012
New Revision: 1414976

URL: http://svn.apache.org/viewvc?rev=1414976&view=rev
Log:
PIG-3034: Remove Penny code from Pig repository (gates via cheolsoo)

Removed:
    pig/branches/branch-0.11/contrib/penny/
    pig/branches/branch-0.11/src/docs/src/documentation/content/xdocs/images/penny-archt.jpg
Modified:
    pig/branches/branch-0.11/CHANGES.txt
    pig/branches/branch-0.11/build.xml
    pig/branches/branch-0.11/src/docs/src/documentation/content/xdocs/pig-index.xml
    pig/branches/branch-0.11/src/docs/src/documentation/content/xdocs/test.xml

Modified: pig/branches/branch-0.11/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/CHANGES.txt?rev=1414976&r1=1414975&r2=1414976&view=diff
==============================================================================
--- pig/branches/branch-0.11/CHANGES.txt (original)
+++ pig/branches/branch-0.11/CHANGES.txt Wed Nov 28 22:40:47 2012
@@ -22,6 +22,8 @@ Release 0.11.0 (unreleased)
 
 INCOMPATIBLE CHANGES
 
+PIG-3034: Remove Penny code from Pig repository (gates via cheolsoo)
+
 PIG-2931: $ signs in the replacement string make parameter substitution fail (cheolsoo via jcoveney)
 
 PIG-1891 Enable StoreFunc to make intelligent decision based on job success or failure (initialcontext via gates)

Modified: pig/branches/branch-0.11/build.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/build.xml?rev=1414976&r1=1414975&r2=1414976&view=diff
==============================================================================
--- pig/branches/branch-0.11/build.xml (original)
+++ pig/branches/branch-0.11/build.xml Wed Nov 28 22:40:47 2012
@@ -373,10 +373,6 @@
         <ant target="clean" dir="contrib/zebra" inheritAll="false"/>
     </target>
 
-    <target name="clean-penny" description="Cleanup Penny">
-        <ant target="clean" dir="contrib/penny/java" inheritAll="false"/>
-    </target>
-
     <target name="clean-tutorial" description="Cleanup Tutorial">
         <ant target="clean" dir="tutorial" inheritAll="false"/>
     </target>
@@ -1247,7 +1243,7 @@
     <!-- ================================================================== -->
     <!-- Make release tarball                                               -->
     <!-- ================================================================== -->
-    <target name="src-release" depends="clean, clean-piggybank, clean-zebra, clean-penny, clean-test-e2e, clean-tutorial" description="Source distribution">
+    <target name="src-release" depends="clean, clean-piggybank, clean-zebra, clean-test-e2e, clean-tutorial" description="Source distribution">
         <mkdir dir="${build.dir}"/>
         <tar compression="gzip" longfile="gnu"
              destfile="${build.dir}/${final.name}-src-${pig.version}.tar.gz">

Modified: pig/branches/branch-0.11/src/docs/src/documentation/content/xdocs/pig-index.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/docs/src/documentation/content/xdocs/pig-index.xml?rev=1414976&r1=1414975&r2=1414976&view=diff
==============================================================================
--- pig/branches/branch-0.11/src/docs/src/documentation/content/xdocs/pig-index.xml (original)
+++ pig/branches/branch-0.11/src/docs/src/documentation/content/xdocs/pig-index.xml Wed Nov 28 22:40:47 2012
@@ -240,7 +240,6 @@
 <p>debugging
 <br></br>&nbsp;&nbsp;&nbsp; <a href="test.html#diagnostic-ops">diagnostic operators</a>
 <br></br>&nbsp;&nbsp;&nbsp; <a href="cmds.html#exec-debug">with exec and run commands</a>
-<br></br>&nbsp;&nbsp;&nbsp; <a href="test.html#penny">and Penny </a> 
 <br></br>&nbsp;&nbsp;&nbsp; <a href="start.html#debug">and Pig Latin</a>
 </p>
 
@@ -681,8 +680,6 @@
 
 <p><a href="basic.html#comparison">pattern matching</a></p>
 
-<p><a href="test.html#penny">Penny</a> (monitoring and debugging)</p>
-
 <p>performance (writing efficient code)
 <br></br>&nbsp;&nbsp;&nbsp; <a href="perf.html#optimization-rules">optimization rules for</a>
 <br></br>&nbsp;&nbsp;&nbsp; <a href="perf.html#performance-enhancers">performance enhancers</a>

Modified: pig/branches/branch-0.11/src/docs/src/documentation/content/xdocs/test.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/docs/src/documentation/content/xdocs/test.xml?rev=1414976&r1=1414975&r2=1414976&view=diff
==============================================================================
--- pig/branches/branch-0.11/src/docs/src/documentation/content/xdocs/test.xml (original)
+++ pig/branches/branch-0.11/src/docs/src/documentation/content/xdocs/test.xml Wed Nov 28 22:40:47 2012
@@ -990,244 +990,5 @@ test.runScript();
     </section>
     </section>
     
-    
-    
-<!-- =========================================================================== -->
-<!-- PENNY -->    
-
-  <section id="penny">
-      <title>Penny</title>
-      <p><strong>Note:</strong> <em>Penny is an experimental feature.</em></p>
-      <p></p>
-      <p>Penny is a framework for creating Pig monitoring and debugging tools. Penny comes with a library of tools (see <a href="http://wiki.apache.org/pig/PennyToolLibrary">Penny Tool Library</a>). However, the real power of Penny is in creating your own custom monitoring and debugging tools using Penny's simple API.</p>
-
-<!-- +++++++++++++++++++++++++++++++++++++++++++++++++-->    
-<section>
-<title>How it Works</title>
-<p>Before you can create a tool, you need to understand how Penny instruments Pig scripts (called "dataflow programs" in the following diagram).</p>
-
-<figure src="images/penny-archt.jpg" align="left" alt="Penny Architecture"/>
-
-<p>As shown in the diagram, Penny inserts one or more monitor agents (called "Penny agent" in the diagram) between steps of the Pig script, which observe data flowing between the Pig script steps. Monitor agents run arbitrary Java code as needed for your tool, which has access to some primitives for tagging records and communicating with other agents and with a central coordinator process (called "Penny coordinator" in the diagram). The coordinator also runs arbitrary code defined by your tool. </p>
-
-<p>The whole thing is kicked off by the tool's Main program (called "application" in the diagram), which receives instructions from the user (e.g. "please figure out why this Pig script keeps crashing"), launches one or more runs of the Pig script instrumented with monitor agents, and reports the outcome back to the user (e.g. "the crash appears to be caused by one of these records: ..."). </p>
-</section>
-
-<!-- +++++++++++++++++++++++++++++++++++++++++++++++++-->          
-<section>
-<title>API</title>
-<p>You need to write three Java classes: a Main class, a Coordinator class, and a MonitorAgent class (for certain, fancy tools, you may need multiple MonitorAgent classes). You can find many examples of Main/Coordinator/MonitorAgent classes that define Penny tools in the Penny source code (<a href="http://svn.apache.org/viewvc/pig/trunk/contrib/penny/java/src/main/java/">/pig/trunk/contrib/penny/java/src/main/java/</a>) under org.apache.pig.penny.apps. All of the tools described in <a href="http://wiki.apache.org/pig/PennyToolLibrary">Penny Tool Library</a> are written using this API, so you've got plenty of examples to work with. We'll paste a few code fragments below to get you going -- in fact the entire code for the "data samples" tool (all 97 lines of Java) is included below.</p>
-   
-      <!-- +++++++++++++++++++++++++++++++++++++++++++++++++-->       
-      <section>
-      <title>Main Class</title>
-      <p>Your Main class is the "shell" of your application. It receives instructions from the user, and configures and launches one or more Penny-instrumented runs of the user's Pig script. </p>
-      
-      <p>You talk to Penny via the PennyServer class. You can do two things: (1) parse a user's Pig script and (2) launch a Penny-instrumented run of the Pig script. Here is the Main class for the data samples tool, described in <a href="http://wiki.apache.org/pig/PennyToolLibrary">Penny Tool Library</a>:</p>
-      
-      <source>
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.pig.penny.ClassWithArgs;
-import org.apache.pig.penny.ParsedPigScript;
-import org.apache.pig.penny.PennyServer;
-
-/**
- * Data samples app.
- */
-public class Main {
-    public static void main(String[] args) throws Exception {
-        PennyServer pennyServer = new PennyServer();
-        String pigScriptFilename = args[0];
-        ParsedPigScript parsedPigScript = pennyServer.parse(pigScriptFilename);
-        Map&lt;String, ClassWithArgs&gt; monitorClasses = new HashMap&lt;String, ClassWithArgs&gt;();
-        for (String alias : parsedPigScript.aliases()) {
-            monitorClasses.put(alias, new ClassWithArgs(DSMonitorAgent.class));
-        }
-        parsedPigScript.trace(DSCoordinator.class, monitorClasses);
-    }
-}      
-</source>
-    
-<p>The "monitorClasses" map dictates which monitor agent (if any) to place after each dataflow step (steps are identified by Pig script aliases). You can also pass arguments to each monitor agent, and/or to the coordinator, as shown in this example for the "data histograms" tool: </p>
-    
-<source>
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TreeMap;
-import org.apache.pig.penny.ClassWithArgs;
-import org.apache.pig.penny.ParsedPigScript;
-import org.apache.pig.penny.PennyServer;
-
-/**
- * Data summaries app. that computes a histogram of one of the fields of one of the intermediate data sets.
- */
-
-public class Main {
-    public static void main(String[] args) throws Exception {
-        PennyServer pennyServer = new PennyServer();
-        String pigScriptFilename = args[0];
-        ParsedPigScript parsedPigScript = pennyServer.parse(pigScriptFilename);
-        String alias = args[1]; // which alias to create histogram for
-        int fieldNo = Integer.parseInt(args[2]); // which field to create histogram for
-        int min = Integer.parseInt(args[3]); // min field value
-        int max = Integer.parseInt(args[4]); // max field value
-        int bucketSize = Integer.parseInt(args[5]); // histogram bucket size
-        if (!parsedPigScript.aliases().contains(alias)) throw new IllegalArgumentException("No such alias.");
-        Map&lt;String, ClassWithArgs&gt; monitorClasses = new HashMap&lt;String, ClassWithArgs&gt;();
-        monitorClasses.put(alias, new ClassWithArgs(DHMonitorAgent.class, fieldNo, min, max, bucketSize));
-        TreeMap&lt;Integer, Integer&gt; histogram = (TreeMap&lt;Integer, Integer&gt;) parsedPigScript.trace(DHCoordinator.class, monitorClasses);
-        System.out.println("Histogram: " + histogram);
-    }
-}    
-</source>
-</section>
-
-      <!-- +++++++++++++++++++++++++++++++++++++++++++++++++-->     
-      <section>
-      <title>MonitorAgent Class</title>
-      <p>Monitor agents implement the following API: </p>
-      <source>
- /**
-  * Furnish set of fields to monitor. (Null means monitor all fields ('*').)
-  */
- public abstract Set&lt;Integer&gt; furnishFieldsToMonitor(); /**
-  * Initialize, using any arguments passed from higher layer.
-  */
- public abstract void init(Serializable[] args);
- /**
-  * Process a tuple that passes through the monitoring point.
-  *
-  * @param t   the tuple
-  * @param tag t's tags
-  * @return FILTER_OUT to remove the tuple from the data stream; 
-  *    NO_TAGS to let it pass through and not give it any tags; 
-  *    a set of tags to let it pass through and assign those tags
-  */
- public abstract Set&lt;String&gt; observeTuple(Tuple t, Set&lt;String&gt; tags) throws ExecException;
- /**
-  * Process an incoming (synchronous or asynchronous) message.
-  */
- public abstract void receiveMessage(Location source, Tuple message);
- /**
-  * No more tuples are going to pass through the monitoring point. Finish any ongoing processing.
-  */
- public abstract void finish();      
-      </source>
-      
-      <p>Here's an example from the "data samples" tool:</p>
-      <source>
-import java.io.Serializable; import java.util.Set;
-
-import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.data.Tuple; 
-
-import org.apache.pig.penny.Location; import org.apache.pig.penny.MonitorAgent;
-
-public class DSMonitorAgent extends MonitorAgent {
-
-    private final static int NUM_SAMPLES = 5;
-    private int tupleCount = 0;
-    public void finish() { }
-    public Set&lt;Integer&gt; furnishFieldsToMonitor() {
-        return null;
-    }
-    public void init(Serializable[] args) { }
-    public Set&lt;String&gt; observeTuple(Tuple t, Set&lt;String&gt; tags) throws ExecException {
-        if (tupleCount++ &lt; NUM_SAMPLES) {
-            communicator().sendToCoordinator(t);
-        }
-        return tags;
-    }
-    public void receiveMessage(Location source, Tuple message) { }
-}      
-      </source>
-      
-      <p>Monitor agents have access to a "communicator" object, which is the gateway for sending messages to other agents or to the coordinator. The communicator API is: </p>
-      <source>
-  /**
-  * Find out my (physical) location.
-  */
- public abstract Location myLocation();
- /**
-  * Send a message to the coordinator, asynchronously.
-  */
- public abstract void sendToCoordinator(Tuple message);
- /**
-  * Send a message to immediate downstream neighbor(s), synchronously.
-  * If downstream neighbor(s) span a task boundary, all instances will receive it; otherwise only same-task instances will receive it.
-  * If there is no downstream neighbor, an exception will be thrown.
-  */
- public abstract void sendDownstream(Tuple message) throws NoSuchLocationException;
- /**
-  * Send a message to immediate upstream neighbor(s), synchronously.
-  * If upstream neighbor(s) are non-existent or span a task boundary, an exception will be thrown.
-  */
- public abstract void sendUpstream(Tuple message) throws NoSuchLocationException;
- /**
-  * Send a message to current/future instances of a given logical location.
-  * Instances that have already terminated will not receive the message (obviously).
-  * Instances that are currently executing will receive it asynchronously (or perhaps not at all, if they terminate before the message arrives).
-  * Instances that have not yet started will receive the message prior to beginning processing of tuples.
-  */
- public abstract void sendToAgents(LogicalLocation destination, Tuple message) throws NoSuchLocationException; 
- // The following methods mirror the ones above, but take care of packaging a list of objects into a tuple (you're welcome!) ...
- public void sendToCoordinator(Object ... message) {
-  . sendToCoordinator(makeTuple(message));
- }
- public void sendDownstream(Object ... message) throws NoSuchLocationException {
-  . sendDownstream(makeTuple(message));
- }
- public void sendUpstream(Object ... message) throws NoSuchLocationException {
-  . sendUpstream(makeTuple(message));
- }
- public void sendToAgents(LogicalLocation destination, Object ... message) throws NoSuchLocationException {
-  . sendToAgents(destination, makeTuple(message));
- }     
-</source>
-</section>
-
-      <!-- +++++++++++++++++++++++++++++++++++++++++++++++++-->  
-      <section>
-      <title>Coordinator Class</title>
-      <p>Your tool's coordinator implements the following API: </p>
-      <source>
- /**
-  * Initialize, using any arguments passed from higher layer.
-  */
- public abstract void init(Serializable[] args);
- /**
-  * Process an incoming (synchronous or asynchronous) message.
-  */
- public abstract void receiveMessage(Location source, Tuple message); /**
-  * The data flow has completed and all messages have been delivered. Finish processing.
-  * @return              final output to pass back to application
-  */
- public abstract Object finish();      
-      </source>
-      
-      <p>The coordinator for the "data samples" tool is: </p>
-<source>
-import java.io.Serializable;
-import org.apache.pig.data.Tuple;
-import org.apache.pig.penny.Coordinator;
-import org.apache.pig.penny.Location;
-public class DSCoordinator extends Coordinator {
-    public void init(Serializable[] args) { }
-    public Object finish() {
-        return null;
-    }
-    public void receiveMessage(Location source, Tuple message) {
-        System.out.println("*** SAMPLE RECORD AT ALIAS " + source.logId() + ": " + truncate(message));
-    }
-    private String truncate(Tuple t) {
-        String s = t.toString();
-        return s.substring(0, Math.min(s.length(), 100));
-    }
-}      
-</source>
-</section>
-</section>
-</section>
-   
 </body>
 </document>