Posted to commits@pig.apache.org by ga...@apache.org on 2010/08/19 06:33:34 UTC

svn commit: r987014 - in /hadoop/pig/trunk: ./ ivy/ src/docs/src/documentation/content/xdocs/ test/ test/data/ test/data/pigunit/ test/org/apache/pig/pigunit/ test/org/apache/pig/pigunit/pig/ test/org/apache/pig/test/pigunit/ test/org/apache/pig/test/p...

Author: gates
Date: Thu Aug 19 04:33:33 2010
New Revision: 987014

URL: http://svn.apache.org/viewvc?rev=987014&view=rev
Log:
PIG-1404: added PigUnit, a framework for building unit tests of Pig Latin scripts.

Added:
    hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml
    hadoop/pig/trunk/test/data/
    hadoop/pig/trunk/test/data/pigunit/
    hadoop/pig/trunk/test/data/pigunit/top_queries.pig
    hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt
    hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt
    hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt
    hadoop/pig/trunk/test/org/apache/pig/pigunit/
    hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java
    hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java
    hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java
    hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/
    hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java
    hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java
    hadoop/pig/trunk/test/org/apache/pig/test/pigunit/
    hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java
    hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/
    hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java
    hadoop/pig/trunk/test/pigunit-tests
Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/build.xml
    hadoop/pig/trunk/ivy.xml
    hadoop/pig/trunk/ivy/libraries.properties
    hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=987014&r1=987013&r2=987014&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Thu Aug 19 04:33:33 2010
@@ -26,6 +26,8 @@ PIG-1249: Safe-guards against misconfigu
 
 IMPROVEMENTS
 
+PIG-1404: added PigUnit, a framework for building unit tests of Pig Latin scripts (romainr via gates)
+
 PIG-1452: to remove hadoop20.jar from lib and use hadoop from the apache maven
 repo. (rding)
 

Modified: hadoop/pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/build.xml?rev=987014&r1=987013&r2=987014&view=diff
==============================================================================
--- hadoop/pig/trunk/build.xml (original)
+++ hadoop/pig/trunk/build.xml Thu Aug 19 04:33:33 2010
@@ -84,6 +84,10 @@
     <property name="test.unit.file" value="${test.src.dir}/unit-tests"/>
     <property name="test.smoke.file" value="${test.src.dir}/smoke-tests"/>
     <property name="test.all.file" value="${test.src.dir}/all-tests"/>
+    <property name="pigunit.jarfile" value="pigunit.jar" />
+    <property name="test.pigunit.src.dir" value="${test.src.dir}/org/apache/pig/test/pigunit" />
+    <property name="commons-lang.jarfile" value="commons-lang-2.4.jar" />    
+    <property name="test.pigunit.file" value="${test.src.dir}/pigunit-tests"/>
 
 
     <!-- test configuration, use ${user.home}/build.properties to configure values  -->
@@ -607,6 +611,7 @@
                     <exclude name="**/TestOrderBy2.java" />
                     <exclude name="**/TestPi.java" />
                     <exclude name="**/nightly/**" />
+                    <!-- <exclude name="**/pigunit/**" /> -->
                     <exclude name="**/${exclude.testcase}.java" if="exclude.testcase" />
                 </fileset>
             </batchtest>
@@ -638,6 +643,22 @@
     </target>	
 
     <!-- ================================================================== -->
+    <!-- Pigunit                                                            -->
+    <!-- ================================================================== -->
+
+    <target depends="compile-test" name="pigunit-jar" description="create the pigunit jar file">
+        <echo> *** Creating pigunit.jar ***</echo>
+      <jar destfile="${pigunit.jarfile}">
+        <fileset dir="${test.build.classes}/org/apache/pig/pigunit/"/>
+      	<zipfileset src="${ivy.lib.dir}/${commons-lang.jarfile}" />
+      </jar>
+    </target>
+
+    <target name="test-pigunit" depends="compile-test,jar-withouthadoop, pigunit-jar" description="Run tests that test PigUnit">
+        <macro-test-runner test.file="${test.pigunit.file}" />
+    </target>
+
+    <!-- ================================================================== -->
     <!-- D I S T R I B U T I O N                                            -->
     <!-- ================================================================== -->
     <target name="package" depends="docs, api-report" description="Create a Pig release">

Modified: hadoop/pig/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/ivy.xml?rev=987014&r1=987013&r2=987014&view=diff
==============================================================================
--- hadoop/pig/trunk/ivy.xml (original)
+++ hadoop/pig/trunk/ivy.xml Thu Aug 19 04:33:33 2010
@@ -84,8 +84,11 @@
     <dependency org="org.codehaus.jackson" name="jackson-core-asl" rev="${jackson.version}"
       conf="compile->master"/>
     <dependency org="joda-time" name="joda-time" rev="${joda-time.version}" conf="compile->master"/>
+  	<dependency org="commons-lang" name="commons-lang" rev="${commons-lang.version}"
+  	  conf="compile->master"/>
     <dependency org="com.google.guava" name="guava" rev="${guava.version}" conf="compile->master" />
-	<dependency org="org.python" name="jython" rev="${jython.version}" conf="compile->master"/>
+  	<dependency org="org.python" name="jython" rev="${jython.version}" conf="compile->master"/>
+
     <!--ATM hbase, hbase-test.jar, hadoop.jar are resolved from the lib dir--> 	
     <dependency org="hsqldb" name="hsqldb" rev="${hsqldb.version}"
       conf="test->default" />

Modified: hadoop/pig/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/ivy/libraries.properties?rev=987014&r1=987013&r2=987014&view=diff
==============================================================================
--- hadoop/pig/trunk/ivy/libraries.properties (original)
+++ hadoop/pig/trunk/ivy/libraries.properties Thu Aug 19 04:33:33 2010
@@ -21,6 +21,7 @@ commons-beanutils.version=1.7.0
 commons-cli.version=1.0
 commons-el.version=1.0
 commons-logging.version=1.0.3
+commons-lang.version=2.4
 checkstyle.version=4.2
 
 ivy.version=2.2.0-rc1

Added: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml (added)
+++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml Thu Aug 19 04:33:33 2010
@@ -0,0 +1,271 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+  <!--
+    Copyright 2002-2004 The Apache Software Foundation Licensed under the Apache License, Version
+    2.0 (the "License"); you may not use this file except in compliance with the License. You may
+    obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by
+    applicable law or agreed to in writing, software distributed under the License is distributed on
+    an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See
+    the License for the specific language governing permissions and limitations under the License.
+  -->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+  <header>
+    <title>PigUnit - Pig script testing simplified.</title>
+  </header>
+  <body>
+
+    <section>
+      <title>Overview</title>
+      <p>The goal is to provide a simple xUnit framework that enables our Pig scripts to be easily:
+      </p>
+      <ol>
+        <li>
+          <p>unit tested</p>
+        </li>
+        <li>
+          <p>regression tested</p>
+        </li>
+        <li>
+          <p>quickly prototyped</p>
+        </li>
+      </ol>
+
+      <p>No cluster setup is required.</p>
+    </section>
+
+    <section>
+      <title>PigUnit Example</title>
+      <p>This example computes the top queries; the test specifies the input data and the expected output of the script.</p>
+      <p>Java test</p>
+      <source>
+  @Test
+  public void testTop3Queries() {
+    String[] args = {
+        "n=3",        
+        };
+    test = new PigTest("top_queries.pig", args);
+
+    String[] input = {
+        "yahoo\t10",
+        "twitter\t7",
+        "facebook\t10",
+        "yahoo\t15",
+        "facebook\t5",
+        ....
+    };
+
+    String[] output = {
+        "(yahoo,25L)",
+        "(facebook,15L)",
+        "(twitter,7L)",
+    };
+
+    test.assertOutput("data", input, "queries_limit", output);
+  }
+ </source>
+      <p>top_queries.pig</p>
+      <source>
+data =
+    LOAD '$input'
+    AS (query:CHARARRAY, count:INT);
+     
+    ... 
+    
+queries_sum = 
+    FOREACH queries_group 
+    GENERATE 
+        group AS query, 
+        SUM(queries.count) AS count;
+        
+    ...
+            
+queries_limit = LIMIT queries_ordered $n;
+
+STORE queries_limit INTO '$output';
+</source>
+
+      <p>You just need two jar files in your classpath:</p>
+      <ol>
+        <li>pig.jar</li>
+        <li>pigunit.jar</li>
+      </ol>
+
+      <p>
+        Many examples are available in the
+        <a
+          href="http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java"
+        >PigUnit tests</a>
+        .
+      </p>
+    </section>
+
+    <section>
+      <title>Cluster</title>
+
+      <p>There are two main modes:</p>
+      <ol>
+        <li>LOCAL</li>
+        <li>MAPREDUCE</li>
+      </ol>
+
+      <section>
+        <title>LOCAL</title>
+        <p>
+          This uses the local mode of Pig.
+          It is used by default.
+        </p>
+
+        <p>It is fast and uses your local file system in place of an HDFS cluster.</p>
+      </section>
+
+
+      <section>
+        <title>MAPREDUCE</title>
+        <p>This uses a real Hadoop cluster.
+          The cluster selected will be the first one specified in
+          the CLASSPATH (the same way the HADOOP_CONF_DIR variable works).
+          You can also choose to have a test cluster automatically
+          started and stopped, or you can reuse an already running cluster.
+        </p>
+
+        <section>
+          <title>On demand cluster</title>
+          <p>
+            The default mode uses a local MiniCluster that is started at the very beginning
+            and shut down automatically at the end of the test run.
+            No setup is needed, which is really helpful.
+            The cluster contains no data each time it is started, but data can be
+            copied to it as shown in the examples.
+
+            You can select this mode by setting the Java property
+            <code>"pigunit.exectype.minicluster"</code>
+            to "true".
+            </p>
+            <p>It can be set in Java or on the command line:</p>
+            <ol>
+              <li>
+                <code>System.setProperty("pigunit.exectype.minicluster", "true");</code>
+              </li>
+              <li>
+                <code>-Dpigunit.exectype.minicluster=true</code>
+              </li>
+            </ol>
+            <p>
+            The
+            <code>HADOOP_CONF_DIR</code>
+            path will be
+            <code>~/pigtest/conf</code>
+            and it will be required in the CLASSPATH.
+            The path to the log directory is set by the
+            Java property
+            <code>"hadoop.log.dir"</code>
+            (default is "/tmp/pigunit").
+          </p>
+        </section>
+
+        <section>
+          <title>Existing cluster</title>
+          <p>
+            If the
+            <code>"pigunit.exectype.cluster"</code>
+            property is set, the first XML configuration of a Hadoop cluster found in the
+            CLASSPATH will be used.
+
+            Notice that PigUnit comes with a standalone MiniCluster that
+            can be started
+            externally with:
+          </p>
+          <source>
+java -cp .../pig.jar:.../pigunit.jar org.apache.pig.pigunit.MiniClusterRunner
+</source>
+          <p>This is really useful when doing some prototyping in order to have a test cluster
+            ready.</p>
+        </section>
+      </section>
+    </section>
+
+    <section>
+      <title>Building</title>
+      <p>To compile pigunit.jar, run the following from the Pig trunk directory:</p>
+      <source>
+$pig_trunk ant compile-test
+$pig_trunk ant
+$pig_trunk ant pigunit-jar   
+</source>
+    </section>
+
+    <section>
+      <title>Troubleshooting</title>
+      <p>Common problems</p>
+      <section>
+        <title>CLASSPATH in MAPREDUCE mode</title>
+        <p>When used in MAPREDUCE mode, do not forget to include the HADOOP_CONF_DIR of your
+          cluster in your CLASSPATH.</p>
+        <p>
+          It is
+          <code>~/pigtest/conf</code>
+          by default.
+        </p>
+        <source>
+org.apache.pig.backend.executionengine.ExecException: ERROR 4010: Cannot find hadoop configurations in classpath (neither hadoop-site.xml nor core-site.xml was found in the classpath).If you plan to use local mode, please put -x local option in command line
+         </source>
+      </section>
+
+      <section>
+        <title>UDF jars not found</title>
+        <p>This error means that you are missing some jars in your test environment.</p>
+        <source>
+WARN util.JarManager: Couldn't find the jar for org.apache.pig.piggybank.evaluation.string.LOWER, skip it
+         </source>
+      </section>
+
+      <section>
+        <title>STORING data</title>
+        <p>Currently PigUnit drops all the STORE/DUMP commands, but you can tell it to
+          keep them and execute the script.</p>
+        <source>
+test = new PigTest(PIG_SCRIPT, args);   
+test.unoverride("STORE");
+test.runScript();
+</source>
+      </section>
+
+      <section>
+        <title>Cache archive</title>
+        <p>Cache archives work, but your test environment will need to have the cache archive
+          options specified by Java properties or in an additional XML configuration in its
+          CLASSPATH.</p>
+        <p>If you use a local cluster, you will need to set the required environment
+          variables before
+          starting it, e.g.</p>
+        <source>export LD_LIBRARY_PATH=/home/path/to/lib</source>
+      </section>
+    </section>
+
+    <section>
+      <title>Future</title>
+      <p>Improvements and other components based on PigUnit could be built later.</p>
+      <p>We could build a PigTestCase and a PigTestSuite on top of PigTest in order to have:</p>
+      <ol>
+        <li>a notion of workspaces for each test</li>
+        <li>removal of the boilerplate code that appears when there is more than one test
+          method</li>
+        <li>a standalone utility that reads test configurations and generates a test report...</li>
+      </ol>
+    </section>
+  </body>
+</document>
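
For reference, the partial example documented above expands to a complete, runnable JUnit test along the lines of the following sketch. It mirrors the testTextInput case added in TestPigTest.java further down; the class name is illustrative and the script path assumes the test data shipped with this commit.

    import java.io.IOException;

    import org.apache.pig.pigunit.PigTest;
    import org.apache.pig.tools.parameters.ParseException;
    import org.junit.Test;

    public class TopQueriesExampleTest {  // illustrative class name
      @Test
      public void testTop3Queries() throws ParseException, IOException {
        // Parameters substituted into top_queries.pig ($n, $reducers, $input, $output).
        String[] args = {
            "n=3",
            "reducers=1",
            "input=top_queries_input_data.txt",
            "output=top_3_queries",
        };
        PigTest test = new PigTest("test/data/pigunit/top_queries.pig", args);

        // Tuples injected in place of the 'data' alias (query \t count).
        String[] input = {
            "yahoo\t10",
            "twitter\t7",
            "facebook\t10",
            "yahoo\t15",
            "facebook\t5",
        };

        // Expected content of the 'queries_limit' alias.
        String[] output = {
            "(yahoo,25)",
            "(facebook,15)",
            "(twitter,7)",
        };

        test.assertOutput("data", input, "queries_limit", output);
      }
    }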

Modified: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml?rev=987014&r1=987013&r2=987014&view=diff
==============================================================================
--- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml (original)
+++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml Thu Aug 19 04:33:33 2010
@@ -47,6 +47,7 @@ See http://forrest.apache.org/docs/linki
     <plref2 label="Pig Latin 2"	href="piglatin_ref2.html" />
     <cookbook label="Cookbook" 		href="cookbook.html" />
     <udf label="UDFs" href="udf.html" />
+    <udf label="PigUnit" href="pigunit.html" />
     </docs>  
     
     <docs label="Zebra"> 

Added: hadoop/pig/trunk/test/data/pigunit/top_queries.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/data/pigunit/top_queries.pig?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/data/pigunit/top_queries.pig (added)
+++ hadoop/pig/trunk/test/data/pigunit/top_queries.pig Thu Aug 19 04:33:33 2010
@@ -0,0 +1,27 @@
+---------------------------------------------------------------------
+-- Top N Queries.
+---------------------------------------------------------------------
+
+data =
+    LOAD '$input'
+    AS (query:CHARARRAY, count:INT);
+
+queries_group = 
+    GROUP data 
+    BY query
+    PARALLEL $reducers;
+
+queries_sum = 
+    FOREACH queries_group 
+    GENERATE 
+        group AS query, 
+        SUM(data.count) AS count;
+
+queries_ordered = 
+    ORDER queries_sum 
+    BY count DESC
+    PARALLEL $reducers;
+            
+queries_limit = LIMIT queries_ordered $n;
+
+STORE queries_limit INTO '$output';

Added: hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt (added)
+++ hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt Thu Aug 19 04:33:33 2010
@@ -0,0 +1,3 @@
+(yahoo,25)
+(facebook,15)
+(twitter,7)
\ No newline at end of file

Added: hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt (added)
+++ hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt Thu Aug 19 04:33:33 2010
@@ -0,0 +1,10 @@
+yahoo	10
+twitter	7
+facebook	10
+yahoo	15
+facebook	5
+a	1
+b	2
+c	3
+d	4
+e	5
\ No newline at end of file

Added: hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt (added)
+++ hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt Thu Aug 19 04:33:33 2010
@@ -0,0 +1,4 @@
+n=3
+reducers=1
+input=top_queries_input_data.txt
+output=top_3_queries
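
This parameter file can be handed to PigTest instead of inline arguments. A minimal sketch, assuming the test runs from the Pig trunk directory so the relative paths resolve (it follows the testArgFiles case in TestPigTest.java below; the class name is illustrative and the input file is assumed to have been staged on the cluster first, as in TestPigTest.setUpOnce):

    import java.io.File;
    import java.io.IOException;

    import org.apache.pig.pigunit.PigTest;
    import org.apache.pig.tools.parameters.ParseException;
    import org.junit.Test;

    public class TopQueriesParamFileTest {  // illustrative class name
      @Test
      public void testWithParamFile() throws ParseException, IOException {
        // All $parameters ($n, $reducers, $input, $output) come from the file above.
        String[] argFiles = { "test/data/pigunit/top_queries_params.txt" };
        PigTest test = new PigTest("test/data/pigunit/top_queries.pig", null, argFiles);

        // Compare the last STORE'd alias with the expected file shipped in this commit.
        test.assertOutput(new File("test/data/pigunit/top_queries_expected_top_3.txt"));
      }
    }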

Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.pigunit;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
+import org.apache.pig.impl.PigContext;
+import org.apache.pig.test.Util;
+
+/**
+ * Encapsulates all the file system operations.
+ *
+ * <p>Mainly used for copying data to the test cluster.
+ */
+public class Cluster {
+  private final Configuration configuration;
+
+  public Cluster(PigContext context) {
+    configuration = ConfigurationUtil.toConfiguration(context.getProperties());
+  }
+
+  public boolean exists(Path destination) throws IOException {
+    FileSystem fs = destination.getFileSystem(configuration);
+    return fs.exists(destination);
+  }
+
+  /**
+   * If file size has changed, or if destination does not exist yet, copy it.
+   *
+   * @param local Path to the local file to copy to the cluster.
+   * @param destination Destination path on the cluster.
+   * @throws IOException If the copy failed.
+   */
+  public void update(Path local, Path destination) throws IOException {
+    if (! exists(destination) || ! sameSize(local, destination)) {
+      copyFromLocalFile(local, destination, true);
+    }
+  }
+
+  public void copyFromLocalFile(Path local, Path destination) throws IOException {
+    copyFromLocalFile(local, destination, true);
+  }
+
+  public void copyFromLocalFile(Path local, Path destination, boolean overwrite)
+      throws IOException {
+    FileSystem fs = local.getFileSystem(configuration);
+    fs.copyFromLocalFile(false, overwrite, local, destination);
+  }
+
+  public void copyFromLocalFile(String[] content, String destination) throws IOException {
+    copyFromLocalFile(content, destination, true);
+  }
+
+  public void copyFromLocalFile(String[] content, String destination, boolean overwrite)
+      throws IOException {
+    Path file = new Path(destination);
+    FileSystem fs = file.getFileSystem(configuration);
+
+    if (overwrite && fs.exists(file)) {
+      fs.delete(file, true);
+    }
+
+    Util.createInputFile(fs, destination, content);
+  }
+
+  public void copyFromLocalFile(String[][] data) throws IOException {
+    copyFromLocalFile(data, false);
+  }
+
+  public void copyFromLocalFile(String[][] data, boolean overwrite) throws IOException {
+    for (int i = 0; i < data.length; i++) {
+      copyFromLocalFile(new Path(data[i][0]), new Path(data[i][1]), overwrite);
+    }
+  }
+
+  public FileStatus[] listStatus(Path path) throws IOException {
+    FileSystem fs = path.getFileSystem(configuration);
+    return fs.listStatus(path);
+  }
+
+  public boolean delete(Path path) throws IOException {
+    FileSystem fs = path.getFileSystem(configuration);
+    return fs.delete(path, true);
+  }
+
+  private boolean sameSize(Path local, Path destination) throws IOException {
+    FileSystem fs1 = FileSystem.getLocal(configuration);
+    FileSystem fs2 = destination.getFileSystem(configuration);
+
+    return fs1.getFileStatus(local).getLen() == fs2.getFileStatus(destination).getLen();
+  }
+}
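
As a usage sketch (modeled on the setUpOnce method of TestPigTest.java further down), a test suite typically obtains the Cluster once and stages its input data with it; the extra_input_data.txt destination below is purely illustrative, the other file names are the ones added by this commit.

    import java.io.IOException;

    import org.apache.hadoop.fs.Path;
    import org.apache.pig.pigunit.Cluster;
    import org.apache.pig.pigunit.PigTest;
    import org.junit.BeforeClass;

    public class ClusterSetupExample {  // illustrative class name
      private static Cluster cluster;

      @BeforeClass
      public static void setUpOnce() throws IOException {
        // Starts the PigServer (LOCAL by default) and returns the file system wrapper.
        cluster = PigTest.getCluster();

        // Copy the local input file only if it is missing or its size has changed.
        cluster.update(
            new Path("test/data/pigunit/top_queries_input_data.txt"),
            new Path("top_queries_input_data.txt"));

        // In-memory content can also be written directly to the cluster.
        cluster.copyFromLocalFile(
            new String[] { "yahoo\t10", "twitter\t7" }, "extra_input_data.txt");
      }
    }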

Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.pigunit;
+
+import org.apache.pig.test.MiniCluster;
+
+
+/**
+ * Starts an on-demand mini cluster that requires no set up.
+ *
+ * <p>It can be useful if you don't want to restart the cluster between each test run and don't
+ * want to set up a real cluster.
+ *
+ * <p>CLASSPATH needs to contain: pig.jar and piggybank.jar
+ * <pre>
+ * export CLASSPATH=/path/pig.jar:/path/piggybank.jar
+ * java org.apache.pig.pigunit.MiniClusterRunner
+ * </pre>
+ *
+ * <p>Possible improvements
+ * <ul>
+ *   <li>add a main in MiniCluster</li>
+ *   <li>make MiniCluster configurable (number of maps...)</li>
+ *   <li>make MiniCluster use a default property for choosing the hadoop conf dir
+ *       (e.g. minicluster.conf.dir) instead of always using
+ *       System.getProperty("user.home"), "pigtest/conf/"</li>
+ *   <li>use CLI option</li>
+ *   <li>make a shell wrapper</li>
+ * </ul>
+ */
+public class MiniClusterRunner {
+  public static void main(String[] args) {
+    System.setProperty("hadoop.log.dir", "/tmp/pigunit");
+    MiniCluster.buildCluster();
+  }
+}
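
A sketch of the reuse scenario described in the javadoc above: once MiniClusterRunner has been started in a separate JVM and ~/pigtest/conf is on the CLASSPATH, a test can attach to that running cluster by setting the pigunit.exectype.cluster property before the first call to PigTest.getCluster() (the property can equally be passed as -Dpigunit.exectype.cluster=true); the class name is illustrative.

    import java.io.IOException;

    import org.apache.pig.pigunit.Cluster;
    import org.apache.pig.pigunit.PigTest;
    import org.junit.BeforeClass;

    public class ReuseRunningMiniClusterExample {  // illustrative class name
      private static Cluster cluster;

      @BeforeClass
      public static void setUpOnce() throws IOException {
        // Tell PigTest to connect in MAPREDUCE mode to the cluster whose
        // configuration (here ~/pigtest/conf) is found first in the CLASSPATH,
        // instead of starting its own MiniCluster or running in LOCAL mode.
        System.setProperty("pigunit.exectype.cluster", "true");

        cluster = PigTest.getCluster();
      }
    }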

Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,290 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.pigunit;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+import junit.framework.Assert;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
+import org.apache.pig.ExecType;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.pigunit.pig.PigServer;
+import org.apache.pig.test.MiniCluster;
+import org.apache.pig.tools.parameters.ParameterSubstitutionPreprocessor;
+import org.apache.pig.tools.parameters.ParseException;
+
+/**
+ * Pig Unit
+ *
+ * <p>Equivalent of xUnit for testing Pig.
+ *
+ * <p>Call {@link PigTest#getCluster()}, then construct a test and call an assert method.
+ * Have a look at the tests of this class for more examples.
+ */
+public class PigTest {
+  /** The text of the Pig script to test with no substitution or change. */
+  private final String originalTextPigScript;
+  /** The list of arguments of the script. */
+  private final String[] args;
+  /** The list of file arguments of the script. */
+  private final String[] argFiles;
+  /** The list of aliases to override in the script. */
+  private final Map<String, String> aliasOverrides;
+
+  private static PigServer pig;
+  private static Cluster cluster;
+  private static final Logger LOG = Logger.getLogger(PigTest.class);
+  private static final String EXEC_MINI_CLUSTER = "pigunit.exectype.minicluster";
+  private static final String EXEC_CLUSTER = "pigunit.exectype.cluster";
+
+  /**
+   * Initializes the Pig test.
+   *
+   * @param args The list of arguments of the script.
+   * @param argFiles The list of file arguments of the script.
+   * @param pigTextScript The text of the Pig script to test with no substitution or change.
+   */
+  @SuppressWarnings("serial")
+  PigTest(String[] args, String[] argFiles, String pigTextScript) {
+    this.originalTextPigScript = pigTextScript;
+    this.args = args;
+    this.argFiles = argFiles;
+    this.aliasOverrides = new HashMap<String, String>() {{
+      put("STORE", "");
+      put("DUMP", "");
+    }};
+  }
+
+  public PigTest(String scriptPath) throws IOException {
+    this(null, null, readFile(scriptPath));
+  }
+
+  public PigTest(String[] script) {
+    this(null, null, StringUtils.join(script, "\n"));
+  }
+
+  public PigTest(String scriptPath, String[] args) throws IOException {
+    this(args, null, readFile(scriptPath));
+  }
+
+  public PigTest(String[] script, String[] args) {
+    this(args, null, StringUtils.join(script, "\n"));
+  }
+
+  public PigTest(String[] script, String[] args, String[] argsFile) {
+    this(args, argsFile, StringUtils.join(script, "\n"));
+  }
+
+  public PigTest(String scriptPath, String[] args, String[] argFiles) throws IOException {
+    this(args, argFiles, readFile(scriptPath));
+  }
+
+  public PigTest(String scriptPath, String[] args, PigServer pig, Cluster cluster)
+      throws IOException {
+    this(args, null, readFile(scriptPath));
+    PigTest.pig = pig;
+    PigTest.cluster = cluster;
+  }
+
+  /**
+   * Connects to the PigServer, starting it if needed.
+   *
+   * @return The cluster where input files can be copied.
+   * @throws ExecException If the PigServer can't be started.
+   */
+  public static Cluster getCluster() throws ExecException {
+    if (cluster == null) {
+      LOG.info("Using mini cluster mode");
+      if (System.getProperties().containsKey(EXEC_MINI_CLUSTER)) {
+        if (! System.getProperties().containsKey("hadoop.log.dir")) {
+          System.setProperty("hadoop.log.dir", "/tmp/pigunit");
+        }
+        MiniCluster.buildCluster();
+        pig = new PigServer(ExecType.MAPREDUCE);
+      } else if (System.getProperties().containsKey(EXEC_CLUSTER)) {
+        LOG.info("Using cluster mode");
+        pig = new PigServer(ExecType.MAPREDUCE);
+      } else {
+        LOG.info("Using default local mode");
+        pig = new PigServer(ExecType.LOCAL);
+      }
+
+      cluster = new Cluster(pig.getPigContext());
+    }
+
+    return cluster;
+  }
+
+  /**
+   * Registers a pig script with its variables substituted.
+   *
+   * @throws IOException If a temp file containing the pig script could not be created.
+   * @throws ParseException The pig script could not have all its variables substituted.
+   */
+  protected void registerScript() throws IOException, ParseException {
+    BufferedReader pigIStream = new BufferedReader(new StringReader(this.originalTextPigScript));
+    StringWriter pigOStream = new StringWriter();
+
+    ParameterSubstitutionPreprocessor ps = new ParameterSubstitutionPreprocessor(50);
+    ps.genSubstitutedFile(pigIStream, pigOStream, args, argFiles);
+
+    String substitutedPig = pigOStream.toString();
+    System.out.println(substitutedPig);
+
+    File f = File.createTempFile("tmp", "pigunit");
+    PrintWriter pw = new PrintWriter(f);
+    pw.println(substitutedPig);
+    pw.close();
+
+    String pigSubstitutedFile = f.getCanonicalPath();
+    pig.registerScript(pigSubstitutedFile, aliasOverrides);
+  }
+
+  /**
+   * Executes the Pig script with its current overrides.
+   *
+   * @throws IOException If a temp file containing the pig script could not be created.
+   * @throws ParseException The pig script could not have all its variables substituted.
+   */
+  public void runScript() throws IOException, ParseException {
+    registerScript();
+  }
+
+  /**
+   * Gets an iterator on the content of one alias of the script.
+   *
+   * <p>For now this uses a giant String in order to display all the differences at once.
+   * It might not work with a giant expected output.
+   * @throws ParseException If the Pig script could not be parsed.
+   * @throws IOException If the Pig script could not be executed correctly.
+   */
+  public Iterator<Tuple> getAlias(String alias) throws IOException, ParseException {
+    registerScript();
+    return pig.openIterator(alias);
+  }
+
+  /**
+   * Gets an iterator on the content of the latest STORE alias of the script.
+   *
+   * @throws ParseException If the Pig script could not be parsed.
+   * @throws IOException If the Pig script could not be executed correctly.
+   */
+  public Iterator<Tuple> getAlias() throws IOException, ParseException {
+    registerScript();
+    String alias = aliasOverrides.get("LAST_STORE_ALIAS");
+
+    return getAlias(alias);
+  }
+
+  /**
+   * Replaces the query of an alias by another query.
+   *
+   * <p>For example:
+   *
+   * <pre>
+   * B = FILTER A BY count > 5;
+   * overridden with:
+   * &lt;B, B = FILTER A BY name == 'Pig';&gt;
+   * becomes
+   * B = FILTER A BY name == 'Pig';
+   * </pre>
+   *
+   * @param alias The alias to override.
+   * @param query The new value of the alias.
+   */
+  public void override(String alias, String query) {
+    aliasOverrides.put(alias, query);
+  }
+
+  public void unoverride(String alias) {
+    aliasOverrides.remove(alias);
+  }
+
+  public void assertOutput(String[] expected) throws IOException, ParseException {
+    registerScript();
+    String alias = aliasOverrides.get("LAST_STORE_ALIAS");
+
+    assertEquals(StringUtils.join(expected, "\n"), StringUtils.join(getAlias(alias), "\n"));
+  }
+
+  public void assertOutput(String alias, String[] expected) throws IOException, ParseException {
+    registerScript();
+
+    assertEquals(StringUtils.join(expected, "\n"), StringUtils.join(getAlias(alias), "\n"));
+  }
+
+  public void assertOutput(File expected) throws IOException, ParseException {
+    registerScript();
+    String alias = aliasOverrides.get("LAST_STORE_ALIAS");
+
+    assertEquals(readFile(expected), StringUtils.join(getAlias(alias), "\n"));
+  }
+
+  public void assertOutput(String alias, File expected) throws IOException, ParseException {
+    registerScript();
+
+    assertEquals(readFile(expected), StringUtils.join(getAlias(alias), "\n"));
+  }
+
+  public void assertOutput(String aliasInput, String[] input, String alias, String[] expected)
+      throws IOException, ParseException {
+    registerScript();
+
+    StringBuilder sb = new StringBuilder();
+    Schema.stringifySchema(sb, pig.dumpSchema(aliasInput), DataType.TUPLE) ;
+
+    final String destination = "pigunit-input-overriden.txt";
+    cluster.copyFromLocalFile(input, destination, true);
+    override(aliasInput,
+        String.format("%s = LOAD '%s' AS %s;", aliasInput, destination, sb.toString()));
+
+    assertOutput(alias, expected);
+  }
+
+  protected void assertEquals(String expected, String current) {
+    Assert.assertEquals(expected, current);
+  }
+
+  private static String readFile(String path) throws IOException {
+    return readFile(new File(path));
+  }
+
+  private static String readFile(File file) throws IOException {
+    FileInputStream stream = new FileInputStream(file);
+    try {
+      FileChannel fc = stream.getChannel();
+      MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
+      return Charset.defaultCharset().decode(bb).toString();
+    }
+    finally {
+      stream.close();
+    }
+  }
+}
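
A short usage sketch of the override mechanism, modeled on the testOverride and testStore cases in TestPigTest.java further down; the class name is illustrative, and the input file is assumed to have been staged on the cluster as in TestPigTest.setUpOnce.

    import java.io.IOException;

    import org.apache.pig.pigunit.PigTest;
    import org.apache.pig.tools.parameters.ParseException;
    import org.junit.Test;

    public class OverrideExample {  // illustrative class name
      private static final String PIG_SCRIPT = "test/data/pigunit/top_queries.pig";
      private static final String[] ARGS = {
          "n=3", "reducers=1",
          "input=top_queries_input_data.txt", "output=top_3_queries",
      };

      @Test
      public void testTop2InsteadOfTop3() throws ParseException, IOException {
        PigTest test = new PigTest(PIG_SCRIPT, ARGS);

        // Replace the query behind the 'queries_limit' alias before running.
        test.override("queries_limit", "queries_limit = LIMIT queries_ordered 2;");

        test.assertOutput(new String[] { "(yahoo,25)", "(facebook,15)" });
      }

      @Test
      public void testActuallyStoreTheResult() throws ParseException, IOException {
        PigTest test = new PigTest(PIG_SCRIPT, ARGS);

        // STORE/DUMP are dropped by default; keep STORE and execute the script.
        test.unoverride("STORE");
        test.runScript();
      }
    }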

Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.pigunit.pig;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+
+/**
+ * Slightly modified GruntParser that accepts a list of aliases to override.
+ *
+ * <p>This is a way to replace a pig query by another query.
+ *
+ * <p>For example, if we have this map of overrides: Map&lt;alias,query&gt;
+ * <ul>
+ *   <li>&lt;A, A = LOAD '/path'&gt; --> replace the alias A by A = LOAD '/path'</li>
+ *   <li>&lt;DUMP, &gt; --> remove the DUMP queries</li>
+ * </ul>
+ *
+ * <p>It might be possible to do the same thing in a less hacky way,
+ * e.g. have pig.registerQuery replace the query of a certain alias...
+ */
+public class GruntParser extends org.apache.pig.tools.grunt.GruntParser {
+  /** A mapping <alias,query> to apply to the pig script. */
+  private final Map<String, String> aliasOverride;
+
+  /**
+   * Initializes the Pig parser with its list of aliases to override.
+   *
+   * @param stream The Pig script stream.
+   * @param aliasOverride The list of aliases to override in the Pig script.
+   */
+  public GruntParser(Reader stream, Map<String, String> aliasOverride) {
+    super(stream);
+    this.aliasOverride = aliasOverride;
+  }
+
+  /**
+   * Pig relations that have been blanked are dropped.
+   */
+  @Override
+  protected void processPig(String cmd) throws IOException {
+    String command = override(cmd);
+
+    if (! command.equals("")) {
+      super.processPig(command);
+    }
+  }
+
+  /**
+   * Overrides the relations of the pig script that we want to change.
+   *
+   * @param query The current pig query processed by the parser.
+   * @return The same query, or a modified query, or blank.
+   */
+  public String override(String query) {
+    Map<String, String> metaData = new HashMap<String, String>();
+
+    for (Entry<String, String> alias : aliasOverride.entrySet()) {
+      saveLastStoreAlias(query, metaData);
+
+      if (query.toLowerCase().startsWith(alias.getKey().toLowerCase() + " ")) {
+        System.out.println(
+            String.format("%s\n--> %s", query, "".equals(alias.getValue()) ? "none" : alias.getValue()));
+        query = alias.getValue();
+      }
+    }
+
+    aliasOverride.putAll(metaData);
+
+    return query;
+  }
+
+  /**
+   * Saves the name of the alias of the last store.
+   *
+   * <p>Maybe better to replace it by PigServer.getPigContext().getLastAlias().
+   */
+  void saveLastStoreAlias(String cmd, Map<String, String> metaData) {
+    if (cmd.toUpperCase().startsWith("STORE")) {
+      Pattern outputFile = Pattern.compile("STORE +([^']+) INTO.*", Pattern.CASE_INSENSITIVE);
+      Matcher matcher = outputFile.matcher(cmd);
+      if (matcher.matches()) {
+        metaData.put("LAST_STORE_ALIAS", matcher.group(1));
+      }
+    }
+  }
+}

Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.pigunit.pig;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.pig.ExecType;
+import org.apache.pig.backend.executionengine.ExecException;
+
+/**
+ * Slightly modified PigServer that accepts a list of Pig aliases to override.
+ *
+ * <p>The list is given to the GruntParser.
+ */
+public class PigServer extends org.apache.pig.PigServer {
+
+  public PigServer(ExecType execType, Properties properties) throws ExecException {
+    super(execType, properties);
+  }
+
+  public PigServer(ExecType execType) throws ExecException {
+    super(execType);
+  }
+
+  /**
+   * Parses and registers the pig script.
+   *
+   * @param fileName The Pig script file.
+   * @param aliasOverride The list of aliases to override in the Pig script.
+   * @throws IOException If the Pig script can't be parsed correctly.
+   */
+  public void registerScript(String fileName, Map<String, String> aliasOverride)
+      throws IOException {
+    try {
+      GruntParser grunt = new GruntParser(new FileReader(new File(fileName)), aliasOverride);
+      grunt.setInteractive(false);
+      grunt.setParams(this);
+      grunt.parseStopOnError(true);
+    } catch (FileNotFoundException e) {
+      e.printStackTrace();
+      throw new IOException(e.getCause());
+    } catch (org.apache.pig.tools.pigscript.parser.ParseException e) {
+      e.printStackTrace();
+      throw new IOException(e.getCause());
+    }
+  }
+}
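
For illustration, this helper can also be used on its own outside PigTest; a minimal sketch, assuming a LOCAL-mode run, a hypothetical already parameter-substituted script at /tmp/my_substituted_script.pig, and an alias named queries_limit defined in it.

    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.Map;

    import org.apache.pig.ExecType;
    import org.apache.pig.data.Tuple;
    import org.apache.pig.pigunit.pig.PigServer;

    public class RegisterScriptExample {  // illustrative class name
      public static void main(String[] args) throws IOException {
        PigServer pig = new PigServer(ExecType.LOCAL);

        // Blank out STORE and DUMP so nothing is written while prototyping.
        Map<String, String> aliasOverrides = new HashMap<String, String>();
        aliasOverrides.put("STORE", "");
        aliasOverrides.put("DUMP", "");

        // Hypothetical script path; the overrides are applied while parsing.
        pig.registerScript("/tmp/my_substituted_script.pig", aliasOverrides);

        // Inspect any alias of the script.
        Iterator<Tuple> it = pig.openIterator("queries_limit");
        while (it.hasNext()) {
          System.out.println(it.next());
        }
      }
    }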

Added: hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,301 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.test.pigunit;
+
+import java.io.File;
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.pig.pigunit.Cluster;
+import org.apache.pig.pigunit.PigTest;
+import org.apache.pig.pigunit.pig.PigServer;
+import org.apache.pig.tools.parameters.ParseException;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * <p>Various examples about how to use PigUnit.
+ *
+ * <p>Requires in CLASSPATH:
+ * <ul>
+ *   <li>pig.jar</li>
+ *   <li>pigunit.jar</li>
+ *   <li>hadoop_conf_dir to current/future cluster if not using LOCAL mode</li>
+ * </ul>
+ */
+public class TestPigTest {
+  private PigTest test;
+  private static Cluster cluster;
+  private static final String PIG_SCRIPT = "test/data/pigunit/top_queries.pig";
+
+  @BeforeClass
+  public static void setUpOnce() throws IOException {
+    cluster = PigTest.getCluster();
+
+    cluster.update(
+        new Path("test/data/pigunit/top_queries_input_data.txt"),
+        new Path("top_queries_input_data.txt"));
+  }
+
+  @Test
+  public void testNtoN() throws ParseException, IOException {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+    test = new PigTest(PIG_SCRIPT, args);
+
+    String[] output = {
+        "(yahoo,25)",
+        "(facebook,15)",
+        "(twitter,7)",
+    };
+
+    test.assertOutput("queries_limit", output);
+  }
+
+  @Test
+  public void testImplicitNtoN() throws ParseException, IOException {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+    test = new PigTest(PIG_SCRIPT, args);
+
+    String[] output = {
+        "(yahoo,25)",
+        "(facebook,15)",
+        "(twitter,7)",
+    };
+
+    test.assertOutput(output);
+  }
+
+  @Test
+  public void testTextInput() throws ParseException, IOException  {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+    test = new PigTest(PIG_SCRIPT, args);
+
+    String[] input = {
+        "yahoo\t10",
+        "twitter\t7",
+        "facebook\t10",
+        "yahoo\t15",
+        "facebook\t5",
+        "a\t1",
+        "b\t2",
+        "c\t3",
+        "d\t4",
+        "e\t5",
+    };
+
+    String[] output = {
+        "(yahoo,25)",
+        "(facebook,15)",
+        "(twitter,7)",
+    };
+
+    test.assertOutput("data", input, "queries_limit", output);
+  }
+
+  @Test
+  public void testSubset() throws ParseException, IOException  {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+    test = new PigTest(PIG_SCRIPT, args);
+
+    String[] input = {
+        "yahoo\t10",
+        "twitter\t7",
+        "facebook\t10",
+        "yahoo\t15",
+        "facebook\t5",
+        "a\t1",
+        "b\t2",
+        "c\t3",
+        "d\t4",
+        "e\t5",
+    };
+
+    String[] output = {
+        "(yahoo,25)",
+        "(facebook,15)",
+        "(twitter,7)",
+    };
+
+    test.assertOutput("data", input, "queries_limit", output);
+  }
+
+  @Test
+  public void testOverride() throws ParseException, IOException  {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+    test = new PigTest(PIG_SCRIPT, args);
+
+    test.override("queries_limit", "queries_limit = LIMIT queries_ordered 2;");
+
+    String[] output = {
+        "(yahoo,25)",
+        "(facebook,15)",
+    };
+
+    test.assertOutput(output);
+  }
+
+  @Test
+  public void testInlinePigScript() throws ParseException, IOException  {
+    String[] script = {
+        "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);",
+        "queries_group = GROUP data BY query PARALLEL 1;",
+        "queries_sum = FOREACH queries_group GENERATE group AS query, SUM(data.count) AS count;",
+        "queries_ordered = ORDER queries_sum BY count DESC PARALLEL 1;",
+        "queries_limit = LIMIT queries_ordered 3;",
+        "STORE queries_limit INTO 'top_3_queries';",
+    };
+
+    test = new PigTest(script);
+
+    String[] output = {
+        "(yahoo,25)",
+        "(facebook,15)",
+        "(twitter,7)",
+    };
+
+    test.assertOutput(output);
+  }
+
+  @Test
+  public void testFileOutput() throws ParseException, IOException {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+    test = new PigTest(PIG_SCRIPT, args);
+
+    test.assertOutput(new File("test/data/pigunit/top_queries_expected_top_3.txt"));
+  }
+
+  @Test
+  public void testArgFiles() throws ParseException, IOException {
+    String[] argsFile = {
+        "test/data/pigunit/top_queries_params.txt"
+    };
+
+    test = new PigTest(PIG_SCRIPT, null, argsFile);
+
+    test.assertOutput(new File("test/data/pigunit/top_queries_expected_top_3.txt"));
+  }
+
+  @Test
+  public void testGetLastAlias() throws ParseException, IOException  {
+    String[] script = {
+        "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);",
+        "queries_group = GROUP data BY query PARALLEL 1;",
+        "queries_sum = FOREACH queries_group GENERATE group AS query, SUM(data.count) AS count;",
+        "queries_ordered = ORDER queries_sum BY count DESC PARALLEL 1;",
+        "queries_limit = LIMIT queries_ordered 3;",
+        "STORE queries_limit INTO 'top_3_queries';",
+    };
+
+    test = new PigTest(script);
+
+    String expected =
+        "(yahoo,25)\n" +
+        "(facebook,15)\n" +
+        "(twitter,7)";
+
+    TestCase.assertEquals(expected, StringUtils.join(test.getAlias("queries_limit"), "\n"));
+  }
+
+  @Test
+  public void testWithUdf() throws ParseException, IOException  {
+    String[] script = {
+     // "REGISTER myIfNeeded.jar;",
+        "DEFINE TOKENIZE TOKENIZE();",
+        "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);",
+        "queries = FOREACH data GENERATE query, TOKENIZE(query) AS query_tokens;",
+        "queries_ordered = ORDER queries BY query DESC PARALLEL 1;",
+        "queries_limit = LIMIT queries_ordered 3;",
+        "STORE queries_limit INTO 'top_3_queries';",
+    };
+
+    test = new PigTest(script);
+
+    String[] output = {
+        "(yahoo,{(yahoo)})",
+        "(yahoo,{(yahoo)})",
+        "(twitter,{(twitter)})",
+    };
+
+    test.assertOutput(output);
+  }
+
+  @Test
+  public void testStore() throws ParseException, IOException {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+    test = new PigTest(PIG_SCRIPT, args);
+
+    // By default PigUnit removes all the STORE and DUMP
+    test.unoverride("STORE");
+
+    test.runScript();
+
+    TestCase.assertTrue(cluster.delete(new Path("top_3_queries")));
+  }
+
+  @Ignore("Not ready yet")
+  @Test
+  public void testWithMock() throws ParseException, IOException {
+    String[] args = {
+        "n=3",
+        "reducers=1",
+        "input=top_queries_input_data.txt",
+        "output=top_3_queries",
+        };
+
+    PigServer mockServer = null;
+    Cluster mockCluster = null;
+
+    test = new PigTest(PIG_SCRIPT, args, mockServer, mockCluster);
+
+    test.assertOutput(new File("data/top_queries_expected_top_3.txt"));
+  }
+}

Added: hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.test.pigunit.pig;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import junit.framework.Assert;
+
+import org.apache.pig.pigunit.pig.GruntParser;
+import org.junit.Before;
+import org.junit.Test;
+
+
+public class TestGruntParser {
+  private GruntParser parser;
+  private Map<String, String> override;
+
+  @SuppressWarnings("serial")
+  @Before
+  public void setUp() {
+    override = new HashMap<String, String>() {{
+      put("STORE", "");
+      put("DUMP", "");
+    }};
+    parser = new GruntParser(new StringReader(""), override);
+  }
+
+  @Test
+  public void testRemoveStores() throws IOException {
+    Assert.assertEquals("", parser.override("STORE output INTO '/path';"));
+
+    override.remove("STORE");
+    Assert.assertEquals(
+        "STORE output INTO '/path';", parser.override("STORE output INTO '/path';"));
+  }
+
+  @Test
+  public void testRemoveDumps() throws IOException {
+    Assert.assertEquals("", parser.override("DUMP output;"));
+
+    override.remove("DUMP");
+    Assert.assertEquals("DUMP output;", parser.override("DUMP output;"));
+  }
+
+  @Test
+  public void testReplaceLoad() throws IOException {
+    override.put("A", "A = LOAD 'file';");
+    Assert.assertEquals(
+        "A = LOAD 'file';",
+        parser.override("A = LOAD 'input.txt' AS (query:CHARARRAY);"));
+  }
+
+  @Test
+  public void testGetStoreAlias() throws IOException {
+    override.remove("STORE");
+    parser.override("STORE output INTO '/path'");
+    Assert.assertEquals("output", override.get("LAST_STORE_ALIAS"));
+  }
+}

Added: hadoop/pig/trunk/test/pigunit-tests
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/pigunit-tests?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/pigunit-tests (added)
+++ hadoop/pig/trunk/test/pigunit-tests Thu Aug 19 04:33:33 2010
@@ -0,0 +1,2 @@
+**/TestPigTest.java
+**/TestGruntParser.java