You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2010/08/19 06:33:34 UTC
svn commit: r987014 - in /hadoop/pig/trunk: ./ ivy/
src/docs/src/documentation/content/xdocs/ test/ test/data/
test/data/pigunit/ test/org/apache/pig/pigunit/
test/org/apache/pig/pigunit/pig/ test/org/apache/pig/test/pigunit/
test/org/apache/pig/test/p...
Author: gates
Date: Thu Aug 19 04:33:33 2010
New Revision: 987014
URL: http://svn.apache.org/viewvc?rev=987014&view=rev
Log:
PIG-1404: added PigUnit, a framework for building unit tests of Pig Latin scripts.
Added:
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml
hadoop/pig/trunk/test/data/
hadoop/pig/trunk/test/data/pigunit/
hadoop/pig/trunk/test/data/pigunit/top_queries.pig
hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt
hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt
hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt
hadoop/pig/trunk/test/org/apache/pig/pigunit/
hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java
hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java
hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java
hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/
hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java
hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java
hadoop/pig/trunk/test/org/apache/pig/test/pigunit/
hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java
hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/
hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java
hadoop/pig/trunk/test/pigunit-tests
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/build.xml
hadoop/pig/trunk/ivy.xml
hadoop/pig/trunk/ivy/libraries.properties
hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml
Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=987014&r1=987013&r2=987014&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Thu Aug 19 04:33:33 2010
@@ -26,6 +26,8 @@ PIG-1249: Safe-guards against misconfigu
IMPROVEMENTS
+PIG-1404: added PigUnit, a framework for building unit tests of Pig Latin scripts (romainr via gates)
+
PIG-1452: to remove hadoop20.jar from lib and use hadoop from the apache maven
repo. (rding)
Modified: hadoop/pig/trunk/build.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/build.xml?rev=987014&r1=987013&r2=987014&view=diff
==============================================================================
--- hadoop/pig/trunk/build.xml (original)
+++ hadoop/pig/trunk/build.xml Thu Aug 19 04:33:33 2010
@@ -84,6 +84,10 @@
<property name="test.unit.file" value="${test.src.dir}/unit-tests"/>
<property name="test.smoke.file" value="${test.src.dir}/smoke-tests"/>
<property name="test.all.file" value="${test.src.dir}/all-tests"/>
+ <property name="pigunit.jarfile" value="pigunit.jar" />
+ <property name="test.pigunit.src.dir" value="${test.src.dir}/org/apache/pig/test/pigunit" />
+ <property name="commons-lang.jarfile" value="commons-lang-2.4.jar" />
+ <property name="test.pigunit.file" value="${test.src.dir}/pigunit-tests"/>
<!-- test configuration, use ${user.home}/build.properties to configure values -->
@@ -607,6 +611,7 @@
<exclude name="**/TestOrderBy2.java" />
<exclude name="**/TestPi.java" />
<exclude name="**/nightly/**" />
+ <!-- <exclude name="**/pigunit/**" /> -->
<exclude name="**/${exclude.testcase}.java" if="exclude.testcase" />
</fileset>
</batchtest>
@@ -638,6 +643,22 @@
</target>
<!-- ================================================================== -->
+ <!-- Pigunit -->
+ <!-- ================================================================== -->
+
+ <target depends="compile-test" name="pigunit-jar" description="create the pigunit jar file">
+ <echo> *** Creating pigunit.jar ***</echo>
+ <jar destfile="${pigunit.jarfile}">
+ <fileset dir="${test.build.classes}/org/apache/pig/pigunit/"/>
+ <zipfileset src="${ivy.lib.dir}/${commons-lang.jarfile}" />
+ </jar>
+ </target>
+
+ <target name="test-pigunit" depends="compile-test,jar-withouthadoop, pigunit-jar" description="Run tests that test PigUnit">
+ <macro-test-runner test.file="${test.pigunit.file}" />
+ </target>
+
+ <!-- ================================================================== -->
<!-- D I S T R I B U T I O N -->
<!-- ================================================================== -->
<target name="package" depends="docs, api-report" description="Create a Pig release">
Modified: hadoop/pig/trunk/ivy.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/ivy.xml?rev=987014&r1=987013&r2=987014&view=diff
==============================================================================
--- hadoop/pig/trunk/ivy.xml (original)
+++ hadoop/pig/trunk/ivy.xml Thu Aug 19 04:33:33 2010
@@ -84,8 +84,11 @@
<dependency org="org.codehaus.jackson" name="jackson-core-asl" rev="${jackson.version}"
conf="compile->master"/>
<dependency org="joda-time" name="joda-time" rev="${joda-time.version}" conf="compile->master"/>
+ <dependency org="commons-lang" name="commons-lang" rev="${commons-lang.version}"
+ conf="compile->master"/>
<dependency org="com.google.guava" name="guava" rev="${guava.version}" conf="compile->master" />
- <dependency org="org.python" name="jython" rev="${jython.version}" conf="compile->master"/>
+ <dependency org="org.python" name="jython" rev="${jython.version}" conf="compile->master"/>
+
<!--ATM hbase, hbase-test.jar, hadoop.jar are resolved from the lib dir-->
<dependency org="hsqldb" name="hsqldb" rev="${hsqldb.version}"
conf="test->default" />
Modified: hadoop/pig/trunk/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/ivy/libraries.properties?rev=987014&r1=987013&r2=987014&view=diff
==============================================================================
--- hadoop/pig/trunk/ivy/libraries.properties (original)
+++ hadoop/pig/trunk/ivy/libraries.properties Thu Aug 19 04:33:33 2010
@@ -21,6 +21,7 @@ commons-beanutils.version=1.7.0
commons-cli.version=1.0
commons-el.version=1.0
commons-logging.version=1.0.3
+commons-lang.version=2.4
checkstyle.version=4.2
ivy.version=2.2.0-rc1
Added: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml (added)
+++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml Thu Aug 19 04:33:33 2010
@@ -0,0 +1,271 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+ <!--
+ Copyright 2002-2004 The Apache Software Foundation Licensed under the Apache License, Version
+ 2.0 (the "License"); you may not use this file except in compliance with the License. You may
+ obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by
+ applicable law or agreed to in writing, software distributed under the License is distributed on
+ an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See
+ the License for the specific language governing permissions and limitations under the License.
+ -->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+ "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+ <header>
+ <title>PigUnit - Pig script testing simplified.</title>
+ </header>
+ <body>
+
+ <section>
+ <title>Overview</title>
+ <p>The goal is to provide a simple xUnit framework that enables our Pig scripts to be easily:
+ </p>
+ <ol>
+ <li>
+ <p>unit tested</p>
+ </li>
+ <li>
+ <p>regression tested</p>
+ </li>
+ <li>
+ <p>quickly prototyped</p>
+ </li>
+ </ol>
+
+ <p>No cluster set up is required.</p>
+ </section>
+
+ <section>
+ <title>PigUnit Example</title>
+ <p>Computing top queries, specifying the input data and expected output of the script.</p>
+ <p>Java test</p>
+ <source>
+ @Test
+ public void testTop3Queries() {
+ String[] args = {
+ "n=3",
+ };
+ test = new PigTest("top_queries.pig", args);
+
+ String[] input = {
+ "yahoo\t10",
+ "twitter\t7",
+ "facebook\t10",
+ "yahoo\t15",
+ "facebook\t5",
+ ....
+ };
+
+ String[] output = {
+ "(yahoo,25L)",
+ "(facebook,15L)",
+ "(twitter,7L)",
+ };
+
+ test.assertOutput("data", input, "queries_limit", output);
+ }
+ </source>
+ <p>top_queries.pig</p>
+ <source>
+data =
+ LOAD '$input'
+ AS (query:CHARARRAY, count:INT);
+
+ ...
+
+queries_sum =
+ FOREACH queries_group
+ GENERATE
+ group AS query,
+ SUM(queries.count) AS count;
+
+ ...
+
+queries_limit = LIMIT queries_ordered $n;
+
+STORE queries_limit INTO '$output';
+</source>
+
+ <p>You just need two jar files in your classpath:</p>
+ <ol>
+ <li>pig.jar</li>
+ <li>pigunit.jar</li>
+ </ol>
+
+ <p>
+ Many examples are available in the
+ <a
+ href="http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java"
+ >PigUnit tests</a>
+ .
+ </p>
+ </section>
+
+ <section>
+ <title>Cluster</title>
+
+ <p>There are 2 main modes:</p>
+ <ol>
+ <li>LOCAL</li>
+ <li>MAPREDUCE</li>
+ </ol>
+
+ <section>
+ <title>LOCAL</title>
+ <p>
+ This is using the local mode of Pig.
+ It will be used by default.
+ </p>
+
+ <p>It runs fast and uses your local file system in place of an HDFS cluster.</p>
+ </section>
+
+
+ <section>
+ <title>MAPREDUCE</title>
+ <p>This is using a real Hadoop cluster.
+ The cluster selected will be the first specified in
+ the CLASSPATH (same
+ way as the HADOOP_CONF_DIR variable works). You
+ can also choose to have
+ a test cluster automatically
+ starting/stopping or you can reuse an already
+ running cluster.
+ </p>
+
+ <section>
+ <title>On demand cluster</title>
+ <p>
+ This mode uses a local MiniCluster that is started at the very beginning
+ and
+ shutdown automatically at the end of the test run.
+ No setup needed which is really
+ helpful. The cluster will contain no data each time it is
+ started, but data can be
+ copied
+ to it as shown in the examples.
+
+ You can select this mode by setting the Java property
+ <code>"pigunit.exectype.minicluster"</code>
+ to "true".
+ </p>
+ <p>It can be set in Java or on the command line:</p>
+ <ol>
+ <li>
+ <code>System.setProperty("pigunit.exectype.minicluster", "true");</code>
+ </li>
+ <li>
+ <code>-Dpigunit.exectype.minicluster=true</code>
+ </li>
+ </ol>
+ <p>
+ The
+ <code>HADOOP_CONF_DIR</code>
+ path will be
+ <code>~/pigtest/conf</code>
+ and it will be required in the CLASSPATH.
+ The path to the log directory is set by the
+ Java property
+ <code>"hadoop.log.dir"</code>
+ (default is "/tmp/pigunit").
+ </p>
+ </section>
+
+ <section>
+ <title>Existing cluster</title>
+ <p>
+ If
+ <code>"pigunit.exectype.cluster"</code>
+ property is set, the first XML configuration of a Hadoop cluster found in the
+ CLASSPATH
+ will be used.
+
+ Notice that PigUnit comes with a standalone MiniCluster that
+ can be started
+ externally with:
+ </p>
+ <source>
+java -cp .../pig.jar:.../pigunit.jar org.apache.pig.pigunit.MiniClusterRunner
+</source>
+ <p>This is really useful when doing some prototyping in order to have a test cluster
+ ready.</p>
+ </section>
+ </section>
+ </section>
+
+ <section>
+ <title>Building</title>
+ <p>In order to build pigunit.jar, run the following from the Pig trunk directory:</p>
+ <source>
+$pig_trunk ant compile-test
+$pig_trunk ant
+$pig_trunk ant pigunit-jar
+</source>
+ </section>
+
+ <section>
+ <title>Troubleshooting</title>
+ <p>Common problems</p>
+ <section>
+ <title>CLASSPATH in MAPREDUCE mode</title>
+ <p>When used in MAPREDUCE mode, do not forget the HADOOP_CONF_DIR of your cluster in
+ your
+ CLASSPATH.</p>
+ <p>
+ It is
+ <code>~/pigtest/conf</code>
+ by default
+ </p>
+ <source>
+org.apache.pig.backend.executionengine.ExecException: ERROR 4010: Cannot find hadoop configurations in classpath (neither hadoop-site.xml nor core-site.xml was found in the classpath).If you plan to use local mode, please put -x local option in command line
+ </source>
+ </section>
+
+ <section>
+ <title>UDF jars not found</title>
+ <p>This error means that you are missing some jars in your test environment.</p>
+ <source>
+WARN util.JarManager: Couldn't find the jar for org.apache.pig.piggybank.evaluation.string.LOWER, skip it
+ </source>
+ </section>
+
+ <section>
+ <title>STORING data</title>
+ <p>Currently PigUnit drops all the STORE/DUMP commands, but you can tell it to
+ keep
+ them and execute the script.</p>
+ <source>
+test = new PigTest(PIG_SCRIPT, args);
+test.unoverride("STORE");
+test.runScript();
+</source>
+ </section>
+
+ <section>
+ <title>Cache archive</title>
+ <p>It works, but your test environment will need to have the cache archive options
+ specified by
+ Java properties or in an additional XML configuration in its
+ CLASSPATH.</p>
+ <p>If you use a local cluster, you will need to set the required environment
+ variables before
+ starting it, e.g.</p>
+ <source>export LD_LIBRARY_PATH=/home/path/to/lib</source>
+ </section>
+ </section>
+
+ <section>
+ <title>Future</title>
+ <p>Improvement and other components based on PigUnit that could be built later.</p>
+ <p>We could build on top of PigTest a PigTestCase and PigTestSuite in order to have:</p>
+ <ol>
+ <li>notion of workspaces for each test</li>
+ <li>removal of the boilerplate code that appears when there is more than one test
+ method</li>
+ <li>standalone utility that reads test configuration and generates a test report...</li>
+ </ol>
+ </section>
+ </body>
+</document>
Modified: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml?rev=987014&r1=987013&r2=987014&view=diff
==============================================================================
--- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml (original)
+++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml Thu Aug 19 04:33:33 2010
@@ -47,6 +47,7 @@ See http://forrest.apache.org/docs/linki
<plref2 label="Pig Latin 2" href="piglatin_ref2.html" />
<cookbook label="Cookbook" href="cookbook.html" />
<udf label="UDFs" href="udf.html" />
+ <udf label="PigUnit" href="pigunit.html" />
</docs>
<docs label="Zebra">
Added: hadoop/pig/trunk/test/data/pigunit/top_queries.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/data/pigunit/top_queries.pig?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/data/pigunit/top_queries.pig (added)
+++ hadoop/pig/trunk/test/data/pigunit/top_queries.pig Thu Aug 19 04:33:33 2010
@@ -0,0 +1,27 @@
+---------------------------------------------------------------------
+-- Top N Queries.
+---------------------------------------------------------------------
+
+data =
+ LOAD '$input'
+ AS (query:CHARARRAY, count:INT);
+
+queries_group =
+ GROUP data
+ BY query
+ PARALLEL $reducers;
+
+queries_sum =
+ FOREACH queries_group
+ GENERATE
+ group AS query,
+ SUM(data.count) AS count;
+
+queries_ordered =
+ ORDER queries_sum
+ BY count DESC
+ PARALLEL $reducers;
+
+queries_limit = LIMIT queries_ordered $n;
+
+STORE queries_limit INTO '$output';
Added: hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt (added)
+++ hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt Thu Aug 19 04:33:33 2010
@@ -0,0 +1,3 @@
+(yahoo,25)
+(facebook,15)
+(twitter,7)
\ No newline at end of file
Added: hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt (added)
+++ hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt Thu Aug 19 04:33:33 2010
@@ -0,0 +1,10 @@
+yahoo 10
+twitter 7
+facebook 10
+yahoo 15
+facebook 5
+a 1
+b 2
+c 3
+d 4
+e 5
\ No newline at end of file
Added: hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt (added)
+++ hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt Thu Aug 19 04:33:33 2010
@@ -0,0 +1,4 @@
+n=3
+reducers=1
+input=top_queries_input_data.txt
+output=top_3_queries
Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.pigunit;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
+import org.apache.pig.impl.PigContext;
+import org.apache.pig.test.Util;
+
+/**
+ * Encapsulates all the file system operations.
+ *
+ * <p>Mainly used for copying data to the test cluster.
+ */
+public class Cluster {
+ private final Configuration configuration;
+
+ public Cluster(PigContext context) {
+ configuration = ConfigurationUtil.toConfiguration(context.getProperties());
+ }
+
+ public boolean exists(Path destination) throws IOException {
+ FileSystem fs = destination.getFileSystem(configuration);
+ return fs.exists(destination);
+ }
+
+ /**
+ * If file size has changed, or if destination does not exist yet, copy it.
+ *
+ * @param local Path to the local file to copy to the cluster.
+ * @param destination Destination path on the cluster.
+ * @throws IOException If the copy failed.
+ */
+ public void update(Path local, Path destination) throws IOException {
+ if (! exists(destination) || ! sameSize(local, destination)) {
+ copyFromLocalFile(local, destination, true);
+ }
+ }
+
+ public void copyFromLocalFile(Path local, Path destination) throws IOException {
+ copyFromLocalFile(local, destination, true);
+ }
+
+ public void copyFromLocalFile(Path local, Path destination, boolean overwrite)
+ throws IOException {
+ FileSystem fs = local.getFileSystem(configuration);
+ fs.copyFromLocalFile(false, overwrite, local, destination);
+ }
+
+ public void copyFromLocalFile(String[] content, String destination) throws IOException {
+ copyFromLocalFile(content, destination, true);
+ }
+
+ public void copyFromLocalFile(String[] content, String destination, boolean overwrite)
+ throws IOException {
+ Path file = new Path(destination);
+ FileSystem fs = file.getFileSystem(configuration);
+
+ if (overwrite && fs.exists(file)) {
+ fs.delete(file, true);
+ }
+
+ Util.createInputFile(fs, destination, content);
+ }
+
+ public void copyFromLocalFile(String[][] data) throws IOException {
+ copyFromLocalFile(data, false);
+ }
+
+ public void copyFromLocalFile(String[][] data, boolean overwrite) throws IOException {
+ for (int i = 0; i < data.length; i++) {
+ copyFromLocalFile(new Path(data[i][0]), new Path(data[i][1]), overwrite);
+ }
+ }
+
+ public FileStatus[] listStatus(Path path) throws IOException {
+ FileSystem fs = path.getFileSystem(configuration);
+ return fs.listStatus(path);
+ }
+
+ public boolean delete(Path path) throws IOException {
+ FileSystem fs = path.getFileSystem(configuration);
+ return fs.delete(path, true);
+ }
+
+ private boolean sameSize(Path local, Path destination) throws IOException {
+ FileSystem fs1 = FileSystem.getLocal(configuration);
+ FileSystem fs2 = destination.getFileSystem(configuration);
+
+ return fs1.getFileStatus(local).getLen() == fs2.getFileStatus(destination).getLen();
+ }
+}
Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.pigunit;
+
+import org.apache.pig.test.MiniCluster;
+
+
+/**
+ * Starts an on-demand mini cluster that requires no set up.
+ *
+ * <p>It can be useful if you don't want to restart the cluster between each run of test and don't
+ * want to set up a real cluster.
+ *
+ * <p>CLASSPATH needs to contain: pig.jar and piggybank.jar
+ * <pre>
+ * export CLASSPATH=/path/pig.jar:/path/piggybank.jar
+ * java org.apache.pig.pigunit.MiniClusterRunner
+ * </pre>
+ *
+ * <p>Possible improvements
+ * <ul>
+ * <li>add a main in MiniCluster</li>
+ * <li>make MiniCluster configurable (number of maps...)</li>
+ * <li>make MiniCluster use a default property for choosing the hadoop conf dir
+ * (e.g. minicluster.conf.dir) instead of always using
+ * System.getProperty("user.home"), "pigtest/conf/"</li>
+ * <li>use CLI option</li>
+ * <li>make a shell wrapper</li>
+ * </ul>
+ */
+public class MiniClusterRunner {
+ public static void main(String[] args) {
+ System.setProperty("hadoop.log.dir", "/tmp/pigunit");
+ MiniCluster.buildCluster();
+ }
+}
Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,290 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.pigunit;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+import junit.framework.Assert;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
+import org.apache.pig.ExecType;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.pigunit.pig.PigServer;
+import org.apache.pig.test.MiniCluster;
+import org.apache.pig.tools.parameters.ParameterSubstitutionPreprocessor;
+import org.apache.pig.tools.parameters.ParseException;
+
+/**
+ * Pig Unit
+ *
+ * <p>Equivalent of xUnit for testing Pig.
+ *
+ * <p>Call {@link PigTest#getCluster()} then construct a test and call an assert method.
+ * Have a look at the tests of this class for more examples.
+ */
+public class PigTest {
+ /** The text of the Pig script to test with no substitution or change. */
+ private final String originalTextPigScript;
+ /** The list of arguments of the script. */
+ private final String[] args;
+ /** The list of file arguments of the script. */
+ private final String[] argFiles;
+ /** The list of aliases to override in the script. */
+ private final Map<String, String> aliasOverrides;
+
+ private static PigServer pig;
+ private static Cluster cluster;
+ private static final Logger LOG = Logger.getLogger(PigTest.class);
+ private static final String EXEC_MINI_CLUSTER = "pigunit.exectype.minicluster";
+ private static final String EXEC_CLUSTER = "pigunit.exectype.cluster";
+
+ /**
+ * Initializes the Pig test.
+ *
+ * @param args The list of arguments of the script.
+ * @param argFiles The list of file arguments of the script.
+ * @param pigTextScript The text of the Pig script to test with no substitution or change.
+ */
+ @SuppressWarnings("serial")
+ PigTest(String[] args, String[] argFiles, String pigTextScript) {
+ this.originalTextPigScript = pigTextScript;
+ this.args = args;
+ this.argFiles = argFiles;
+ this.aliasOverrides = new HashMap<String, String>() {{
+ put("STORE", "");
+ put("DUMP", "");
+ }};
+ }
+
+ public PigTest(String scriptPath) throws IOException {
+ this(null, null, readFile(scriptPath));
+ }
+
+ public PigTest(String[] script) {
+ this(null, null, StringUtils.join(script, "\n"));
+ }
+
+ public PigTest(String scriptPath, String[] args) throws IOException {
+ this(args, null, readFile(scriptPath));
+ }
+
+ public PigTest(String[] script, String[] args) {
+ this(args, null, StringUtils.join(script, "\n"));
+ }
+
+ public PigTest(String[] script, String[] args, String[] argsFile) {
+ this(args, argsFile, StringUtils.join(script, "\n"));
+ }
+
+ public PigTest(String scriptPath, String[] args, String[] argFiles) throws IOException {
+ this(args, argFiles, readFile(scriptPath));
+ }
+
+ public PigTest(String scriptPath, String[] args, PigServer pig, Cluster cluster)
+ throws IOException {
+ this(args, null, readFile(scriptPath));
+ PigTest.pig = pig;
+ PigTest.cluster = cluster;
+ }
+
+ /**
+ * Connects and starts if needed the PigServer.
+ *
+ * @return The cluster where input files can be copied.
+ * @throws ExecException If the PigServer can't be started.
+ */
+ public static Cluster getCluster() throws ExecException {
+ if (cluster == null) {
+ LOG.info("Using mini cluster mode");
+ if (System.getProperties().containsKey(EXEC_MINI_CLUSTER)) {
+ if (! System.getProperties().containsKey("hadoop.log.dir")) {
+ System.setProperty("hadoop.log.dir", "/tmp/pigunit");
+ }
+ MiniCluster.buildCluster();
+ pig = new PigServer(ExecType.MAPREDUCE);
+ } else if (System.getProperties().containsKey(EXEC_CLUSTER)) {
+ LOG.info("Using cluster mode");
+ pig = new PigServer(ExecType.MAPREDUCE);
+ } else {
+ LOG.info("Using default local mode");
+ pig = new PigServer(ExecType.LOCAL);
+ }
+
+ cluster = new Cluster(pig.getPigContext());
+ }
+
+ return cluster;
+ }
+
+ /**
+ * Registers a Pig script with its variables substituted.
+ *
+ * @throws IOException If a temp file containing the pig script could not be created.
+ * @throws ParseException The pig script could not have all its variables substituted.
+ */
+ protected void registerScript() throws IOException, ParseException {
+ BufferedReader pigIStream = new BufferedReader(new StringReader(this.originalTextPigScript));
+ StringWriter pigOStream = new StringWriter();
+
+ ParameterSubstitutionPreprocessor ps = new ParameterSubstitutionPreprocessor(50);
+ ps.genSubstitutedFile(pigIStream, pigOStream, args, argFiles);
+
+ String substitutedPig = pigOStream.toString();
+ System.out.println(substitutedPig);
+
+ File f = File.createTempFile("tmp", "pigunit");
+ PrintWriter pw = new PrintWriter(f);
+ pw.println(substitutedPig);
+ pw.close();
+
+ String pigSubstitutedFile = f.getCanonicalPath();
+ pig.registerScript(pigSubstitutedFile, aliasOverrides);
+ }
+
+ /**
+ * Executes the Pig script with its current overrides.
+ *
+ * @throws IOException If a temp file containing the pig script could not be created.
+ * @throws ParseException The pig script could not have all its variables substituted.
+ */
+ public void runScript() throws IOException, ParseException {
+ registerScript();
+ }
+
+ /**
+ * Gets an iterator on the content of one alias of the script.
+ *
+ * <p>For now use a giant String in order to display all the differences in one time. It might not
+ * work with giant expected output.
+ * @throws ParseException If the Pig script could not be parsed.
+ * @throws IOException If the Pig script could not be executed correctly.
+ */
+ public Iterator<Tuple> getAlias(String alias) throws IOException, ParseException {
+ registerScript();
+ return pig.openIterator(alias);
+ }
+
+ /**
+ * Gets an iterator on the content of the latest STORE alias of the script.
+ *
+ * @throws ParseException If the Pig script could not be parsed.
+ * @throws IOException If the Pig script could not be executed correctly.
+ */
+ public Iterator<Tuple> getAlias() throws IOException, ParseException {
+ registerScript();
+ String alias = aliasOverrides.get("LAST_STORE_ALIAS");
+
+ return getAlias(alias);
+ }
+
+ /**
+ * Replaces the query of an alias with another query.
+ *
+ * <p>For example:
+ *
+ * <pre>
+ * B = FILTER A BY count > 5;
+ * overridden with:
+ * <B, B = FILTER A BY name == 'Pig';>
+ * becomes
+ * B = FILTER A BY name == 'Pig';
+ * </pre>
+ *
+ * @param alias The alias to override.
+ * @param query The new value of the alias.
+ */
+ public void override(String alias, String query) {
+ aliasOverrides.put(alias, query);
+ }
+
+ public void unoverride(String alias) {
+ aliasOverrides.remove(alias);
+ }
+
+ public void assertOutput(String[] expected) throws IOException, ParseException {
+ registerScript();
+ String alias = aliasOverrides.get("LAST_STORE_ALIAS");
+
+ assertEquals(StringUtils.join(expected, "\n"), StringUtils.join(getAlias(alias), "\n"));
+ }
+
+ public void assertOutput(String alias, String[] expected) throws IOException, ParseException {
+ registerScript();
+
+ assertEquals(StringUtils.join(expected, "\n"), StringUtils.join(getAlias(alias), "\n"));
+ }
+
+ public void assertOutput(File expected) throws IOException, ParseException {
+ registerScript();
+ String alias = aliasOverrides.get("LAST_STORE_ALIAS");
+
+ assertEquals(readFile(expected), StringUtils.join(getAlias(alias), "\n"));
+ }
+
+ public void assertOutput(String alias, File expected) throws IOException, ParseException {
+ registerScript();
+
+ assertEquals(readFile(expected), StringUtils.join(getAlias(alias), "\n"));
+ }
+
+ public void assertOutput(String aliasInput, String[] input, String alias, String[] expected)
+ throws IOException, ParseException {
+ registerScript();
+
+ StringBuilder sb = new StringBuilder();
+ Schema.stringifySchema(sb, pig.dumpSchema(aliasInput), DataType.TUPLE) ;
+
+ final String destination = "pigunit-input-overriden.txt";
+ cluster.copyFromLocalFile(input, destination, true);
+ override(aliasInput,
+ String.format("%s = LOAD '%s' AS %s;", aliasInput, destination, sb.toString()));
+
+ assertOutput(alias, expected);
+ }
+
+ protected void assertEquals(String expected, String current) {
+ Assert.assertEquals(expected, current);
+ }
+
+ private static String readFile(String path) throws IOException {
+ return readFile(new File(path));
+ }
+
+ private static String readFile(File file) throws IOException {
+ FileInputStream stream = new FileInputStream(file);
+ try {
+ FileChannel fc = stream.getChannel();
+ MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
+ return Charset.defaultCharset().decode(bb).toString();
+ }
+ finally {
+ stream.close();
+ }
+ }
+}
Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.pigunit.pig;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+
+/**
+ * Slightly modified GruntParser that accepts a list of aliases to override.
+ *
+ * <p>This is a way to replace a pig query by another query.
+ *
+ * <p>For example, if we have this map of overrides: Map<alias,query>
+ * <ul>
+ * <li><A, A = LOAD '/path'> --> replace the alias A by A = LOAD '/path'</li>
+ * <li><DUMP, > --> remove the DUMP queries</li>
+ * </ul>
+ *
+ * <p>It might be possible to do the same thing in a less hacky way.
+ * e.g. pig.registerQuery replace the query of a certain alias...
+ */
+public class GruntParser extends org.apache.pig.tools.grunt.GruntParser {
+ /** A mapping <alias,query> to apply to the pig script. */
+ private final Map<String, String> aliasOverride;
+
+ /**
+ * Initializes the Pig parser with its list of aliases to override.
+ *
+ * @param stream The Pig script stream.
+ * @param aliasOverride The list of aliases to override in the Pig script.
+ */
+ public GruntParser(Reader stream, Map<String, String> aliasOverride) {
+ super(stream);
+ this.aliasOverride = aliasOverride;
+ }
+
+ /**
+ * Pig relations that have been blanked are dropped.
+ */
+ @Override
+ protected void processPig(String cmd) throws IOException {
+ String command = override(cmd);
+
+ if (! command.equals("")) {
+ super.processPig(command);
+ }
+ }
+
+ /**
+ * Overrides the relations of the pig script that we want to change.
+ *
+ * @param query The current pig query processed by the parser.
+ * @return The same query, or a modified query, or blank.
+ */
+ public String override(String query) {
+ Map<String, String> metaData = new HashMap<String, String>();
+
+ for (Entry<String, String> alias : aliasOverride.entrySet()) {
+ saveLastStoreAlias(query, metaData);
+
+ if (query.toLowerCase().startsWith(alias.getKey().toLowerCase() + " ")) {
+ System.out.println(
+ String.format("%s\n--> %s", query, alias.getValue() == "" ? "none" : alias.getValue()));
+ query = alias.getValue();
+ }
+ }
+
+ aliasOverride.putAll(metaData);
+
+ return query;
+ }
+
+ /**
+ * Saves the name of the alias of the last store.
+ *
+ * <p>Maybe better to replace it by PigServer.getPigContext().getLastAlias().
+ */
+ void saveLastStoreAlias(String cmd, Map<String, String> metaData) {
+ if (cmd.toUpperCase().startsWith("STORE")) {
+ Pattern outputFile = Pattern.compile("STORE +([^']+) INTO.*", Pattern.CASE_INSENSITIVE);
+ Matcher matcher = outputFile.matcher(cmd);
+ if (matcher.matches()) {
+ metaData.put("LAST_STORE_ALIAS", matcher.group(1));
+ }
+ }
+ }
+}
Added: hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.pigunit.pig;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.pig.ExecType;
+import org.apache.pig.backend.executionengine.ExecException;
+
+/**
+ * Slightly modified PigServer that accepts a list of Pig aliases to override.
+ *
+ * <p>The list is given to the GruntParser.
+ */
+public class PigServer extends org.apache.pig.PigServer {
+
+  /**
+   * Delegates to the matching constructor of the parent PigServer.
+   */
+  public PigServer(ExecType execType, Properties properties) throws ExecException {
+    super(execType, properties);
+  }
+
+  /**
+   * Delegates to the matching constructor of the parent PigServer.
+   */
+  public PigServer(ExecType execType) throws ExecException {
+    super(execType);
+  }
+
+  /**
+   * Parses and registers the pig script.
+   *
+   * <p>The script file is closed before this method returns, whether parsing succeeded or
+   * not.
+   *
+   * @param fileName The Pig script file.
+   * @param aliasOverride The list of aliases to override in the Pig script.
+   * @throws IOException If the Pig script can't be opened (reported as the original
+   *     FileNotFoundException) or can't be parsed correctly (the parser exception is kept
+   *     as the cause).
+   */
+  public void registerScript(String fileName, Map<String, String> aliasOverride)
+      throws IOException {
+    // A missing file is reported by FileNotFoundException, which already is an IOException:
+    // it is left to propagate untouched instead of being wrapped around its (null) cause.
+    FileReader script = new FileReader(new File(fileName));
+    try {
+      GruntParser grunt = new GruntParser(script, aliasOverride);
+      grunt.setInteractive(false);
+      grunt.setParams(this);
+      grunt.parseStopOnError(true);
+    } catch (org.apache.pig.tools.pigscript.parser.ParseException e) {
+      // Keep the parser exception itself as the cause so that its message and stack survive.
+      throw new IOException("Can't parse Pig script " + fileName + ": " + e.getMessage(), e);
+    } finally {
+      script.close();
+    }
+  }
+}
Added: hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,301 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.test.pigunit;
+
+import java.io.File;
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.pig.pigunit.Cluster;
+import org.apache.pig.pigunit.PigTest;
+import org.apache.pig.pigunit.pig.PigServer;
+import org.apache.pig.tools.parameters.ParseException;
+import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * <p>Various examples about how to use PigUnit.
+ *
+ * <p>Requires in CLASSPATH:
+ * <ul>
+ * <li>pig.jar</li>
+ * <li>pigunit.jar</li>
+ * <li>hadoop_conf_dir to current/future cluster if not using LOCAL mode</li>
+ * </ul>
+ *
+ * <p>The data shared by several examples comes from the private helpers at the end of the
+ * class; every helper builds a new array on each call.
+ */
+public class TestPigTest {
+  private static final String PIG_SCRIPT = "test/data/pigunit/top_queries.pig";
+  private static final String EXPECTED_TOP_3_FILE =
+      "test/data/pigunit/top_queries_expected_top_3.txt";
+
+  private static Cluster cluster;
+  private PigTest test;
+
+  /** Copies the input data of the examples to the cluster, once for the whole class. */
+  @BeforeClass
+  public static void setUpOnce() throws IOException {
+    cluster = PigTest.getCluster();
+
+    Path local = new Path("test/data/pigunit/top_queries_input_data.txt");
+    Path remote = new Path("top_queries_input_data.txt");
+    cluster.update(local, remote);
+  }
+
+  /** Checks an explicitly named alias of the script. */
+  @Test
+  public void testNtoN() throws ParseException, IOException {
+    test = new PigTest(PIG_SCRIPT, scriptArgs());
+
+    test.assertOutput("queries_limit", topThree());
+  }
+
+  /** Checks the output without naming the alias. */
+  @Test
+  public void testImplicitNtoN() throws ParseException, IOException {
+    test = new PigTest(PIG_SCRIPT, scriptArgs());
+
+    test.assertOutput(topThree());
+  }
+
+  /** Feeds the "data" alias with inline lines instead of the input file. */
+  @Test
+  public void testTextInput() throws ParseException, IOException {
+    test = new PigTest(PIG_SCRIPT, scriptArgs());
+
+    test.assertOutput("data", inlineInput(), "queries_limit", topThree());
+  }
+
+  /** Same calls as testTextInput. */
+  @Test
+  public void testSubset() throws ParseException, IOException {
+    test = new PigTest(PIG_SCRIPT, scriptArgs());
+
+    test.assertOutput("data", inlineInput(), "queries_limit", topThree());
+  }
+
+  /** Replaces the statement of one alias before checking the output. */
+  @Test
+  public void testOverride() throws ParseException, IOException {
+    test = new PigTest(PIG_SCRIPT, scriptArgs());
+    test.override("queries_limit", "queries_limit = LIMIT queries_ordered 2;");
+
+    String[] topTwo = {
+      "(yahoo,25)",
+      "(facebook,15)",
+    };
+    test.assertOutput(topTwo);
+  }
+
+  /** Gives the script as an array of statements instead of a file. */
+  @Test
+  public void testInlinePigScript() throws ParseException, IOException {
+    test = new PigTest(inlineScript());
+
+    test.assertOutput(topThree());
+  }
+
+  /** Compares the output with the content of a file. */
+  @Test
+  public void testFileOutput() throws ParseException, IOException {
+    test = new PigTest(PIG_SCRIPT, scriptArgs());
+
+    test.assertOutput(new File(EXPECTED_TOP_3_FILE));
+  }
+
+  /** Gives the parameters of the script through a parameter file. */
+  @Test
+  public void testArgFiles() throws ParseException, IOException {
+    String[] paramFiles = {
+      "test/data/pigunit/top_queries_params.txt"
+    };
+    test = new PigTest(PIG_SCRIPT, null, paramFiles);
+
+    test.assertOutput(new File(EXPECTED_TOP_3_FILE));
+  }
+
+  /** Reads an alias directly with getAlias. */
+  @Test
+  public void testGetLastAlias() throws ParseException, IOException {
+    test = new PigTest(inlineScript());
+
+    String expected = StringUtils.join(topThree(), "\n");
+    String actual = StringUtils.join(test.getAlias("queries_limit"), "\n");
+    TestCase.assertEquals(expected, actual);
+  }
+
+  /** Runs an inline script that uses a UDF. */
+  @Test
+  public void testWithUdf() throws ParseException, IOException {
+    String[] udfScript = {
+      // "REGISTER myIfNeeded.jar;",
+      "DEFINE TOKENIZE TOKENIZE();",
+      "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);",
+      "queries = FOREACH data GENERATE query, TOKENIZE(query) AS query_tokens;",
+      "queries_ordered = ORDER queries BY query DESC PARALLEL 1;",
+      "queries_limit = LIMIT queries_ordered 3;",
+      "STORE queries_limit INTO 'top_3_queries';",
+    };
+    test = new PigTest(udfScript);
+
+    String[] tokenized = {
+      "(yahoo,{(yahoo)})",
+      "(yahoo,{(yahoo)})",
+      "(twitter,{(twitter)})",
+    };
+    test.assertOutput(tokenized);
+  }
+
+  /** Lets the STORE run and checks that its output can be deleted afterwards. */
+  @Test
+  public void testStore() throws ParseException, IOException {
+    test = new PigTest(PIG_SCRIPT, scriptArgs());
+
+    // By default PigUnit removes all the STORE and DUMP
+    test.unoverride("STORE");
+    test.runScript();
+
+    boolean deleted = cluster.delete(new Path("top_3_queries"));
+    TestCase.assertTrue(deleted);
+  }
+
+  /** Placeholder for an example with a mocked server and cluster. */
+  @Ignore("Not ready yet")
+  @Test
+  public void testWithMock() throws ParseException, IOException {
+    // Typed null references so that the four-argument constructor is selected.
+    PigServer mockServer = null;
+    Cluster mockCluster = null;
+
+    test = new PigTest(PIG_SCRIPT, scriptArgs(), mockServer, mockCluster);
+
+    test.assertOutput(new File("data/top_queries_expected_top_3.txt"));
+  }
+
+  /** Parameters given to the top_queries script. */
+  private static String[] scriptArgs() {
+    return new String[] {
+      "n=3",
+      "reducers=1",
+      "input=top_queries_input_data.txt",
+      "output=top_3_queries",
+    };
+  }
+
+  /** Expected rows for the three most frequent queries. */
+  private static String[] topThree() {
+    return new String[] {
+      "(yahoo,25)",
+      "(facebook,15)",
+      "(twitter,7)",
+    };
+  }
+
+  /** Tab separated lines used to replace the "data" alias. */
+  private static String[] inlineInput() {
+    return new String[] {
+      "yahoo\t10",
+      "twitter\t7",
+      "facebook\t10",
+      "yahoo\t15",
+      "facebook\t5",
+      "a\t1",
+      "b\t2",
+      "c\t3",
+      "d\t4",
+      "e\t5",
+    };
+  }
+
+  /** The top queries script written as an array of statements. */
+  private static String[] inlineScript() {
+    return new String[] {
+      "data = LOAD 'top_queries_input_data.txt' AS (query:CHARARRAY, count:INT);",
+      "queries_group = GROUP data BY query PARALLEL 1;",
+      "queries_sum = FOREACH queries_group GENERATE group AS query, SUM(data.count) AS count;",
+      "queries_ordered = ORDER queries_sum BY count DESC PARALLEL 1;",
+      "queries_limit = LIMIT queries_ordered 3;",
+      "STORE queries_limit INTO 'top_3_queries';",
+    };
+  }
+}
Added: hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java (added)
+++ hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java Thu Aug 19 04:33:33 2010
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
+ * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is
+ * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and limitations under the License.
+ */
+package org.apache.pig.test.pigunit.pig;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import junit.framework.Assert;
+
+import org.apache.pig.pigunit.pig.GruntParser;
+import org.junit.Before;
+import org.junit.Test;
+
+
+public class TestGruntParser {
+ private GruntParser parser;
+ private Map<String, String> override;
+
+ @SuppressWarnings("serial")
+ @Before
+ public void setUp() {
+ override = new HashMap<String, String>() {{
+ put("STORE", "");
+ put("DUMP", "");
+ }};
+ parser = new GruntParser(new StringReader(""), override);
+ }
+
+ @Test
+ public void testRemoveStores() throws IOException {
+ Assert.assertEquals("", parser.override("STORE output INTO '/path';"));
+
+ override.remove("STORE");
+ Assert.assertEquals(
+ "STORE output INTO '/path';", parser.override("STORE output INTO '/path';"));
+ }
+
+ @Test
+ public void testRemoveDumps() throws IOException {
+ Assert.assertEquals("", parser.override("DUMP output;"));
+
+ override.remove("DUMP");
+ Assert.assertEquals("DUMP output;", parser.override("DUMP output;"));
+ }
+
+ @Test
+ public void testReplaceLoad() throws IOException {
+ override.put("A", "A = LOAD 'file';");
+ Assert.assertEquals(
+ "A = LOAD 'file';",
+ parser.override("A = LOAD 'input.txt' AS (query:CHARARRAY);"));
+ }
+
+ @Test
+ public void testGetStoreAlias() throws IOException {
+ override.remove("STORE");
+ parser.override("STORE output INTO '/path'");
+ Assert.assertEquals("output", override.get("LAST_STORE_ALIAS"));
+ }
+}
Added: hadoop/pig/trunk/test/pigunit-tests
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/pigunit-tests?rev=987014&view=auto
==============================================================================
--- hadoop/pig/trunk/test/pigunit-tests (added)
+++ hadoop/pig/trunk/test/pigunit-tests Thu Aug 19 04:33:33 2010
@@ -0,0 +1,2 @@
+**/TestPigTest.java
+**/TestGruntParser.java