You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2009/05/07 21:23:29 UTC
svn commit: r772750 - in /hadoop/pig/trunk: ./
src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/
test/org/apache/pig/test/
Author: gates
Date: Thu May 7 19:23:25 2009
New Revision: 772750
URL: http://svn.apache.org/viewvc?rev=772750&view=rev
Log:
PIG-800: Fix distinct and order in local mode to not go into an infinite loop.
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java
hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=772750&r1=772749&r2=772750&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Thu May 7 19:23:25 2009
@@ -63,6 +63,9 @@
PIG-774: Pig does not handle Chinese characters (in both the parameter subsitution
using -param_file or embedded in the Pig script) correctly (daijy)
+PIG-800: Fix distinct and order in local mode to not go into an infinite loop
+(gates).
+
Release 0.2.0
INCOMPATIBLE CHANGES
Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java?rev=772750&r1=772749&r2=772750&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java Thu May 7 19:23:25 2009
@@ -79,8 +79,14 @@
while (in.returnStatus != POStatus.STATUS_EOP) {
if (in.returnStatus == POStatus.STATUS_ERR) {
log.error("Error in reading from inputs");
- continue;
+ return in;
+ //continue;
} else if (in.returnStatus == POStatus.STATUS_NULL) {
+ // Ignore the null, read the next tuple. It's not clear
+ // to me that we should ever get this, or if we should,
+ // how it differs from EOP. But ignoring it here seems
+ // to work.
+ in = processInput();
continue;
}
distinctBag.add((Tuple) in.result);
Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java?rev=772750&r1=772749&r2=772750&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java Thu May 7 19:23:25 2009
@@ -258,8 +258,11 @@
while (res.returnStatus != POStatus.STATUS_EOP) {
if (res.returnStatus == POStatus.STATUS_ERR) {
log.error("Error in reading from the inputs");
- continue;
+ return res;
+ //continue;
} else if (res.returnStatus == POStatus.STATUS_NULL) {
+ // ignore the null, read the next tuple.
+ res = processInput();
continue;
}
sortedBag.add((Tuple) res.result);
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java?rev=772750&r1=772749&r2=772750&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java Thu May 7 19:23:25 2009
@@ -20,13 +20,19 @@
import junit.framework.Assert;
import junit.framework.TestCase;
+
+import org.apache.pig.EvalFunc;
import org.apache.pig.PigServer;
+import org.apache.pig.data.BagFactory;
+import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.test.utils.TestHelper;
import org.junit.Test;
import java.io.File;
+import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.PrintStream;
import java.text.DecimalFormat;
import java.util.Iterator;
@@ -91,6 +97,53 @@
verifyUnion( "c", 30 + 50 );
}
+
+ @Test
+ public void testPig800Distinct() throws Exception {
+ // Regression test for PIG-800: DISTINCT over the flattened output of Pig800Udf.
+ File fp1 = File.createTempFile("test", "txt");
+ PrintStream ps = new PrintStream(new FileOutputStream(fp1));
+ // Write the single input line "1<TAB>1}" and close the file before loading it.
+ ps.println("1\t1}");
+ ps.close();
+ // A loads the file, B flattens the bag Pig800Udf returns for $0, C is distinct B.
+ pig.registerQuery("A = load '" + Util.generateURI(fp1.toString()) + "'; ");
+ pig.registerQuery("B = foreach A generate flatten(" + Pig800Udf.class.getName() + "($0));");
+ pig.registerQuery("C = distinct B;");
+
+ Iterator<Tuple> iter = pig.openIterator("C");
+ // Before PIG-800 was fixed this went into an infinite loop, so just
+ // managing to open the iterator is sufficient.
+ // NOTE(review): iter is never read and the temp file fp1 is never deleted.
+ }
+
+ @Test
+ public void testPig800Sort() throws Exception {
+ // Regression test for PIG-800: ORDER BY over the flattened output of Pig800Udf.
+ File fp1 = File.createTempFile("test", "txt");
+ PrintStream ps = new PrintStream(new FileOutputStream(fp1));
+ // Write the single input line "1<TAB>1}" and close the file before loading it.
+ ps.println("1\t1}");
+ ps.close();
+ // A loads the file, B flattens the bag Pig800Udf returns for $0, C orders B by $0.
+ pig.registerQuery("A = load '" + Util.generateURI(fp1.toString()) + "'; ");
+ pig.registerQuery("B = foreach A generate flatten(" + Pig800Udf.class.getName() + "($0));");
+ pig.registerQuery("C = order B by $0;");
+
+ Iterator<Tuple> iter = pig.openIterator("C");
+ // Before PIG-800 was fixed this went into an infinite loop, so just
+ // managing to open the iterator is sufficient.
+ // NOTE(review): iter is never read and the temp file fp1 is never deleted.
+ }
+
+ static public class Pig800Udf extends EvalFunc<DataBag> {
+ // Returns the bag from newDefaultBag() without adding tuples; the input is not read.
+ @Override
+ public DataBag exec(Tuple input) throws IOException {
+ DataBag output = BagFactory.getInstance().newDefaultBag();
+ return output;
+ }
+ }
//verifies results
public void verifyUnion(String id, int actualCount ) throws Exception {