You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2009/05/07 21:23:29 UTC

svn commit: r772750 - in /hadoop/pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/ test/org/apache/pig/test/

Author: gates
Date: Thu May  7 19:23:25 2009
New Revision: 772750

URL: http://svn.apache.org/viewvc?rev=772750&view=rev
Log:
PIG-800: Fix distinct and order in local mode to not go into an infinite loop.


Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java
    hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=772750&r1=772749&r2=772750&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Thu May  7 19:23:25 2009
@@ -63,6 +63,9 @@
 PIG-774: Pig does not handle Chinese characters (in both the parameter subsitution
 using -param_file or embedded in the Pig script) correctly (daijy)
 
+PIG-800: Fix distinct and order in local mode to not go into an infinite loop
+(gates).
+
 Release 0.2.0
 
 INCOMPATIBLE CHANGES

Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java?rev=772750&r1=772749&r2=772750&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java Thu May  7 19:23:25 2009
@@ -79,8 +79,14 @@
             while (in.returnStatus != POStatus.STATUS_EOP) {
                 if (in.returnStatus == POStatus.STATUS_ERR) {
                     log.error("Error in reading from inputs");
-                    continue;
+                    return in;
+                    //continue;
                 } else if (in.returnStatus == POStatus.STATUS_NULL) {
+                    // Ignore the null, read the next tuple.  It's not clear
+                    // to me that we should ever get this, or if we should, 
+                    // how it differs from EOP.  But ignoring it here seems
+                    // to work.
+                    in = processInput();
                     continue;
                 }
                 distinctBag.add((Tuple) in.result);

Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java?rev=772750&r1=772749&r2=772750&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java Thu May  7 19:23:25 2009
@@ -258,8 +258,11 @@
 			while (res.returnStatus != POStatus.STATUS_EOP) {
 				if (res.returnStatus == POStatus.STATUS_ERR) {
 					log.error("Error in reading from the inputs");
-					continue;
+					return res;
+					//continue;
 				} else if (res.returnStatus == POStatus.STATUS_NULL) {
+                    // ignore the null, read the next tuple.
+                    res = processInput();
 					continue;
 				}
 				sortedBag.add((Tuple) res.result);

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java?rev=772750&r1=772749&r2=772750&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java Thu May  7 19:23:25 2009
@@ -20,13 +20,19 @@
 
 import junit.framework.Assert;
 import junit.framework.TestCase;
+
+import org.apache.pig.EvalFunc;
 import org.apache.pig.PigServer;
+import org.apache.pig.data.BagFactory;
+import org.apache.pig.data.DataBag;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.test.utils.TestHelper;
 import org.junit.Test;
 
 import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.PrintStream;
 import java.text.DecimalFormat;
 import java.util.Iterator;
 
@@ -91,6 +97,53 @@
 
         verifyUnion( "c", 30 + 50 );
     }
+    
+    @Test
+    public void testPig800Distinct() throws Exception {
+        // Regression test for PIG-800
+        File fp1 = File.createTempFile("test", "txt");
+        PrintStream ps = new PrintStream(new FileOutputStream(fp1));
+        
+        ps.println("1\t1}");
+        ps.close();
+        
+        pig.registerQuery("A = load '" + Util.generateURI(fp1.toString()) + "'; ");
+        pig.registerQuery("B = foreach A generate flatten(" + Pig800Udf.class.getName() + "($0));");
+        pig.registerQuery("C = distinct B;");
+        
+        Iterator<Tuple> iter = pig.openIterator("C");
+        // Before PIG-800 was fixed this went into an infinite loop, so just
+        // managing to open the iterator is sufficient.
+        
+    }
+    
+    @Test
+    public void testPig800Sort() throws Exception {
+        // Regression test for PIG-800
+        File fp1 = File.createTempFile("test", "txt");
+        PrintStream ps = new PrintStream(new FileOutputStream(fp1));
+        
+        ps.println("1\t1}");
+        ps.close();
+        
+        pig.registerQuery("A = load '" + Util.generateURI(fp1.toString()) + "'; ");
+        pig.registerQuery("B = foreach A generate flatten(" + Pig800Udf.class.getName() + "($0));");
+        pig.registerQuery("C = order B by $0;");
+        
+        Iterator<Tuple> iter = pig.openIterator("C");
+        // Before PIG-800 was fixed this went into an infinite loop, so just
+        // managing to open the iterator is sufficient.
+        
+    }
+    
+    static public class Pig800Udf extends EvalFunc<DataBag> {
+        
+        @Override
+        public DataBag exec(Tuple input) throws IOException {
+            DataBag output = BagFactory.getInstance().newDefaultBag();
+            return output;
+        }
+    }
 
     //verifies results
     public void verifyUnion(String id, int actualCount ) throws Exception {