You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by pr...@apache.org on 2009/01/26 22:27:16 UTC

svn commit: r737863 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POUnion.java test/org/apache/pig/test/TestUnion.java

Author: pradeepkth
Date: Mon Jan 26 21:27:16 2009
New Revision: 737863

URL: http://svn.apache.org/viewvc?rev=737863&view=rev
Log:
PIG-634: When POUnion is one of the roots of a map plan, POUnion.getNext() gives a null pointer exception

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POUnion.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestUnion.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=737863&r1=737862&r2=737863&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Mon Jan 26 21:27:16 2009
@@ -378,3 +378,6 @@
     PIG-615: Wrong number of jobs with limit (shravanmn via sms)
 
     PIG-635: POCast.java has incorrect formatting (sms)
+
+    PIG-634: When POUnion is one of the roots of a map plan, POUnion.getNext()
+    gives a null pointer exception (pradeepk)

Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POUnion.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POUnion.java?rev=737863&r1=737862&r2=737863&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POUnion.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POUnion.java Mon Jan 26 21:27:16 2009
@@ -124,7 +124,29 @@
 
         // Case 1 : Normal connected plan
         if (!isInputAttached()) {
-
+            
+            if (inputs == null || inputs.size()==0) {
+                // Neither does this Union have predecessors nor
+                // was any input attached! This can happen when we have
+                // a plan like below
+                // POUnion
+                // |
+                // |--POLocalRearrange
+                // |    |
+                // |    |-POUnion (root 2)--> This union's getNext() can lead the code here
+                // |
+                // |--POLocalRearrange (root 1)
+                
+                // The inner POUnion above is a root in the plan which has 2 roots.
+                // So these 2 roots would have input coming from different input
+                // sources (dfs files). So certain maps would be working on input only
+                // meant for "root 1" above and some maps would work on input
+                // meant only for "root 2". In the former case, "root 2" would
+                // neither get input attached to it nor does it have predecessors
+                // which is the case which can lead us here.
+                return eopResult;
+            }
+          
             while(true){
                 if (done.nextClearBit(0) >= inputs.size()) {
                     clearDone();

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestUnion.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestUnion.java?rev=737863&r1=737862&r2=737863&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestUnion.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestUnion.java Mon Jan 26 21:27:16 2009
@@ -21,8 +21,11 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.util.Iterator;
 
+import org.apache.pig.ExecType;
 import org.apache.pig.FuncSpec;
+import org.apache.pig.PigServer;
 import org.apache.pig.backend.executionengine.ExecException;
 import org.apache.pig.builtin.PigStorage;
 import org.apache.pig.data.DataBag;
@@ -167,4 +170,50 @@
         assertEquals(true, TestHelper.compareBags(expBag, outBag));
     }
 
+    // Test the case when POUnion is one of the roots in a map reduce
+    // plan and the input to it can be null
+    // This can happen when we have
+    // a plan like below
+    // POUnion
+    // |
+    // |--POLocalRearrange
+    // |    |
+    // |    |-POUnion (root 2)--> getNext() on this union can be called with no input attached
+    // |
+    // |--POLocalRearrange (root 1)
+    
+    // The inner POUnion above is a root in the plan which has 2 roots.
+    // So these 2 roots would have input coming from different input
+    // sources (dfs files). So certain maps would be working on input only
+    // meant for "root 1" above and some maps would work on input
+    // meant only for "root 2". In the former case, "root 2" would
+    // neither get input attached to it nor have any predecessors.
+    @Test
+    public void testGetNextNullInput() throws Exception { // regression test for PIG-634
+        Util.createInputFile(cluster, "a.txt", new String[] {"1\t2\t3", "4\t5\t6"}); // rows keyed 1 and 4
+        Util.createInputFile(cluster, "b.txt", new String[] {"7\t8\t9", "1\t200\t300"}); // rows keyed 7 and 1
+        Util.createInputFile(cluster, "c.txt", new String[] {"1\t20\t30"}); // single row keyed 1
+        PigServer pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
+        pig.registerQuery("a = load 'a.txt' ;");
+        pig.registerQuery("b = load 'b.txt';");
+        pig.registerQuery("c = union a, b;"); // the union whose output feeds the cogroup below
+        pig.registerQuery("d = load 'c.txt' ;");
+        pig.registerQuery("e = cogroup c by $0 inner, d by $0 inner;"); // key 1 is the only $0 value present in both c and d
+        pig.explain("e", System.err); // presumably prints the plan for e to stderr; its output is not asserted on
+        // output should be 
+        // (1,{(1,2,3),(1,200,300)},{(1,20,30)})
+        Tuple expectedResult = new DefaultTuple();
+        expectedResult.append(new DataByteArray("1")); // field 1: the group key
+        Tuple[] secondFieldContents = new DefaultTuple[2]; // the two rows of c (one from a.txt, one from b.txt) with key 1
+        secondFieldContents[0] = Util.createTuple(Util.toDataByteArrays(new String[] {"1", "2", "3"}));
+        secondFieldContents[1] = Util.createTuple(Util.toDataByteArrays(new String[] {"1", "200", "300"}));
+        DataBag secondField = Util.createBag(secondFieldContents);
+        expectedResult.append(secondField); // field 2: bag of matching rows from c
+        DataBag thirdField = Util.createBag(new Tuple[]{Util.createTuple(Util.toDataByteArrays(new String[]{"1", "20", "30"}))});
+        expectedResult.append(thirdField); // field 3: bag of matching rows from d
+        Iterator<Tuple> it = pig.openIterator("e");
+        assertEquals(expectedResult, it.next()); // first tuple of e must equal the expected tuple
+        assertFalse(it.hasNext()); // and it must be the only tuple
+    }
+    
 }