Posted to commits@pig.apache.org by ol...@apache.org on 2009/01/06 02:11:41 UTC

svn commit: r731800 - in /hadoop/pig/branches/types: CHANGES.txt src/org/apache/pig/data/DataReaderWriter.java test/org/apache/pig/test/TestEvalPipeline.java

Author: olga
Date: Mon Jan  5 17:11:41 2009
New Revision: 731800

URL: http://svn.apache.org/viewvc?rev=731800&view=rev
Log:
PIG-558: Distinct followed by a Join results in "Invalid size 0 for a tuple" error
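
Background: a serialized tuple is written as a field count followed by that
many datums, and the reader in DataReaderWriter rejected a count of 0 even
though a pipeline such as DISTINCT followed by a join can legitimately write
an empty tuple. A minimal round-trip sketch of the failure (the class name and
stream setup here are illustrative; the tuple and reader calls are Pig's
org.apache.pig.data API):

    import java.io.*;
    import org.apache.pig.data.*;

    public class EmptyTupleRoundTrip {
        public static void main(String[] args) throws Exception {
            // A tuple with zero fields, as an upstream operator may emit.
            Tuple empty = TupleFactory.getInstance().newTuple();

            // Serialize: the writer emits the type byte and a size of 0.
            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            empty.write(new DataOutputStream(bos));

            // Deserialize: before this fix, readDatum threw
            // IOException("Invalid size 0 for a tuple"); after it,
            // an empty tuple comes back.
            DataInput in = new DataInputStream(
                    new ByteArrayInputStream(bos.toByteArray()));
            Tuple back = (Tuple) DataReaderWriter.readDatum(in);
            System.out.println(back.size());   // prints 0
        }
    }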

Modified:
    hadoop/pig/branches/types/CHANGES.txt
    hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java
    hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java

Modified: hadoop/pig/branches/types/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/CHANGES.txt?rev=731800&r1=731799&r2=731800&view=diff
==============================================================================
--- hadoop/pig/branches/types/CHANGES.txt (original)
+++ hadoop/pig/branches/types/CHANGES.txt Mon Jan  5 17:11:41 2009
@@ -347,3 +347,7 @@
     PIG-563: support for multiple combiner invocations (pradeepk via olgan)
 
     PIG-580: using combiner to compute distinct aggs (pradeepk via olgan)
+
+    PIG-558: Distinct followed by a Join results in Invalid size 0 for a tuple
+    error (pradeepk via olgan)
+

Modified: hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java?rev=731800&r1=731799&r2=731800&view=diff
==============================================================================
--- hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java (original)
+++ hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java Mon Jan  5 17:11:41 2009
@@ -53,7 +53,9 @@
                 // front and then filling in the spaces.
                 // Read the size.
                 int sz = in.readInt();
-                if (sz < 1) {
+                // if sz == 0, we construct an "empty" tuple -
+                // presumably the writer wrote an empty tuple!
+                if (sz < 0) {
                     throw new IOException("Invalid size " + sz +
                         " for a tuple");
                 }
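
After this change, only a negative size is treated as corruption; a size of 0
yields an empty tuple. In context, the TUPLE branch of
DataReaderWriter.readDatum reads roughly as follows (an abridged sketch of the
patched method, not a verbatim copy; mTupleFactory stands for the class's
static TupleFactory instance):

    case DataType.TUPLE: {
        // Tuples are stored with the size written up front, so the
        // reader allocates the whole tuple and then fills in the fields.
        int sz = in.readInt();
        // if sz == 0, we construct an "empty" tuple -
        // presumably the writer wrote an empty tuple!
        if (sz < 0) {
            throw new IOException("Invalid size " + sz +
                " for a tuple");
        }
        Tuple t = mTupleFactory.newTuple(sz);
        for (int i = 0; i < sz; i++) {
            t.set(i, readDatum(in));
        }
        return t;
    }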

Modified: hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java?rev=731800&r1=731799&r2=731800&view=diff
==============================================================================
--- hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java (original)
+++ hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java Mon Jan  5 17:11:41 2009
@@ -60,6 +60,7 @@
     private PigServer pigServer;
 
     TupleFactory mTf = TupleFactory.getInstance();
+    BagFactory mBf = BagFactory.getInstance();
     
     @Before
     @Override
@@ -984,5 +985,64 @@
 
         assertEquals((LOOP_COUNT * LOOP_COUNT)/2, numRows);
     }
+    
+    @Test
+    public void testCogroupAfterDistinct() throws Exception {
+        String[] input1 = {
+                "abc",
+                "abc",
+                "def",
+                "def",
+                "def",
+                "abc",
+                "def",
+                "ghi"
+                };
+        String[] input2 = {
+            "ghi	4",
+            "rst	12344",
+            "uvw	1",
+            "xyz	4141"
+            };
+        Util.createInputFile(cluster, "table1", input1);
+        Util.createInputFile(cluster, "table2", input2);
+        
+        pigServer.registerQuery("nonuniqtable1 = LOAD 'table1' AS (f1:chararray);");
+        pigServer.registerQuery("table1 = DISTINCT nonuniqtable1;");
+        pigServer.registerQuery("table2 = LOAD 'table2' AS (f1:chararray, f2:int);");
+        pigServer.registerQuery("temp = COGROUP table1 BY f1 INNER, table2 BY f1;");
+        Iterator<Tuple> it = pigServer.openIterator("temp");
+        
+        // results should be:
+        // (abc,{(abc)},{})
+        // (def,{(def)},{})
+        // (ghi,{(ghi)},{(ghi,4)})
+        HashMap<String, Tuple> results = new HashMap<String, Tuple>();
+        Object[] row = new Object[] { "abc",
+                Util.createBagOfOneColumn(new String[] { "abc"}), mBf.newDefaultBag() };
+        results.put("abc", Util.createTuple(row)); 
+        row = new Object[] { "def",
+                Util.createBagOfOneColumn(new String[] { "def"}), mBf.newDefaultBag() };
+        results.put("def", Util.createTuple(row));
+        Object[] thirdColContents = new Object[] { "ghi", 4 };
+        Tuple t = Util.createTuple(thirdColContents);
+        row = new Object[] { "ghi",
+                Util.createBagOfOneColumn(new String[] { "ghi"}), Util.createBag(new Tuple[] { t })};
+        results.put("ghi", Util.createTuple(row));
+
+        while(it.hasNext()) {
+            Tuple tup = it.next();
+            List<Object> fields = tup.getAll();
+            Tuple expected = results.get((String)fields.get(0));
+            int i = 0;
+            for (Object field : fields) {
+                assertEquals(expected.get(i++), field);
+            }
+        }
+        
+        Util.deleteFile(cluster, "table1");
+        Util.deleteFile(cluster, "table2");
+    }
+    
 
 }