You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ol...@apache.org on 2009/01/06 02:11:41 UTC
svn commit: r731800 - in /hadoop/pig/branches/types: CHANGES.txt
src/org/apache/pig/data/DataReaderWriter.java
test/org/apache/pig/test/TestEvalPipeline.java
Author: olga
Date: Mon Jan 5 17:11:41 2009
New Revision: 731800
URL: http://svn.apache.org/viewvc?rev=731800&view=rev
Log:
PIG-558: Distinct followed by a Join results in an "Invalid size 0 for a tuple" error
Modified:
hadoop/pig/branches/types/CHANGES.txt
hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java
hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java
Modified: hadoop/pig/branches/types/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/CHANGES.txt?rev=731800&r1=731799&r2=731800&view=diff
==============================================================================
--- hadoop/pig/branches/types/CHANGES.txt (original)
+++ hadoop/pig/branches/types/CHANGES.txt Mon Jan 5 17:11:41 2009
@@ -347,3 +347,7 @@
PIG-563: support for multiple combiner invocations (pradeepk via olgan)
PIG-580: using combiner to compute distinct aggs (pradeepk via olgan)
+
+ PIG-558: Distinct followed by a Join results in Invalid size 0 for a tuple
+ error (pradeepk via olgan)
+
Modified: hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java?rev=731800&r1=731799&r2=731800&view=diff
==============================================================================
--- hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java (original)
+++ hadoop/pig/branches/types/src/org/apache/pig/data/DataReaderWriter.java Mon Jan 5 17:11:41 2009
@@ -53,7 +53,9 @@
// front and then filling in the spaces.
// Read the size.
int sz = in.readInt();
- if (sz < 1) {
+ // if sz == 0, we construct an "empty" tuple -
+ // presumably the writer wrote an empty tuple!
+ if (sz < 0) {
throw new IOException("Invalid size " + sz +
" for a tuple");
}
Modified: hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java?rev=731800&r1=731799&r2=731800&view=diff
==============================================================================
--- hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java (original)
+++ hadoop/pig/branches/types/test/org/apache/pig/test/TestEvalPipeline.java Mon Jan 5 17:11:41 2009
@@ -60,6 +60,7 @@
private PigServer pigServer;
TupleFactory mTf = TupleFactory.getInstance();
+ BagFactory mBf = BagFactory.getInstance();
@Before
@Override
@@ -984,5 +985,64 @@
assertEquals((LOOP_COUNT * LOOP_COUNT)/2, numRows);
}
+
+ @Test
+ public void testCogroupAfterDistinct() throws Exception {
+ String[] input1 = {
+ "abc",
+ "abc",
+ "def",
+ "def",
+ "def",
+ "abc",
+ "def",
+ "ghi"
+ };
+ String[] input2 = {
+ "ghi 4",
+ "rst 12344",
+ "uvw 1",
+ "xyz 4141"
+ };
+ Util.createInputFile(cluster, "table1", input1);
+ Util.createInputFile(cluster, "table2", input2);
+
+ pigServer.registerQuery("nonuniqtable1 = LOAD 'table1' AS (f1:chararray);");
+ pigServer.registerQuery("table1 = DISTINCT nonuniqtable1;");
+ pigServer.registerQuery("table2 = LOAD 'table2' AS (f1:chararray, f2:int);");
+ pigServer.registerQuery("temp = COGROUP table1 BY f1 INNER, table2 BY f1;");
+ Iterator<Tuple> it = pigServer.openIterator("temp");
+
+ // results should be:
+ // (abc,{(abc)},{})
+ // (def,{(def)},{})
+ // (ghi,{(ghi)},{(ghi,4)})
+ HashMap<String, Tuple> results = new HashMap<String, Tuple>();
+ Object[] row = new Object[] { "abc",
+ Util.createBagOfOneColumn(new String[] { "abc"}), mBf.newDefaultBag() };
+ results.put("abc", Util.createTuple(row));
+ row = new Object[] { "def",
+ Util.createBagOfOneColumn(new String[] { "def"}), mBf.newDefaultBag() };
+ results.put("def", Util.createTuple(row));
+ Object[] thirdColContents = new Object[] { "ghi", 4 };
+ Tuple t = Util.createTuple(thirdColContents);
+ row = new Object[] { "ghi",
+ Util.createBagOfOneColumn(new String[] { "ghi"}), Util.createBag(new Tuple[] { t })};
+ results.put("ghi", Util.createTuple(row));
+
+ while(it.hasNext()) {
+ Tuple tup = it.next();
+ List<Object> fields = tup.getAll();
+ Tuple expected = results.get((String)fields.get(0));
+ int i = 0;
+ for (Object field : fields) {
+ assertEquals(expected.get(i++), field);
+ }
+ }
+
+ Util.deleteFile(cluster, "table1");
+ Util.deleteFile(cluster, "table2");
+ }
+
}