You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2010/02/09 20:45:31 UTC

svn commit: r908177 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/data/DefaultDataBag.java test/org/apache/pig/test/TestDataBag.java test/org/apache/pig/test/TestNullConstant.java

Author: daijy
Date: Tue Feb  9 19:45:31 2010
New Revision: 908177

URL: http://svn.apache.org/viewvc?rev=908177&view=rev
Log:
PIG-1231: DefaultDataBagIterator.hasNext() should be idempotent in all cases

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/data/DefaultDataBag.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestDataBag.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestNullConstant.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=908177&r1=908176&r2=908177&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Tue Feb  9 19:45:31 2010
@@ -388,6 +388,9 @@
 
 PIG-1210: fieldsToRead send the same fields more than once in some cases (daijy)
 
+PIG-1231: DefaultDataBagIterator.hasNext() should be idempotent in all cases
+(daijy)
+
 Release 0.5.0
 
 INCOMPATIBLE CHANGES

Modified: hadoop/pig/trunk/src/org/apache/pig/data/DefaultDataBag.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DefaultDataBag.java?rev=908177&r1=908176&r2=908177&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/DefaultDataBag.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/DefaultDataBag.java Tue Feb  9 19:45:31 2010
@@ -50,6 +50,8 @@
 
     private static final Log log = LogFactory.getLog(DefaultDataBag.class);
     
+    boolean hasCachedTuple = false;
+    
     public DefaultDataBag() {
         mContents = new ArrayList<Tuple>();
     }
@@ -74,6 +76,7 @@
     }
     
     public Iterator<Tuple> iterator() {
+        hasCachedTuple = false;
         return new DefaultDataBagIterator();
     }
 
@@ -150,9 +153,12 @@
         }
 
         public boolean hasNext() { 
-            // See if we can find a tuple.  If so, buffer it.
+            // Once we call hasNext(), set the flag, so we can call hasNext() repeatedly without fetching the next tuple
+            if (hasCachedTuple)
+                return (mBuf != null);
             mBuf = next();
-            return mBuf != null;
+            hasCachedTuple = true;
+            return (mBuf != null);
         }
 
         public Tuple next() {
@@ -161,9 +167,9 @@
             if ((mCntr++ & 0x3ff) == 0) reportProgress();
 
             // If there's one in the buffer, use that one.
-            if (mBuf != null) {
+            if (hasCachedTuple) {
                 Tuple t = mBuf;
-                mBuf = null;
+                hasCachedTuple = false;
                 return t;
             }
 

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestDataBag.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestDataBag.java?rev=908177&r1=908176&r2=908177&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestDataBag.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestDataBag.java Tue Feb  9 19:45:31 2010
@@ -1082,6 +1082,40 @@
         }
         assertEquals(bg6, bg7);
     }
+    
+    // See PIG-1231
+    @Test
+    public void testDataBagIterIdempotent() throws Exception {
+        DataBag bg0 = new DefaultDataBag();
+        processDataBag(bg0, true);
+        
+        DataBag bg1 = new DistinctDataBag();
+        processDataBag(bg1, true);
+        
+        DataBag bg2 = new InternalDistinctBag();
+        processDataBag(bg2, true);
+        
+        DataBag bg3 = new InternalSortedBag();
+        processDataBag(bg3, true);
+        
+        DataBag bg4 = new SortedDataBag(null);
+        processDataBag(bg4, true);
+        
+        DataBag bg5 = new InternalCachedBag(0, 0);
+        processDataBag(bg5, false);
+    }
+    
+    void processDataBag(DataBag bg, boolean doSpill) {
+        Tuple t = TupleFactory.getInstance().newTuple(new Integer(0));
+        bg.add(t);
+        if (doSpill)
+            bg.spill();
+        Iterator<Tuple> iter = bg.iterator();
+        assertTrue(iter.hasNext());
+        iter.next();
+        assertFalse(iter.hasNext());
+        assertFalse("hasNext should be idempotent", iter.hasNext());        
+    }
 }
 
 

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestNullConstant.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestNullConstant.java?rev=908177&r1=908176&r2=908177&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestNullConstant.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestNullConstant.java Tue Feb  9 19:45:31 2010
@@ -160,10 +160,8 @@
         pigServer.registerQuery("b = foreach a generate {(null)}, ['2'#null];");
         Iterator<Tuple> it = pigServer.openIterator("b");
         Tuple t = it.next();
-System.out.println("tuple: " + t);
         assertEquals(null, ((DataBag)t.get(0)).iterator().next().get(0));
         assertEquals(null, ((Map<String, Object>)t.get(1)).get("2"));
-        
     }
 
     @Test