You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2008/07/30 21:46:05 UTC

svn commit: r681184 - in /incubator/pig/trunk: CHANGES.txt src/org/apache/pig/data/DistinctDataBag.java test/org/apache/pig/test/TestDataBag.java

Author: gates
Date: Wed Jul 30 12:46:04 2008
New Revision: 681184

URL: http://svn.apache.org/viewvc?rev=681184&view=rev
Log:
PIG-342: Fix DistinctDataBag to recalculate size after it has spilled.


Modified:
    incubator/pig/trunk/CHANGES.txt
    incubator/pig/trunk/src/org/apache/pig/data/DistinctDataBag.java
    incubator/pig/trunk/test/org/apache/pig/test/TestDataBag.java

Modified: incubator/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/CHANGES.txt?rev=681184&r1=681183&r2=681184&view=diff
==============================================================================
--- incubator/pig/trunk/CHANGES.txt (original)
+++ incubator/pig/trunk/CHANGES.txt Wed Jul 30 12:46:04 2008
@@ -352,3 +352,5 @@
 
     PIG-34: updated CHANGES.txt
 
+    PIG-342: Fix DistinctDataBag to recalculate size after it has spilled. (bdimcheff via gates)
+

Modified: incubator/pig/trunk/src/org/apache/pig/data/DistinctDataBag.java
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/src/org/apache/pig/data/DistinctDataBag.java?rev=681184&r1=681183&r2=681184&view=diff
==============================================================================
--- incubator/pig/trunk/src/org/apache/pig/data/DistinctDataBag.java (original)
+++ incubator/pig/trunk/src/org/apache/pig/data/DistinctDataBag.java Wed Jul 30 12:46:04 2008
@@ -67,6 +67,28 @@
         return true;
     }
     
+    
+    public long size() {
+        if (mSpillFiles != null && mSpillFiles.size() > 0){
+            //We need to recalculate size to guarantee a count of unique 
+            //entries including those on disk
+            Iterator<Tuple> iter = iterator();
+            int newSize = 0;
+            while (iter.hasNext()) {
+                newSize++;
+                iter.next();
+            }
+            
+            synchronized(mContents) {
+                //we don't want adds to change our numbers
+                //the lock may need to cover more of the method
+                mSize = newSize;
+            }
+        }
+        return mSize;
+    }
+    
+    
     @Override
     public Iterator<Tuple> iterator() {
         return new DistinctDataBagIterator();
@@ -84,7 +106,6 @@
     @Override
     public void addAll(DataBag b) {
         synchronized (mContents) {
-            mSize += b.size();
             Iterator<Tuple> i = b.iterator();
             while (i.hasNext()) {
                 if (mContents.add(i.next())) {

Modified: incubator/pig/trunk/test/org/apache/pig/test/TestDataBag.java
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/test/org/apache/pig/test/TestDataBag.java?rev=681184&r1=681183&r2=681184&view=diff
==============================================================================
--- incubator/pig/trunk/test/org/apache/pig/test/TestDataBag.java (original)
+++ incubator/pig/trunk/test/org/apache/pig/test/TestDataBag.java Wed Jul 30 12:46:04 2008
@@ -555,6 +555,8 @@
             }
             mgr.forceSpill();
         }
+        
+        assertEquals("Size of distinct data bag is incorrect", b.size(), rightAnswer.size());
 
         // Read tuples back, hopefully they come out in the same order.
         Iterator<Tuple> bIter = b.iterator();