You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2008/07/30 21:46:05 UTC
svn commit: r681184 - in /incubator/pig/trunk: CHANGES.txt
src/org/apache/pig/data/DistinctDataBag.java
test/org/apache/pig/test/TestDataBag.java
Author: gates
Date: Wed Jul 30 12:46:04 2008
New Revision: 681184
URL: http://svn.apache.org/viewvc?rev=681184&view=rev
Log:
PIG-342: Fix DistinctDataBag to recalculate size after it has spilled.
Modified:
incubator/pig/trunk/CHANGES.txt
incubator/pig/trunk/src/org/apache/pig/data/DistinctDataBag.java
incubator/pig/trunk/test/org/apache/pig/test/TestDataBag.java
Modified: incubator/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/CHANGES.txt?rev=681184&r1=681183&r2=681184&view=diff
==============================================================================
--- incubator/pig/trunk/CHANGES.txt (original)
+++ incubator/pig/trunk/CHANGES.txt Wed Jul 30 12:46:04 2008
@@ -352,3 +352,5 @@
PIG-34: updated CHANGES.txt
+ PIG-342: Fix DistinctDataBag to recalculate size after it has spilled. (bdimcheff via gates)
+
Modified: incubator/pig/trunk/src/org/apache/pig/data/DistinctDataBag.java
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/src/org/apache/pig/data/DistinctDataBag.java?rev=681184&r1=681183&r2=681184&view=diff
==============================================================================
--- incubator/pig/trunk/src/org/apache/pig/data/DistinctDataBag.java (original)
+++ incubator/pig/trunk/src/org/apache/pig/data/DistinctDataBag.java Wed Jul 30 12:46:04 2008
@@ -67,6 +67,28 @@
return true;
}
+
+ public long size() {
+ if (mSpillFiles != null && mSpillFiles.size() > 0){
+ //Spill files exist, so recalculate the size by iterating over the bag;
+ //this guarantees a count of unique entries including those on disk
+ Iterator<Tuple> iter = iterator();
+ int newSize = 0; //NOTE(review): counter is int while size() returns long - confirm this is intended
+ while (iter.hasNext()) {
+ newSize++;
+ iter.next();
+ }
+
+ synchronized(mContents) {
+ //store the new count under the mContents lock so adds cannot change mSize during the update
+ //NOTE(review): the counting loop above runs outside this lock; the lock may need to cover more of the method
+ mSize = newSize;
+ }
+ }
+ return mSize;
+ }
+
+
@Override
public Iterator<Tuple> iterator() {
return new DistinctDataBagIterator();
@@ -84,7 +106,6 @@
@Override
public void addAll(DataBag b) {
synchronized (mContents) {
- mSize += b.size();
Iterator<Tuple> i = b.iterator();
while (i.hasNext()) {
if (mContents.add(i.next())) {
Modified: incubator/pig/trunk/test/org/apache/pig/test/TestDataBag.java
URL: http://svn.apache.org/viewvc/incubator/pig/trunk/test/org/apache/pig/test/TestDataBag.java?rev=681184&r1=681183&r2=681184&view=diff
==============================================================================
--- incubator/pig/trunk/test/org/apache/pig/test/TestDataBag.java (original)
+++ incubator/pig/trunk/test/org/apache/pig/test/TestDataBag.java Wed Jul 30 12:46:04 2008
@@ -555,6 +555,8 @@
}
mgr.forceSpill();
}
+
+ assertEquals("Size of distinct data bag is incorrect", b.size(), rightAnswer.size());
// Read tuples back, hopefully they come out in the same order.
Iterator<Tuple> bIter = b.iterator();