You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2010/03/20 02:12:17 UTC

svn commit: r925513 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/data/DefaultAbstractBag.java src/org/apache/pig/data/InternalCachedBag.java src/org/apache/pig/data/InternalDistinctBag.java src/org/apache/pig/data/InternalSortedBag.java

Author: daijy
Date: Sat Mar 20 01:12:17 2010
New Revision: 925513

URL: http://svn.apache.org/viewvc?rev=925513&view=rev
Log:
PIG-1307: When we spill the DefaultDataBag, we are not setting the size-changed flag to true.

Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java
    hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java
    hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java
    hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=925513&r1=925512&r2=925513&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Sat Mar 20 01:12:17 2010
@@ -159,6 +159,8 @@ OPTIMIZATIONS
 
 BUG FIXES
 
+PIG-1307: When we spill the DefaultDataBag, we are not setting the size-changed flag to true. (breed via daijy)
+
 PIG-1298: Restore file traversal behavior to Pig loaders (rding)
 
 PIG-1289: PIG Join fails while doing a filter on joined data (daijy)

Modified: hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java?rev=925513&r1=925512&r2=925513&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java Sat Mar 20 01:12:17 2010
@@ -61,7 +61,7 @@ public abstract class DefaultAbstractBag
     // to run through the disk when people ask.
     protected long mSize = 0;
 
-    protected boolean mMemSizeChanged = false;
+    protected int mLastContentsSize = -1;
 
     protected long mMemSize = 0;
 
@@ -78,7 +78,6 @@ public abstract class DefaultAbstractBag
      */
     public void add(Tuple t) {
         synchronized (mContents) {
-            mMemSizeChanged = true;
             mSize++;
             mContents.add(t);
         }
@@ -90,7 +89,6 @@ public abstract class DefaultAbstractBag
      */
     public void addAll(DataBag b) {
         synchronized (mContents) {
-            mMemSizeChanged = true;
             mSize += b.size();
             Iterator<Tuple> i = b.iterator();
             while (i.hasNext()) mContents.add(i.next());
@@ -103,7 +101,6 @@ public abstract class DefaultAbstractBag
      */
     public void addAll(Collection<Tuple> c) {
         synchronized (mContents) {
-            mMemSizeChanged = true;
             mSize += c.size();
             Iterator<Tuple> i = c.iterator();
             while (i.hasNext()) mContents.add(i.next());
@@ -114,22 +111,24 @@ public abstract class DefaultAbstractBag
      * Return the size of memory usage.
      */
     public long getMemorySize() {
-        if (!mMemSizeChanged) return mMemSize;
-
-        long used = 0;
-        // I can't afford to talk through all the tuples every time the
-        // memory manager wants to know if it's time to dump.  Just sample
-        // the first 100 and see what we get.  This may not be 100%
-        // accurate, but it's just an estimate anyway.
         int j;
         int numInMem = 0;
+        long used = 0;
+
         synchronized (mContents) {
+            if (mLastContentsSize == mContents.size()) return mMemSize;
+
+            // I can't afford to walk through all the tuples every time the
+            // memory manager wants to know if it's time to dump.  Just sample
+            // the first 100 and see what we get.  This may not be 100%
+            // accurate, but it's just an estimate anyway.
             numInMem = mContents.size();
             // Measure only what's in memory, not what's on disk.
             Iterator<Tuple> i = mContents.iterator();
             for (j = 0; i.hasNext() && j < 100; j++) { 
                 used += i.next().getMemorySize();
             }
+            mLastContentsSize = numInMem;
         }
 
         if (numInMem > 100) {
@@ -150,7 +149,6 @@ public abstract class DefaultAbstractBag
         }
         
         mMemSize = used;
-        mMemSizeChanged = false;
         return used;
     }
 

Modified: hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java?rev=925513&r1=925512&r2=925513&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java Sat Mar 20 01:12:17 2010
@@ -83,7 +83,6 @@ public class InternalCachedBag extends D
         }
                 
         if(mContents.size() < cacheLimit)  {
-            mMemSizeChanged = true;
             mContents.add(t);           
             if(mContents.size() < 100)
             {

Modified: hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java?rev=925513&r1=925512&r2=925513&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java Sat Mar 20 01:12:17 2010
@@ -149,7 +149,6 @@ public class InternalDistinctBag extends
     	}
     	            	
         if (mContents.add(t)) {
-        	mMemSizeChanged = true;
         	mSize ++;
                 
         	 // check how many tuples memory can hold by getting average
@@ -227,7 +226,6 @@ public class InternalDistinctBag extends
             }
         }
         mContents.clear();
-        mMemSizeChanged = true;
         memUsage = 0;
 
         // Increment the spill count

Modified: hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java?rev=925513&r1=925512&r2=925513&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java Sat Mar 20 01:12:17 2010
@@ -145,7 +145,6 @@ public class InternalSortedBag extends D
     		spill();
     	}
     	        
-    	mMemSizeChanged = true;
         mContents.add(t);
         
         // check how many tuples memory can hold by getting average
@@ -236,7 +235,6 @@ public class InternalSortedBag extends D
             }
         }
         mContents.clear();
-        mMemSizeChanged = true;
         memUsage = 0;
         
         // Increment the spill count