You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2010/03/20 02:12:17 UTC
svn commit: r925513 - in /hadoop/pig/trunk: CHANGES.txt
src/org/apache/pig/data/DefaultAbstractBag.java
src/org/apache/pig/data/InternalCachedBag.java
src/org/apache/pig/data/InternalDistinctBag.java
src/org/apache/pig/data/InternalSortedBag.java
Author: daijy
Date: Sat Mar 20 01:12:17 2010
New Revision: 925513
URL: http://svn.apache.org/viewvc?rev=925513&view=rev
Log:
PIG-1307: when we spill the DefaultDataBag we are not setting the size-changed flag to true.
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java
hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java
hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java
hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=925513&r1=925512&r2=925513&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Sat Mar 20 01:12:17 2010
@@ -159,6 +159,8 @@ OPTIMIZATIONS
BUG FIXES
+PIG-1307: when we spill the DefaultDataBag we are not setting the size-changed flag to true. (breed via daijy)
+
PIG-1298: Restore file traversal behavior to Pig loaders (rding)
PIG-1289: PIG Join fails while doing a filter on joined data (daijy)
Modified: hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java?rev=925513&r1=925512&r2=925513&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java Sat Mar 20 01:12:17 2010
@@ -61,7 +61,7 @@ public abstract class DefaultAbstractBag
// to run through the disk when people ask.
protected long mSize = 0;
- protected boolean mMemSizeChanged = false;
+ protected int mLastContentsSize = -1;
protected long mMemSize = 0;
@@ -78,7 +78,6 @@ public abstract class DefaultAbstractBag
*/
public void add(Tuple t) {
synchronized (mContents) {
- mMemSizeChanged = true;
mSize++;
mContents.add(t);
}
@@ -90,7 +89,6 @@ public abstract class DefaultAbstractBag
*/
public void addAll(DataBag b) {
synchronized (mContents) {
- mMemSizeChanged = true;
mSize += b.size();
Iterator<Tuple> i = b.iterator();
while (i.hasNext()) mContents.add(i.next());
@@ -103,7 +101,6 @@ public abstract class DefaultAbstractBag
*/
public void addAll(Collection<Tuple> c) {
synchronized (mContents) {
- mMemSizeChanged = true;
mSize += c.size();
Iterator<Tuple> i = c.iterator();
while (i.hasNext()) mContents.add(i.next());
@@ -114,22 +111,24 @@ public abstract class DefaultAbstractBag
* Return the size of memory usage.
*/
public long getMemorySize() {
- if (!mMemSizeChanged) return mMemSize;
-
- long used = 0;
- // I can't afford to talk through all the tuples every time the
- // memory manager wants to know if it's time to dump. Just sample
- // the first 100 and see what we get. This may not be 100%
- // accurate, but it's just an estimate anyway.
int j;
int numInMem = 0;
+ long used = 0;
+
synchronized (mContents) {
+ if (mLastContentsSize == mContents.size()) return mMemSize;
+
+ // I can't afford to walk through all the tuples every time the
+ // memory manager wants to know if it's time to dump. Just sample
+ // the first 100 and see what we get. This may not be 100%
+ // accurate, but it's just an estimate anyway.
numInMem = mContents.size();
// Measure only what's in memory, not what's on disk.
Iterator<Tuple> i = mContents.iterator();
for (j = 0; i.hasNext() && j < 100; j++) {
used += i.next().getMemorySize();
}
+ mLastContentsSize = numInMem;
}
if (numInMem > 100) {
@@ -150,7 +149,6 @@ public abstract class DefaultAbstractBag
}
mMemSize = used;
- mMemSizeChanged = false;
return used;
}
Modified: hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java?rev=925513&r1=925512&r2=925513&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java Sat Mar 20 01:12:17 2010
@@ -83,7 +83,6 @@ public class InternalCachedBag extends D
}
if(mContents.size() < cacheLimit) {
- mMemSizeChanged = true;
mContents.add(t);
if(mContents.size() < 100)
{
Modified: hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java?rev=925513&r1=925512&r2=925513&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java Sat Mar 20 01:12:17 2010
@@ -149,7 +149,6 @@ public class InternalDistinctBag extends
}
if (mContents.add(t)) {
- mMemSizeChanged = true;
mSize ++;
// check how many tuples memory can hold by getting average
@@ -227,7 +226,6 @@ public class InternalDistinctBag extends
}
}
mContents.clear();
- mMemSizeChanged = true;
memUsage = 0;
// Increment the spill count
Modified: hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java?rev=925513&r1=925512&r2=925513&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java Sat Mar 20 01:12:17 2010
@@ -145,7 +145,6 @@ public class InternalSortedBag extends D
spill();
}
- mMemSizeChanged = true;
mContents.add(t);
// check how many tuples memory can hold by getting average
@@ -236,7 +235,6 @@ public class InternalSortedBag extends D
}
}
mContents.clear();
- mMemSizeChanged = true;
memUsage = 0;
// Increment the spill count