You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by dv...@apache.org on 2012/09/15 04:38:55 UTC
svn commit: r1385001 - in /pig/trunk: ./ src/org/apache/pig/builtin/
src/org/apache/pig/builtin/mock/ src/org/apache/pig/impl/builtin/
Author: dvryaboy
Date: Sat Sep 15 02:38:54 2012
New Revision: 1385001
URL: http://svn.apache.org/viewvc?rev=1385001&view=rev
Log:
PIG-2918: Avoid Spillable bag overhead where possible
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/builtin/INVERSEMAP.java
pig/trunk/src/org/apache/pig/builtin/KEYSET.java
pig/trunk/src/org/apache/pig/builtin/TOBAG.java
pig/trunk/src/org/apache/pig/builtin/VALUELIST.java
pig/trunk/src/org/apache/pig/builtin/VALUESET.java
pig/trunk/src/org/apache/pig/builtin/mock/Storage.java
pig/trunk/src/org/apache/pig/impl/builtin/FindQuantiles.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Sat Sep 15 02:38:54 2012
@@ -25,6 +25,8 @@ PIG-1891 Enable StoreFunc to make intell
IMPROVEMENTS
+PIG-2918: Avoid Spillable bag overhead where possible (dvryaboy)
+
PIG-2900: Streaming should provide conf settings in the environment (dvryaboy)
PIG-2353: RANK function like in SQL (xalan via azaroth)
Modified: pig/trunk/src/org/apache/pig/builtin/INVERSEMAP.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/INVERSEMAP.java?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/INVERSEMAP.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/INVERSEMAP.java Sat Sep 15 02:38:54 2012
@@ -26,6 +26,7 @@ import org.apache.pig.backend.executione
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
+import org.apache.pig.data.NonSpillableDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
@@ -100,9 +101,9 @@ public class INVERSEMAP extends EvalFunc
}
// Create a new bag if "newKey" does not exist in Map
- DataBag bag = (DataBag) inverseMap.get(newKey);
+ DataBag bag = inverseMap.get(newKey);
if (bag == null) {
- bag = BAG_FACTORY.newDefaultBag();
+ bag = new NonSpillableDataBag();
bag.add(TUPLE_FACTORY.newTuple(entry.getKey()));
inverseMap.put(newKey, bag);
} else {
Modified: pig/trunk/src/org/apache/pig/builtin/KEYSET.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/KEYSET.java?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/KEYSET.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/KEYSET.java Sat Sep 15 02:38:54 2012
@@ -21,9 +21,9 @@ import java.io.IOException;
import java.util.Map;
import org.apache.pig.EvalFunc;
-import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
+import org.apache.pig.data.NonSpillableDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
@@ -49,7 +49,6 @@ import org.apache.pig.impl.logicalLayer.
*/
public class KEYSET extends EvalFunc<DataBag> {
private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
- private static final BagFactory BAG_FACTORY = BagFactory.getInstance();
@SuppressWarnings("unchecked")
@Override
@@ -65,7 +64,7 @@ public class KEYSET extends EvalFunc<Dat
return null;
}
- DataBag bag = BAG_FACTORY.newDefaultBag();
+ DataBag bag = new NonSpillableDataBag(m.size());
for (String s : m.keySet()) {
Tuple t = TUPLE_FACTORY.newTuple(s);
bag.add(t);
Modified: pig/trunk/src/org/apache/pig/builtin/TOBAG.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/TOBAG.java?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/TOBAG.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/TOBAG.java Sat Sep 15 02:38:54 2012
@@ -21,9 +21,9 @@ package org.apache.pig.builtin;
import java.io.IOException;
import org.apache.pig.EvalFunc;
-import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
+import org.apache.pig.data.NonSpillableDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
@@ -89,7 +89,9 @@ public class TOBAG extends EvalFunc<Data
@Override
public DataBag exec(Tuple input) throws IOException {
try {
- DataBag bag = BagFactory.getInstance().newDefaultBag();
+ // The assumption is that if the bag contents fit into
+ // an input tuple, the bag will not need to be spilled.
+ DataBag bag = new NonSpillableDataBag(input.size());
for (int i = 0; i < input.size(); ++i) {
final Object object = input.get(i);
Modified: pig/trunk/src/org/apache/pig/builtin/VALUELIST.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/VALUELIST.java?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/VALUELIST.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/VALUELIST.java Sat Sep 15 02:38:54 2012
@@ -26,6 +26,7 @@ import org.apache.pig.EvalFunc;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
+import org.apache.pig.data.NonSpillableDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
@@ -71,7 +72,7 @@ public class VALUELIST extends EvalFunc<
}
Collection c = m.values();
- DataBag bag = BAG_FACTORY.newDefaultBag();
+ DataBag bag = new NonSpillableDataBag(c.size());
Iterator<Object> iter = c.iterator();
while(iter.hasNext()) {
Tuple t = TUPLE_FACTORY.newTuple(iter.next());
Modified: pig/trunk/src/org/apache/pig/builtin/VALUESET.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/VALUESET.java?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/VALUESET.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/VALUESET.java Sat Sep 15 02:38:54 2012
@@ -28,6 +28,7 @@ import org.apache.pig.EvalFunc;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
+import org.apache.pig.data.NonSpillableDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
@@ -73,7 +74,7 @@ public class VALUESET extends EvalFunc<D
int initialSetSize = getInitialSetSize(m.values());
Set<Object> uniqueElements = new HashSet<Object>(initialSetSize);
- DataBag bag = BAG_FACTORY.newDefaultBag();
+ DataBag bag = new NonSpillableDataBag();
Iterator<Object> iter = m.values().iterator();
Modified: pig/trunk/src/org/apache/pig/builtin/mock/Storage.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/mock/Storage.java?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/mock/Storage.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/mock/Storage.java Sat Sep 15 02:38:54 2012
@@ -39,8 +39,8 @@ import org.apache.pig.ResourceStatistics
import org.apache.pig.StoreFuncInterface;
import org.apache.pig.StoreMetadata;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
-import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
+import org.apache.pig.data.NonSpillableDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.PigContext;
@@ -97,7 +97,6 @@ public class Storage extends LoadFunc im
private static final Logger LOG = Logger.getLogger(Storage.class);
private static Map<Integer, Data> idToData = new HashMap<Integer, Data>();
private static TupleFactory TF = TupleFactory.getInstance();
- private static BagFactory BF = BagFactory.getInstance();
private static int nextId;
@@ -114,7 +113,7 @@ public class Storage extends LoadFunc im
* @return a bag containing the provided objects
*/
public static DataBag bag(Tuple... tuples) {
- return BF.newDefaultBag(Arrays.asList(tuples));
+ return new NonSpillableDataBag(Arrays.asList(tuples));
}
/**
Modified: pig/trunk/src/org/apache/pig/impl/builtin/FindQuantiles.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/builtin/FindQuantiles.java?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/builtin/FindQuantiles.java (original)
+++ pig/trunk/src/org/apache/pig/impl/builtin/FindQuantiles.java Sat Sep 15 02:38:54 2012
@@ -22,7 +22,6 @@ import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
-import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
@@ -30,26 +29,16 @@ import org.apache.pig.ComparisonFunc;
import org.apache.pig.EvalFunc;
import org.apache.pig.FuncSpec;
import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.backend.hadoop.HDataType;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.partitioners.CountingMap;
-import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.partitioners.DiscreteProbabilitySampleGenerator;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataType;
-import org.apache.pig.data.DefaultDataBag;
import org.apache.pig.data.InternalMap;
+import org.apache.pig.data.NonSpillableDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.PigContext;
-import org.apache.pig.impl.io.NullableBytesWritable;
-import org.apache.pig.impl.io.NullableDoubleWritable;
-import org.apache.pig.impl.io.NullableFloatWritable;
-import org.apache.pig.impl.io.NullableIntWritable;
-import org.apache.pig.impl.io.NullableLongWritable;
-import org.apache.pig.impl.io.NullableText;
-import org.apache.pig.impl.io.NullableTuple;
-import org.apache.pig.impl.io.PigNullableWritable;
public class FindQuantiles extends EvalFunc<Map<String, Object>>{
@@ -65,6 +54,7 @@ public class FindQuantiles extends EvalF
State mState;
private class SortComparator implements Comparator<Tuple> {
+ @Override
@SuppressWarnings("unchecked")
public int compare(Tuple t1, Tuple t2) {
switch (mState) {
@@ -272,7 +262,7 @@ public class FindQuantiles extends EvalF
}
weightedParts.put(key, probVec);
}
- output.put(QUANTILES_LIST, mBagFactory.newDefaultBag(quantilesList));
+ output.put(QUANTILES_LIST, new NonSpillableDataBag(quantilesList));
output.put(WEIGHTED_PARTS, weightedParts);
return output;
}catch (Exception e){