You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by dv...@apache.org on 2012/09/15 04:38:55 UTC

svn commit: r1385001 - in /pig/trunk: ./ src/org/apache/pig/builtin/ src/org/apache/pig/builtin/mock/ src/org/apache/pig/impl/builtin/

Author: dvryaboy
Date: Sat Sep 15 02:38:54 2012
New Revision: 1385001

URL: http://svn.apache.org/viewvc?rev=1385001&view=rev
Log:
PIG-2918: Avoid Spillable bag overhead where possible

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/builtin/INVERSEMAP.java
    pig/trunk/src/org/apache/pig/builtin/KEYSET.java
    pig/trunk/src/org/apache/pig/builtin/TOBAG.java
    pig/trunk/src/org/apache/pig/builtin/VALUELIST.java
    pig/trunk/src/org/apache/pig/builtin/VALUESET.java
    pig/trunk/src/org/apache/pig/builtin/mock/Storage.java
    pig/trunk/src/org/apache/pig/impl/builtin/FindQuantiles.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Sat Sep 15 02:38:54 2012
@@ -25,6 +25,8 @@ PIG-1891 Enable StoreFunc to make intell
 
 IMPROVEMENTS
 
+PIG-2918: Avoid Spillable bag overhead where possible (dvryaboy)
+
 PIG-2900: Streaming should provide conf settings in the environment (dvryaboy)
 
 PIG-2353: RANK function like in SQL (xalan via azaroth)

Modified: pig/trunk/src/org/apache/pig/builtin/INVERSEMAP.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/INVERSEMAP.java?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/INVERSEMAP.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/INVERSEMAP.java Sat Sep 15 02:38:54 2012
@@ -26,6 +26,7 @@ import org.apache.pig.backend.executione
 import org.apache.pig.data.BagFactory;
 import org.apache.pig.data.DataBag;
 import org.apache.pig.data.DataType;
+import org.apache.pig.data.NonSpillableDataBag;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.logicalLayer.FrontendException;
@@ -100,9 +101,9 @@ public class INVERSEMAP extends EvalFunc
             }
 
             // Create a new bag if "newKey" does not exist in Map
-            DataBag bag = (DataBag) inverseMap.get(newKey);
+            DataBag bag = inverseMap.get(newKey);
             if (bag == null) {
-                bag = BAG_FACTORY.newDefaultBag();
+                bag = new NonSpillableDataBag();
                 bag.add(TUPLE_FACTORY.newTuple(entry.getKey()));
                 inverseMap.put(newKey, bag);
             } else {

Modified: pig/trunk/src/org/apache/pig/builtin/KEYSET.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/KEYSET.java?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/KEYSET.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/KEYSET.java Sat Sep 15 02:38:54 2012
@@ -21,9 +21,9 @@ import java.io.IOException;
 import java.util.Map;
 
 import org.apache.pig.EvalFunc;
-import org.apache.pig.data.BagFactory;
 import org.apache.pig.data.DataBag;
 import org.apache.pig.data.DataType;
+import org.apache.pig.data.NonSpillableDataBag;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.logicalLayer.FrontendException;
@@ -49,7 +49,6 @@ import org.apache.pig.impl.logicalLayer.
  */
 public class KEYSET extends EvalFunc<DataBag> {
     private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
-    private static final BagFactory BAG_FACTORY = BagFactory.getInstance();
 
     @SuppressWarnings("unchecked")
     @Override
@@ -65,7 +64,7 @@ public class KEYSET extends EvalFunc<Dat
             return null;
         }
 
-        DataBag bag = BAG_FACTORY.newDefaultBag();
+        DataBag bag = new NonSpillableDataBag(m.size());
         for (String s : m.keySet()) {
             Tuple t = TUPLE_FACTORY.newTuple(s);
             bag.add(t);

Modified: pig/trunk/src/org/apache/pig/builtin/TOBAG.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/TOBAG.java?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/TOBAG.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/TOBAG.java Sat Sep 15 02:38:54 2012
@@ -21,9 +21,9 @@ package org.apache.pig.builtin;
 import java.io.IOException;
 
 import org.apache.pig.EvalFunc;
-import org.apache.pig.data.BagFactory;
 import org.apache.pig.data.DataBag;
 import org.apache.pig.data.DataType;
+import org.apache.pig.data.NonSpillableDataBag;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.logicalLayer.FrontendException;
@@ -89,7 +89,9 @@ public class TOBAG extends EvalFunc<Data
     @Override
     public DataBag exec(Tuple input) throws IOException {
         try {
-            DataBag bag = BagFactory.getInstance().newDefaultBag();
+            // The assumption is that if the bag contents fit into
+            // an input tuple, they will not need to be spilled.
+            DataBag bag = new NonSpillableDataBag(input.size());
 
             for (int i = 0; i < input.size(); ++i) {
                 final Object object = input.get(i);

Modified: pig/trunk/src/org/apache/pig/builtin/VALUELIST.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/VALUELIST.java?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/VALUELIST.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/VALUELIST.java Sat Sep 15 02:38:54 2012
@@ -26,6 +26,7 @@ import org.apache.pig.EvalFunc;
 import org.apache.pig.data.BagFactory;
 import org.apache.pig.data.DataBag;
 import org.apache.pig.data.DataType;
+import org.apache.pig.data.NonSpillableDataBag;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.logicalLayer.FrontendException;
@@ -71,7 +72,7 @@ public class VALUELIST extends EvalFunc<
         }
 
         Collection c = m.values();
-        DataBag bag = BAG_FACTORY.newDefaultBag();
+        DataBag bag = new NonSpillableDataBag(c.size());
         Iterator<Object> iter = c.iterator();
         while(iter.hasNext()) {
             Tuple t = TUPLE_FACTORY.newTuple(iter.next());

Modified: pig/trunk/src/org/apache/pig/builtin/VALUESET.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/VALUESET.java?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/VALUESET.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/VALUESET.java Sat Sep 15 02:38:54 2012
@@ -28,6 +28,7 @@ import org.apache.pig.EvalFunc;
 import org.apache.pig.data.BagFactory;
 import org.apache.pig.data.DataBag;
 import org.apache.pig.data.DataType;
+import org.apache.pig.data.NonSpillableDataBag;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.logicalLayer.FrontendException;
@@ -73,7 +74,7 @@ public class VALUESET extends EvalFunc<D
 
         int initialSetSize = getInitialSetSize(m.values());
         Set<Object> uniqueElements = new HashSet<Object>(initialSetSize);
-        DataBag bag = BAG_FACTORY.newDefaultBag();
+        DataBag bag = new NonSpillableDataBag();
 
         Iterator<Object> iter = m.values().iterator();
 

Modified: pig/trunk/src/org/apache/pig/builtin/mock/Storage.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/mock/Storage.java?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/mock/Storage.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/mock/Storage.java Sat Sep 15 02:38:54 2012
@@ -39,8 +39,8 @@ import org.apache.pig.ResourceStatistics
 import org.apache.pig.StoreFuncInterface;
 import org.apache.pig.StoreMetadata;
 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
-import org.apache.pig.data.BagFactory;
 import org.apache.pig.data.DataBag;
+import org.apache.pig.data.NonSpillableDataBag;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.PigContext;
@@ -97,7 +97,6 @@ public class Storage extends LoadFunc im
   private static final Logger LOG = Logger.getLogger(Storage.class);
   private static Map<Integer, Data> idToData = new HashMap<Integer, Data>();
   private static TupleFactory TF = TupleFactory.getInstance();
-  private static BagFactory BF = BagFactory.getInstance();
 
   private static int nextId;
 
@@ -114,7 +113,7 @@ public class Storage extends LoadFunc im
    * @return a bag containing the provided objects
    */
   public static DataBag bag(Tuple... tuples) {
-    return BF.newDefaultBag(Arrays.asList(tuples));
+    return new NonSpillableDataBag(Arrays.asList(tuples));
   }
   
   /**

Modified: pig/trunk/src/org/apache/pig/impl/builtin/FindQuantiles.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/builtin/FindQuantiles.java?rev=1385001&r1=1385000&r2=1385001&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/builtin/FindQuantiles.java (original)
+++ pig/trunk/src/org/apache/pig/impl/builtin/FindQuantiles.java Sat Sep 15 02:38:54 2012
@@ -22,7 +22,6 @@ import java.io.ObjectInputStream;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.Map;
 import java.util.Map.Entry;
 
@@ -30,26 +29,16 @@ import org.apache.pig.ComparisonFunc;
 import org.apache.pig.EvalFunc;
 import org.apache.pig.FuncSpec;
 import org.apache.pig.backend.executionengine.ExecException;
-import org.apache.pig.backend.hadoop.HDataType;
 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler;
 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.partitioners.CountingMap;
-import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.partitioners.DiscreteProbabilitySampleGenerator;
 import org.apache.pig.data.BagFactory;
 import org.apache.pig.data.DataBag;
 import org.apache.pig.data.DataType;
-import org.apache.pig.data.DefaultDataBag;
 import org.apache.pig.data.InternalMap;
+import org.apache.pig.data.NonSpillableDataBag;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.PigContext;
-import org.apache.pig.impl.io.NullableBytesWritable;
-import org.apache.pig.impl.io.NullableDoubleWritable;
-import org.apache.pig.impl.io.NullableFloatWritable;
-import org.apache.pig.impl.io.NullableIntWritable;
-import org.apache.pig.impl.io.NullableLongWritable;
-import org.apache.pig.impl.io.NullableText;
-import org.apache.pig.impl.io.NullableTuple;
-import org.apache.pig.impl.io.PigNullableWritable;
 
 
 public class FindQuantiles extends EvalFunc<Map<String, Object>>{
@@ -65,6 +54,7 @@ public class FindQuantiles extends EvalF
     State mState;
     
     private class SortComparator implements Comparator<Tuple> {
+        @Override
         @SuppressWarnings("unchecked")
         public int compare(Tuple t1, Tuple t2) {
             switch (mState) {
@@ -272,7 +262,7 @@ public class FindQuantiles extends EvalF
                 }
                 weightedParts.put(key, probVec);
             }
-            output.put(QUANTILES_LIST, mBagFactory.newDefaultBag(quantilesList));
+            output.put(QUANTILES_LIST, new NonSpillableDataBag(quantilesList));
             output.put(WEIGHTED_PARTS, weightedParts);
             return output;
         }catch (Exception e){