Posted to commits@cassandra.apache.org by jb...@apache.org on 2010/07/12 03:02:44 UTC

svn commit: r963172 - in /cassandra/trunk: src/java/org/apache/cassandra/db/CompactionManager.java test/unit/org/apache/cassandra/db/CompactionsTest.java

Author: jbellis
Date: Mon Jul 12 01:02:44 2010
New Revision: 963172

URL: http://svn.apache.org/viewvc?rev=963172&view=rev
Log:
add test for getBuckets.  patch by Tyler Hobbs; reviewed by jbellis for CASSANDRA-1265

Modified:
    cassandra/trunk/src/java/org/apache/cassandra/db/CompactionManager.java
    cassandra/trunk/test/unit/org/apache/cassandra/db/CompactionsTest.java

Modified: cassandra/trunk/src/java/org/apache/cassandra/db/CompactionManager.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/db/CompactionManager.java?rev=963172&r1=963171&r2=963172&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/db/CompactionManager.java (original)
+++ cassandra/trunk/src/java/org/apache/cassandra/db/CompactionManager.java Mon Jul 12 01:02:44 2010
@@ -42,6 +42,7 @@ import org.apache.cassandra.service.Stor
 import org.apache.cassandra.service.AntiEntropyService;
 import org.apache.cassandra.io.util.FileUtils;
 import org.apache.cassandra.utils.FBUtilities;
+import org.apache.cassandra.utils.Pair;
 import org.cliffc.high_scale_lib.NonBlockingHashMap;
 
 import java.net.InetAddress;
@@ -138,7 +139,7 @@ public class CompactionManager implement
                     return 0;
                 }
                 logger.debug("Checking to see if compaction of " + cfs.columnFamily_ + " would be useful");
-                Set<List<SSTableReader>> buckets = getBuckets(cfs.getSSTables(), 50L * 1024L * 1024L);
+                Set<List<SSTableReader>> buckets = getBuckets(convertSSTablesToPairs(cfs.getSSTables()), 50L * 1024L * 1024L);
                 updateEstimateFor(cfs, buckets);
                 
                 for (List<SSTableReader> sstables : buckets)
@@ -506,29 +507,40 @@ public class CompactionManager implement
     /*
     * Group files of similar size into buckets.
     */
-    static Set<List<SSTableReader>> getBuckets(Iterable<SSTableReader> files, long min)
+    static <T> Set<List<T>> getBuckets(Collection<Pair<T, Long>> files, long min)
     {
-        Map<List<SSTableReader>, Long> buckets = new HashMap<List<SSTableReader>, Long>();
-        for (SSTableReader sstable : files)
+        // Sort the list in order to get deterministic results during the grouping below
+        List<Pair<T, Long>> sortedFiles = new ArrayList<Pair<T, Long>>(files);
+        Collections.sort(sortedFiles, new Comparator<Pair<T, Long>>()
         {
-            long size = sstable.length();
+            public int compare(Pair<T, Long> p1, Pair<T, Long> p2)
+            {
+                return p1.right.compareTo(p2.right);
+            }
+        });
+
+        Map<List<T>, Long> buckets = new HashMap<List<T>, Long>();
+
+        for (Pair<T, Long> pair: sortedFiles)
+        {
+            long size = pair.right;
 
             boolean bFound = false;
             // look for a bucket containing similar-sized files:
             // group in the same bucket if it's w/in 50% of the average for this bucket,
             // or this file and the bucket are all considered "small" (less than `min`)
-            for (Entry<List<SSTableReader>, Long> entry : buckets.entrySet())
+            for (Entry<List<T>, Long> entry : buckets.entrySet())
             {
-                List<SSTableReader> bucket = entry.getKey();
+                List<T> bucket = entry.getKey();
                 long averageSize = entry.getValue();
-                if ((size > averageSize / 2 && size < 3 * averageSize / 2)
+                if ((size > (averageSize / 2) && size < (3 * averageSize) / 2)
                     || (size < min && averageSize < min))
                 {
                     // remove and re-add because adding changes the hash
                     buckets.remove(bucket);
                     long totalSize = bucket.size() * averageSize;
                     averageSize = (totalSize + size) / (bucket.size() + 1);
-                    bucket.add(sstable);
+                    bucket.add(pair.left);
                     buckets.put(bucket, averageSize);
                     bFound = true;
                     break;
@@ -537,8 +549,8 @@ public class CompactionManager implement
             // no similar bucket found; put it in a new one
             if (!bFound)
             {
-                ArrayList<SSTableReader> bucket = new ArrayList<SSTableReader>();
-                bucket.add(sstable);
+                ArrayList<T> bucket = new ArrayList<T>();
+                bucket.add(pair.left);
                 buckets.put(bucket, size);
             }
         }
@@ -546,6 +558,16 @@ public class CompactionManager implement
         return buckets.keySet();
     }
 
+    private static Collection<Pair<SSTableReader, Long>> convertSSTablesToPairs(Collection<SSTableReader> collection)
+    {
+        Collection<Pair<SSTableReader, Long>> tablePairs = new ArrayList<Pair<SSTableReader, Long>>();
+        for(SSTableReader table: collection)
+        {
+            tablePairs.add(new Pair<SSTableReader, Long>(table, table.length()));
+        }
+        return tablePairs;
+    }
+
     public static int getDefaultGCBefore()
     {
         return (int)(System.currentTimeMillis() / 1000) - DatabaseDescriptor.getGcGraceInSeconds();
@@ -604,7 +626,7 @@ public class CompactionManager implement
                 public void run ()
                 {
                     logger.debug("Estimating compactions for " + cfs.columnFamily_);
-                    final Set<List<SSTableReader>> buckets = getBuckets(cfs.getSSTables(), 50L * 1024L * 1024L);
+                    final Set<List<SSTableReader>> buckets = getBuckets(convertSSTablesToPairs(cfs.getSSTables()), 50L * 1024L * 1024L);
                     updateEstimateFor(cfs, buckets);
                 }
             };

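[Editor's note] The change above makes getBuckets() generic: instead of taking SSTableReaders and calling length() on each, it now buckets arbitrary Pair<T, Long> (item, size) entries, with convertSSTablesToPairs() adapting the production call sites. That is what lets the new unit test below drive the grouping with plain strings. The following is a minimal standalone sketch of the same bucketing rule, assuming nothing from the Cassandra tree; the names SizeBucketer and bucketsOf are illustrative only, and JDK Map.Entry stands in for org.apache.cassandra.utils.Pair.

import java.util.*;

public class SizeBucketer
{
    /*
     * Same grouping rule as CompactionManager.getBuckets: an entry joins a
     * bucket when its size is within 50% of the bucket's running average,
     * or when both the entry and the bucket average are "small" (below min).
     */
    static <T> Set<List<T>> bucketsOf(Collection<Map.Entry<T, Long>> files, long min)
    {
        // sort by size so the grouping is deterministic
        List<Map.Entry<T, Long>> sorted = new ArrayList<>(files);
        sorted.sort(Map.Entry.comparingByValue());

        Map<List<T>, Long> buckets = new HashMap<>();
        for (Map.Entry<T, Long> pair : sorted)
        {
            long size = pair.getValue();
            boolean found = false;
            // snapshot the entry set so the remove/put below cannot upset iteration
            for (Map.Entry<List<T>, Long> entry : new ArrayList<>(buckets.entrySet()))
            {
                List<T> bucket = entry.getKey();
                long averageSize = entry.getValue();
                if ((size > averageSize / 2 && size < 3 * averageSize / 2)
                    || (size < min && averageSize < min))
                {
                    // remove and re-add: the list is the map key and adding changes its hash
                    buckets.remove(bucket);
                    long totalSize = bucket.size() * averageSize;
                    bucket.add(pair.getKey());
                    buckets.put(bucket, (totalSize + size) / bucket.size());
                    found = true;
                    break;
                }
            }
            if (!found)
            {
                List<T> bucket = new ArrayList<>();
                bucket.add(pair.getKey());
                buckets.put(bucket, size);
            }
        }
        return buckets.keySet();
    }

    public static void main(String[] args)
    {
        List<Map.Entry<String, Long>> pairs = new ArrayList<>();
        for (String s : new String[] { "a", "bbbb", "cccccccc", "cccccccc", "bbbb", "a" })
            pairs.add(new AbstractMap.SimpleEntry<>(s, (long) s.length()));
        // sizes 1, 4 and 8 land in three separate buckets, as the new unit test asserts
        System.out.println(bucketsOf(pairs, 2).size()); // prints 3
    }
}

Keeping the size alongside the item in a Pair is what allows the grouping logic to be unit-tested without constructing SSTableReader instances.
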
Modified: cassandra/trunk/test/unit/org/apache/cassandra/db/CompactionsTest.java
URL: http://svn.apache.org/viewvc/cassandra/trunk/test/unit/org/apache/cassandra/db/CompactionsTest.java?rev=963172&r1=963171&r2=963172&view=diff
==============================================================================
--- cassandra/trunk/test/unit/org/apache/cassandra/db/CompactionsTest.java (original)
+++ cassandra/trunk/test/unit/org/apache/cassandra/db/CompactionsTest.java Mon Jul 12 01:02:44 2010
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.net.InetAddress;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
+import java.util.List;
 import java.util.ArrayList;
 import java.util.Set;
 import java.util.HashSet;
@@ -34,6 +35,7 @@ import org.apache.cassandra.io.sstable.S
 import org.apache.cassandra.CleanupHelper;
 import org.apache.cassandra.db.filter.QueryPath;
 import org.apache.cassandra.utils.FBUtilities;
+import org.apache.cassandra.utils.Pair;
 import static junit.framework.Assert.assertEquals;
 
 public class CompactionsTest extends CleanupHelper
@@ -76,4 +78,61 @@ public class CompactionsTest extends Cle
         }
         assertEquals(inserted.size(), Util.getRangeSlice(store).size());
     }
+
+    @Test
+    public void testGetBuckets()
+    {
+        List<Pair<String, Long>> pairs = new ArrayList<Pair<String, Long>>();
+        String[] strings = { "a", "bbbb", "cccccccc", "cccccccc", "bbbb", "a" };
+        for (String st : strings)
+        {
+            Pair<String, Long> pair = new Pair<String, Long>(st, new Long(st.length()));
+            pairs.add(pair);
+        }
+
+        Set<List<String>> buckets = CompactionManager.getBuckets(pairs, 2);
+        assertEquals(3, buckets.size());
+
+        for (List<String> bucket : buckets)
+        {
+            assertEquals(2, bucket.size());
+            assertEquals(bucket.get(0).length(), bucket.get(1).length());
+            assertEquals(bucket.get(0).charAt(0), bucket.get(1).charAt(0));
+        }
+
+        pairs.clear();
+        buckets.clear();
+
+        String[] strings2 = { "aaa", "bbbbbbbb", "aaa", "bbbbbbbb", "bbbbbbbb", "aaa" };
+        for (String st : strings2)
+        {
+            Pair<String, Long> pair = new Pair<String, Long>(st, new Long(st.length()));
+            pairs.add(pair);
+        }
+
+        buckets = CompactionManager.getBuckets(pairs, 2);
+        assertEquals(2, buckets.size());
+
+        for (List<String> bucket : buckets)
+        {
+            assertEquals(3, bucket.size());
+            assertEquals(bucket.get(0).charAt(0), bucket.get(1).charAt(0));
+            assertEquals(bucket.get(1).charAt(0), bucket.get(2).charAt(0));
+        }
+
+        // Test the "min" functionality
+        pairs.clear();
+        buckets.clear();
+
+        String[] strings3 = { "aaa", "bbbbbbbb", "aaa", "bbbbbbbb", "bbbbbbbb", "aaa" };
+        for (String st : strings3)
+        {
+            Pair<String, Long> pair = new Pair<String, Long>(st, new Long(st.length()));
+            pairs.add(pair);
+        }
+
+        buckets = CompactionManager.getBuckets(pairs, 10); // notice the min is 10
+        assertEquals(1, buckets.size());
+    }
+
 }
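
[Editor's note] For the final assertion it may help to see why min = 10 collapses everything into one bucket: the string sizes are 3 and 8, both below min, so the "small files" clause (size < min && averageSize < min) fires on every comparison and all six entries merge. A quick check against the hypothetical SizeBucketer sketch above (a fragment, not part of the patch):

List<Map.Entry<String, Long>> small = new ArrayList<>();
for (String s : new String[] { "aaa", "bbbbbbbb", "aaa", "bbbbbbbb", "bbbbbbbb", "aaa" })
    small.add(new AbstractMap.SimpleEntry<>(s, (long) s.length()));
// 3 and 8 are both < 10, so every entry matches the small-file clause
// and the grouping degenerates to a single bucket.
System.out.println(SizeBucketer.bucketsOf(small, 10).size()); // prints 1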