You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2011/09/23 15:58:54 UTC

svn commit: r1174770 - in /cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db: ./ compaction/

Author: jbellis
Date: Fri Sep 23 13:58:54 2011
New Revision: 1174770

URL: http://svn.apache.org/viewvc?rev=1174770&view=rev
Log:
avoid echoedRow when checking shouldPurge is more expensive than just de/serializing
patch by jbellis; reviewed by slebresne for CASSANDRA-3234

Modified:
    cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
    cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/AbstractCompactionStrategy.java
    cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/CompactionController.java
    cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/LeveledCompactionStrategy.java
    cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java
    cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java

Modified: cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/ColumnFamilyStore.java?rev=1174770&r1=1174769&r2=1174770&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/ColumnFamilyStore.java (original)
+++ cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/ColumnFamilyStore.java Fri Sep 23 13:58:54 2011
@@ -885,6 +885,11 @@ public class ColumnFamilyStore implement
         return false;
     }
 
+    public boolean isKeyExistenceExpensive(Set<? extends SSTable> sstablesToIgnore)
+    {
+        return compactionStrategy.isKeyExistenceExpensive(sstablesToIgnore);
+    }
+
     /*
      * Called after a BinaryMemtable flushes its in-memory data, or we add a file
      * via bootstrap. This information is cached in the ColumnFamilyStore.

Modified: cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/AbstractCompactionStrategy.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/AbstractCompactionStrategy.java?rev=1174770&r1=1174769&r2=1174770&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/AbstractCompactionStrategy.java (original)
+++ cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/AbstractCompactionStrategy.java Fri Sep 23 13:58:54 2011
@@ -21,9 +21,11 @@ package org.apache.cassandra.db.compacti
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.cassandra.db.ColumnFamilyStore;
+import org.apache.cassandra.io.sstable.SSTable;
 import org.apache.cassandra.io.sstable.SSTableReader;
 import org.apache.cassandra.service.StorageService;
 
@@ -97,4 +99,10 @@ public abstract class AbstractCompaction
      * @return size in bytes of the largest sstables for this strategy
      */
     public abstract long getMaxSSTableSize();
+
+    /**
+     * @return true if checking whether a key exists in any sstable other than those in
+     * {@code sstablesToIgnore} is going to be expensive
+     */
+    public abstract boolean isKeyExistenceExpensive(Set<? extends SSTable> sstablesToIgnore);
 }

Modified: cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/CompactionController.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/CompactionController.java?rev=1174770&r1=1174769&r2=1174770&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/CompactionController.java (original)
+++ cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/CompactionController.java Fri Sep 23 13:58:54 2011
@@ -44,6 +44,7 @@ public class CompactionController
     private final boolean forceDeserialize;
 
     public final int gcBefore;
+    public boolean keyExistenceIsExpensive;
 
     public CompactionController(ColumnFamilyStore cfs, Collection<SSTableReader> sstables, int gcBefore, boolean forceDeserialize)
     {
@@ -52,6 +53,7 @@ public class CompactionController
         this.sstables = new HashSet<SSTableReader>(sstables);
         this.gcBefore = gcBefore;
         this.forceDeserialize = forceDeserialize;
+        keyExistenceIsExpensive = cfs.getCompactionStrategy().isKeyExistenceExpensive(this.sstables);
     }
 
     public String getKeyspace()
@@ -102,13 +104,19 @@ public class CompactionController
      */
     public AbstractCompactedRow getCompactedRow(List<SSTableIdentityIterator> rows)
     {
-        if (rows.size() == 1 && !needDeserialize() && !shouldPurge(rows.get(0).getKey()))
-            return new EchoedRow(this, rows.get(0));
-
         long rowSize = 0;
         for (SSTableIdentityIterator row : rows)
             rowSize += row.dataSize;
 
+        // for rows small enough to compact in memory, EchoedRow is only used if we think checking for the
+        // key's existence in the other sstables is going to be less expensive than de/serializing the row again
+        if (rows.size() == 1 && !needDeserialize()
+            && (rowSize > DatabaseDescriptor.getInMemoryCompactionLimit() || !keyExistenceIsExpensive)
+            && !shouldPurge(rows.get(0).getKey()))
+        {
+            return new EchoedRow(this, rows.get(0));
+        }
+
         if (rowSize > DatabaseDescriptor.getInMemoryCompactionLimit())
         {
             String keyString = cfs.metadata.getKeyValidator().getString(rows.get(0).getKey().key);

Modified: cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/LeveledCompactionStrategy.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/LeveledCompactionStrategy.java?rev=1174770&r1=1174769&r2=1174770&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/LeveledCompactionStrategy.java (original)
+++ cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/LeveledCompactionStrategy.java Fri Sep 23 13:58:54 2011
@@ -21,17 +21,17 @@ package org.apache.cassandra.db.compacti
  */
 
 
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicReference;
 
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.cassandra.db.ColumnFamilyStore;
+import org.apache.cassandra.io.sstable.SSTable;
 import org.apache.cassandra.io.sstable.SSTableReader;
 import org.apache.cassandra.notifications.INotification;
 import org.apache.cassandra.notifications.INotificationConsumer;
@@ -159,6 +159,12 @@ public class LeveledCompactionStrategy e
         return maxSSTableSizeInMB * 1024 * 1024;
     }
 
+    public boolean isKeyExistenceExpensive(Set<? extends SSTable> sstablesToIgnore)
+    {
+        Set<SSTableReader> L0 = ImmutableSet.copyOf(manifest.getLevel(0));
+        return Sets.difference(L0, sstablesToIgnore).size() + manifest.getLevelCount() > 20;
+    }
+
     @Override
     public String toString()
     {

Modified: cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java?rev=1174770&r1=1174769&r2=1174770&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java (original)
+++ cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java Fri Sep 23 13:58:54 2011
@@ -27,8 +27,6 @@ import java.io.IOException;
 import java.util.*;
 
 import com.google.common.collect.Iterables;
-import org.apache.commons.lang.StringUtils;
-
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -54,12 +52,13 @@ public class LeveledManifest
      * uses a pessimistic estimate of how many keys overlap (none), so we risk wasting memory
      * or even OOMing when compacting highly overlapping sstables
      */
-    private static int MAX_COMPACTING_L0 = 32;
+    static int MAX_COMPACTING_L0 = 32;
 
     private final ColumnFamilyStore cfs;
     private final List<SSTableReader>[] generations;
     private final DecoratedKey[] lastCompactedKeys;
     private final int maxSSTableSizeInMB;
+    private int levelCount;
 
     private LeveledManifest(ColumnFamilyStore cfs, int maxSSTableSizeInMB)
     {
@@ -404,4 +403,19 @@ public class LeveledManifest
     {
         return "Manifest@" + hashCode();
     }
+
+    public int getLevelCount()
+    {
+        for (int i = generations.length - 1; i >= 0; i--)
+        {
+            if (generations[i].size() > 0)
+                return i;
+        }
+        return 0;
+    }
+
+    public List<SSTableReader> getLevel(int i)
+    {
+        return generations[i];
+    }
 }

Modified: cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java?rev=1174770&r1=1174769&r2=1174770&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java (original)
+++ cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java Fri Sep 23 13:58:54 2011
@@ -25,6 +25,8 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.cassandra.db.ColumnFamilyStore;
+import org.apache.cassandra.db.DataTracker;
+import org.apache.cassandra.io.sstable.SSTable;
 import org.apache.cassandra.io.sstable.SSTableReader;
 import org.apache.cassandra.utils.Pair;
 
@@ -181,6 +183,11 @@ public class SizeTieredCompactionStrateg
         return Long.MAX_VALUE;
     }
 
+    public boolean isKeyExistenceExpensive(Set<? extends SSTable> sstablesToIgnore)
+    {
+        return cfs.getSSTables().size() - sstablesToIgnore.size() > 20;
+    }
+
     public String toString()
     {
         return String.format("SizeTieredCompactionStrategy[%s/%s]",