You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2011/02/09 22:22:03 UTC

svn commit: r1069119 - in /cassandra/branches/cassandra-0.7: ./ conf/ src/java/org/apache/cassandra/config/ src/java/org/apache/cassandra/db/ src/java/org/apache/cassandra/service/

Author: jbellis
Date: Wed Feb  9 21:22:01 2011
New Revision: 1069119

URL: http://svn.apache.org/viewvc?rev=1069119&view=rev
Log:
add flush_largest_memtables_at and reduce_cache_sizes_at options
patch by jbellis; reviewed by brandonwilliams for CASSANDRA-2142

Modified:
    cassandra/branches/cassandra-0.7/CHANGES.txt
    cassandra/branches/cassandra-0.7/NEWS.txt
    cassandra/branches/cassandra-0.7/conf/cassandra.yaml
    cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java
    cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
    cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
    cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/AbstractCassandraDaemon.java
    cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/GCInspector.java
    cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/StorageService.java

Modified: cassandra/branches/cassandra-0.7/CHANGES.txt
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/CHANGES.txt?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/CHANGES.txt (original)
+++ cassandra/branches/cassandra-0.7/CHANGES.txt Wed Feb  9 21:22:01 2011
@@ -1,9 +1,3 @@
-0.7.2
- * cache writing moved to CompactionManager to reduce i/o contention and
-   updated to use non-cache-polluting writes (CASSANDRA-2053)
- * page through large rows when exporting to JSON (CASSANDRA-2041)
-
-
 0.7.1
  * buffer network stack to avoid inefficient small TCP messages while avoiding
    the nagle/delayed ack problem (CASSANDRA-1896)
@@ -56,6 +50,11 @@
  * avoid blocking gossip while deleting handoff hints (CASSANDRA-2073)
  * ignore messages from newer versions, keep track of nodes in gossip 
    regardless of version (CASSANDRA-1970)
+ * cache writing moved to CompactionManager to reduce i/o contention and
+   updated to use non-cache-polluting writes (CASSANDRA-2053)
+ * page through large rows when exporting to JSON (CASSANDRA-2041)
+ * add flush_largest_memtables_at and reduce_cache_sizes_at options
+   (CASSANDRA-2142)
 
 
 0.7.0-final

Modified: cassandra/branches/cassandra-0.7/NEWS.txt
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/NEWS.txt?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/NEWS.txt (original)
+++ cassandra/branches/cassandra-0.7/NEWS.txt Wed Feb  9 21:22:01 2011
@@ -1,17 +1,33 @@
 0.7.1
 =====
 
-Uprading
---------
+Upgrading
+---------
     - 0.7.1 is completely backwards compatible with 0.7.0.  Just restart
       each node with the new version, one at a time.  (The cluster does
       not all need to be upgraded simultaneously.)
 
 Features
 --------
-    - Cassandra can perform writes efficiently across datacenters by
+    - added flush_largest_memtables_at and reduce_cache_sizes_at options
+      to cassandra.yaml as an escape valve for memory pressure
+    - added option to specify -Dcassandra.join_ring=false on startup
+      to allow "warm spare" nodes or performing JMX maintenance before
+      joining the ring
+
+Performance
+-----------
+    - Disk writes and sequential scans avoid polluting page cache
+      (requires JNA to be enabled)
+    - Cassandra performs writes efficiently across datacenters by
       sending a single copy of the mutation and having the recipient
       forward that to other replicas in its datacenter.
+    - Improved network buffering
+    - Reduced lock contention on memtable flush
+    - Optimized supercolumn deserialization
+    - Zero-copy reads from mmapped sstable files
+    - Explicitly set higher JVM new generation size
+    - Reduced i/o contention during saving of caches
 
 
 0.7.0

Modified: cassandra/branches/cassandra-0.7/conf/cassandra.yaml
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/conf/cassandra.yaml?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/conf/cassandra.yaml (original)
+++ cassandra/branches/cassandra-0.7/conf/cassandra.yaml Wed Feb  9 21:22:01 2011
@@ -90,6 +90,31 @@ commitlog_sync: periodic
 # milliseconds.
 commitlog_sync_period_in_ms: 10000
 
+# emergency pressure valve: each time heap usage after a full (CMS)
+# garbage collection is above this fraction of the max, Cassandra will
+# flush the largest memtables.  
+#
+# Set to 1.0 to disable.  Setting this lower than
+# CMSInitiatingOccupancyFraction is not likely to be useful.
+#
+# RELYING ON THIS AS YOUR PRIMARY TUNING MECHANISM WILL WORK POORLY:
+# it is most effective under light to moderate load, or read-heavy
+# workloads; under truly massive write load, it will often be too
+# little, too late.
+flush_largest_memtables_at: 0.75
+
+# emergency pressure valve #2: the first time heap usage after a full
+# (CMS) garbage collection is above this fraction of the max,
+# Cassandra will reduce each cache's maximum _capacity_ to
+# reduce_cache_capacity_to times its current _size_.  Should usually
+# be set substantially above flush_largest_memtables_at, since flushing
+# memtables has less long-term impact on the system than shrinking caches.
+# 
+# Set to 1.0 to disable.  Setting this lower than
+# CMSInitiatingOccupancyFraction is not likely to be useful.
+reduce_cache_sizes_at: 0.85
+reduce_cache_capacity_to: 0.6
+
 # Addresses of hosts that are deemed contact points.
 # Cassandra nodes use this list of hosts to find each other and learn
 # the topology of the ring.  You must change this if you are running

Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java Wed Feb  9 21:22:01 2011
@@ -103,7 +103,10 @@ public class Config
     public Integer index_interval = 128;
 
     public List<RawKeyspace> keyspaces;
-    
+    public Double flush_largest_memtables_at = 1.0;
+    public Double reduce_cache_sizes_at = 1.0;
+    public double reduce_cache_capacity_to = 0.6;
+
     public static enum CommitLogSync {
         periodic,
         batch

Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/DatabaseDescriptor.java?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/DatabaseDescriptor.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/DatabaseDescriptor.java Wed Feb  9 21:22:01 2011
@@ -20,7 +20,6 @@ package org.apache.cassandra.config;
 
 import java.io.*;
 import java.net.InetAddress;
-import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.UnknownHostException;
 import java.nio.ByteBuffer;
@@ -1155,4 +1154,19 @@ public class    DatabaseDescriptor
     {
         conf.dynamic_snitch_badness_threshold = dynamicBadnessThreshold;
     }
+
+    public static double getFlushLargestMemtablesAt()
+    {
+        return conf.flush_largest_memtables_at;
+    }
+
+    public static double getReduceCacheSizesAt()
+    {
+        return conf.reduce_cache_sizes_at;
+    }
+
+    public static double getReduceCacheCapacityTo()
+    {
+        return conf.reduce_cache_capacity_to;
+    }
 }

Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java Wed Feb  9 21:22:01 2011
@@ -1878,7 +1878,7 @@ public class ColumnFamilyStore implement
     public String toString()
     {
         return "ColumnFamilyStore(" +
-               "table='" + table + '\'' +
+               "table='" + table.name + '\'' +
                ", columnFamily='" + columnFamily + '\'' +
                ')';
     }
@@ -2019,4 +2019,26 @@ public class ColumnFamilyStore implement
     {
         return partitioner instanceof LocalPartitioner;
     }
+
+    /**
+     * sets each cache's maximum capacity to reduce_cache_capacity_to (a fraction) times its current size
+     */
+    public void reduceCacheSizes()
+    {
+        if (ssTables.getRowCache().getCapacity() > 0)
+        {
+            int newCapacity = (int) (DatabaseDescriptor.getReduceCacheCapacityTo() * ssTables.getRowCache().getSize());
+            logger.warn(String.format("Reducing %s row cache capacity from %d to %s to reduce memory pressure",
+                                      columnFamily, ssTables.getRowCache().getCapacity(), newCapacity));
+            ssTables.getRowCache().setCapacity(newCapacity);
+        }
+
+        if (ssTables.getKeyCache().getCapacity() > 0)
+        {
+            int newCapacity = (int) (DatabaseDescriptor.getReduceCacheCapacityTo() * ssTables.getKeyCache().getSize());
+            logger.warn(String.format("Reducing %s key cache capacity from %d to %s to reduce memory pressure",
+                                      columnFamily, ssTables.getKeyCache().getCapacity(), newCapacity));
+            ssTables.getKeyCache().setCapacity(newCapacity);
+        }
+    }
 }

Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/AbstractCassandraDaemon.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/AbstractCassandraDaemon.java?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/AbstractCassandraDaemon.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/AbstractCassandraDaemon.java Wed Feb  9 21:22:01 2011
@@ -162,6 +162,15 @@ public abstract class AbstractCassandraD
             Table.open(table);
         }
 
+        try
+        {
+            GCInspector.instance.start();
+        }
+        catch (Throwable t)
+        {
+            logger.warn("Unable to start GCInspector (currently only supported on the Sun JVM)");
+        }
+
         // replay the log if necessary and check for compaction candidates
         CommitLog.recover();
         CompactionManager.instance.checkAllColumnFamilies();

Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/GCInspector.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/GCInspector.java?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/GCInspector.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/GCInspector.java Wed Feb  9 21:22:01 2011
@@ -32,6 +32,7 @@ import javax.management.ObjectName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.cassandra.config.DatabaseDescriptor;
 import org.apache.cassandra.utils.StatusLogger;
 
 public class GCInspector
@@ -46,6 +47,7 @@ public class GCInspector
     private HashMap<String, Long> gctimes = new HashMap<String, Long>();
 
     List<Object> beans = new ArrayList<Object>(); // these are instances of com.sun.management.GarbageCollectorMXBean
+    private volatile boolean cacheSizesReduced;
 
     public GCInspector()
     {
@@ -87,13 +89,13 @@ public class GCInspector
         {
             public void run()
             {
-                logIntervalGCStats();
+                logGCResults();
             }
         };
         StorageService.scheduledTasks.scheduleWithFixedDelay(t, INTERVAL_IN_MS, INTERVAL_IN_MS, TimeUnit.MILLISECONDS);
     }
 
-    private void logIntervalGCStats()
+    private void logGCResults()
     {
         for (Object gc : beans)
         {
@@ -121,7 +123,7 @@ public class GCInspector
             }
 
             String st = String.format("GC for %s: %s ms, %s reclaimed leaving %s used; max is %s",
-                    gcw.getName(), gcw.getDuration(), previousMemoryUsed - memoryUsed, memoryUsed, memoryMax);
+                                      gcw.getName(), gcw.getDuration(), previousMemoryUsed - memoryUsed, memoryUsed, memoryMax);
             if (gcw.getDuration() > MIN_DURATION)                          
                 logger.info(st);
             else if (logger.isDebugEnabled())
@@ -129,6 +131,25 @@ public class GCInspector
 
             if (gcw.getDuration() > MIN_DURATION_TPSTATS)
                 StatusLogger.log();
+
+            // if we just finished a full collection and we're still using a lot of memory, try to reduce the pressure
+            if (gcw.getName().equals("ConcurrentMarkSweep"))
+            {
+                double usage = (double) memoryUsed / memoryMax;
+
+                if (memoryUsed > DatabaseDescriptor.getReduceCacheSizesAt() * memoryMax && !cacheSizesReduced)
+                {
+                    cacheSizesReduced = true;
+                    logger.warn("Heap is " + usage + " full.  You may need to reduce memtable and/or cache sizes.  Cassandra is now reducing cache sizes to free up memory.  Adjust reduce_cache_sizes_at threshold in cassandra.yaml if you don't want Cassandra to do this automatically");
+                    StorageService.instance.reduceCacheSizes();
+                }
+
+                if (memoryUsed > DatabaseDescriptor.getFlushLargestMemtablesAt() * memoryMax)
+                {
+                    logger.warn("Heap is " + usage + " full.  You may need to reduce memtable and/or cache sizes.  Cassandra will now flush up to the two largest memtables to free up memory.  Adjust flush_largest_memtables_at threshold in cassandra.yaml if you don't want Cassandra to do this automatically");
+                    StorageService.instance.flushLargestMemtables();
+                }
+            }
         }
     }
 

Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/StorageService.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/StorageService.java?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/StorageService.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/StorageService.java Wed Feb  9 21:22:01 2011
@@ -363,15 +363,6 @@ public class StorageService implements I
         initialized = true;
         isClientMode = false;
 
-        try
-        {
-            GCInspector.instance.start();
-        }
-        catch (Throwable t)
-        {
-            logger_.warn("Unable to start GCInspector (currently only supported on the Sun JVM)");
-        }
-
         if (Boolean.parseBoolean(System.getProperty("cassandra.load_ring_state", "true")))
         {
             logger_.info("Loading persisted ring state");
@@ -2176,4 +2167,39 @@ public class StorageService implements I
     {
         return efficientCrossDCWrites;
     }
+
+    /**
+     * Flushes the largest memtable by ops (column count) and the largest by throughput (one flush if both are the same column family)
+     */
+    public void flushLargestMemtables()
+    {
+        ColumnFamilyStore largestByOps = null;
+        ColumnFamilyStore largestByThroughput = null;
+        for (ColumnFamilyStore cfs : ColumnFamilyStore.all())
+        {
+            if (largestByOps == null || cfs.getMemtableColumnsCount() > largestByOps.getMemtableColumnsCount())
+                largestByOps = cfs;
+            if (largestByThroughput == null || cfs.getMemtableThroughputInMB() > largestByThroughput.getMemtableThroughputInMB())
+                largestByThroughput = cfs;
+        }
+        if (largestByOps == null)
+        {
+            logger_.error("Unable to reduce heap usage since there are no column families defined");
+            return;
+        }
+
+        logger_.warn("Flushing " + largestByOps + " to relieve memory pressure");
+        largestByOps.forceFlush();
+        if (largestByThroughput != largestByOps)
+        {
+            logger_.warn("Flushing " + largestByThroughput + " to relieve memory pressure");
+            largestByThroughput.forceFlush();
+        }
+    }
+
+    public void reduceCacheSizes()
+    {
+        for (ColumnFamilyStore cfs : ColumnFamilyStore.all())
+            cfs.reduceCacheSizes();
+    }
 }