You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2011/02/09 22:22:03 UTC
svn commit: r1069119 - in /cassandra/branches/cassandra-0.7: ./ conf/
src/java/org/apache/cassandra/config/ src/java/org/apache/cassandra/db/
src/java/org/apache/cassandra/service/
Author: jbellis
Date: Wed Feb 9 21:22:01 2011
New Revision: 1069119
URL: http://svn.apache.org/viewvc?rev=1069119&view=rev
Log:
add flush_largest_memtables_at and reduce_cache_sizes_at options
patch by jbellis; reviewed by brandonwilliams for CASSANDRA-2142
Modified:
cassandra/branches/cassandra-0.7/CHANGES.txt
cassandra/branches/cassandra-0.7/NEWS.txt
cassandra/branches/cassandra-0.7/conf/cassandra.yaml
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/AbstractCassandraDaemon.java
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/GCInspector.java
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/StorageService.java
Modified: cassandra/branches/cassandra-0.7/CHANGES.txt
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/CHANGES.txt?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/CHANGES.txt (original)
+++ cassandra/branches/cassandra-0.7/CHANGES.txt Wed Feb 9 21:22:01 2011
@@ -1,9 +1,3 @@
-0.7.2
- * cache writing moved to CompactionManager to reduce i/o contention and
- updated to use non-cache-polluting writes (CASSANDRA-2053)
- * page through large rows when exporting to JSON (CASSANDRA-2041)
-
-
0.7.1
* buffer network stack to avoid inefficient small TCP messages while avoiding
the nagle/delayed ack problem (CASSANDRA-1896)
@@ -56,6 +50,11 @@
* avoid blocking gossip while deleting handoff hints (CASSANDRA-2073)
* ignore messages from newer versions, keep track of nodes in gossip
regardless of version (CASSANDRA-1970)
+ * cache writing moved to CompactionManager to reduce i/o contention and
+ updated to use non-cache-polluting writes (CASSANDRA-2053)
+ * page through large rows when exporting to JSON (CASSANDRA-2041)
+ * add flush_largest_memtables_at and reduce_cache_sizes_at options
+ (CASSANDRA-2142)
0.7.0-final
Modified: cassandra/branches/cassandra-0.7/NEWS.txt
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/NEWS.txt?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/NEWS.txt (original)
+++ cassandra/branches/cassandra-0.7/NEWS.txt Wed Feb 9 21:22:01 2011
@@ -1,17 +1,33 @@
0.7.1
=====
-Uprading
---------
+Upgrading
+---------
- 0.7.1 is completely backwards compatible with 0.7.0. Just restart
each node with the new version, one at a time. (The cluster does
not all need to be upgraded simultaneously.)
Features
--------
- - Cassandra can perform writes efficiently across datacenters by
+ - added flush_largest_memtables_at and reduce_cache_sizes_at options
+ to cassandra.yaml as an escape valve for memory pressure
+ - added option to specify -Dcassandra.join_ring=false on startup
+ to allow "warm spare" nodes or performing JMX maintenance before
+ joining the ring
+
+Performance
+-----------
+ - Disk writes and sequential scans avoid polluting page cache
+ (requires JNA to be enabled)
+ - Cassandra performs writes efficiently across datacenters by
sending a single copy of the mutation and having the recipient
forward that to other replicas in its datacenter.
+ - Improved network buffering
+ - Reduced lock contention on memtable flush
+ - Optimized supercolumn deserialization
+ - Zero-copy reads from mmapped sstable files
+ - Explicitly set higher JVM new generation size
+ - Reduced i/o contention during saving of caches
0.7.0
Modified: cassandra/branches/cassandra-0.7/conf/cassandra.yaml
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/conf/cassandra.yaml?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/conf/cassandra.yaml (original)
+++ cassandra/branches/cassandra-0.7/conf/cassandra.yaml Wed Feb 9 21:22:01 2011
@@ -90,6 +90,31 @@ commitlog_sync: periodic
# milliseconds.
commitlog_sync_period_in_ms: 10000
+# emergency pressure valve: each time heap usage after a full (CMS)
+# garbage collection is above this fraction of the max, Cassandra will
+# flush the largest memtables.
+#
+# Set to 1.0 to disable. Setting this lower than
+# CMSInitiatingOccupancyFraction is not likely to be useful.
+#
+# RELYING ON THIS AS YOUR PRIMARY TUNING MECHANISM WILL WORK POORLY:
+# it is most effective under light to moderate load, or read-heavy
+# workloads; under truly massive write load, it will often be too
+# little, too late.
+flush_largest_memtables_at: 0.75
+
+# emergency pressure valve #2: the first time heap usage after a full
+# (CMS) garbage collection is above this fraction of the max,
+# Cassandra will reduce cache maximum _capacity_ to the given fraction
+# (reduce_cache_capacity_to) of the current _size_. Should usually be
+# set substantially above flush_largest_memtables_at, since flushing
+# memtables has less long-term impact on the system.
+#
+# Set to 1.0 to disable. Setting this lower than
+# CMSInitiatingOccupancyFraction is not likely to be useful.
+reduce_cache_sizes_at: 0.85
+reduce_cache_capacity_to: 0.6
+
# Addresses of hosts that are deemed contact points.
# Cassandra nodes use this list of hosts to find each other and learn
# the topology of the ring. You must change this if you are running
Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/Config.java Wed Feb 9 21:22:01 2011
@@ -103,7 +103,10 @@ public class Config
public Integer index_interval = 128;
public List<RawKeyspace> keyspaces;
-
+ public Double flush_largest_memtables_at = 1.0;
+ public Double reduce_cache_sizes_at = 1.0;
+ public double reduce_cache_capacity_to = 0.6;
+
public static enum CommitLogSync {
periodic,
batch
Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/DatabaseDescriptor.java?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/DatabaseDescriptor.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/config/DatabaseDescriptor.java Wed Feb 9 21:22:01 2011
@@ -20,7 +20,6 @@ package org.apache.cassandra.config;
import java.io.*;
import java.net.InetAddress;
-import java.net.MalformedURLException;
import java.net.URL;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
@@ -1155,4 +1154,19 @@ public class DatabaseDescriptor
{
conf.dynamic_snitch_badness_threshold = dynamicBadnessThreshold;
}
+
+ public static double getFlushLargestMemtablesAt()
+ {
+ return conf.flush_largest_memtables_at;
+ }
+
+ public static double getReduceCacheSizesAt()
+ {
+ return conf.reduce_cache_sizes_at;
+ }
+
+ public static double getReduceCacheCapacityTo()
+ {
+ return conf.reduce_cache_capacity_to;
+ }
}
Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/db/ColumnFamilyStore.java Wed Feb 9 21:22:01 2011
@@ -1878,7 +1878,7 @@ public class ColumnFamilyStore implement
public String toString()
{
return "ColumnFamilyStore(" +
- "table='" + table + '\'' +
+ "table='" + table.name + '\'' +
", columnFamily='" + columnFamily + '\'' +
')';
}
@@ -2019,4 +2019,26 @@ public class ColumnFamilyStore implement
{
return partitioner instanceof LocalPartitioner;
}
+
+ /**
+ * sets each cache's maximum capacity to the DatabaseDescriptor.getReduceCacheCapacityTo() fraction of its current size; caches whose capacity is not above zero are left untouched
+ */
+ public void reduceCacheSizes()
+ {
+ if (ssTables.getRowCache().getCapacity() > 0)
+ {
+ int newCapacity = (int) (DatabaseDescriptor.getReduceCacheCapacityTo() * ssTables.getRowCache().getSize());
+ logger.warn(String.format("Reducing %s row cache capacity from %d to %s to reduce memory pressure",
+ columnFamily, ssTables.getRowCache().getCapacity(), newCapacity));
+ ssTables.getRowCache().setCapacity(newCapacity);
+ }
+
+ if (ssTables.getKeyCache().getCapacity() > 0)
+ {
+ int newCapacity = (int) (DatabaseDescriptor.getReduceCacheCapacityTo() * ssTables.getKeyCache().getSize());
+ logger.warn(String.format("Reducing %s key cache capacity from %d to %s to reduce memory pressure",
+ columnFamily, ssTables.getKeyCache().getCapacity(), newCapacity));
+ ssTables.getKeyCache().setCapacity(newCapacity);
+ }
+ }
}
Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/AbstractCassandraDaemon.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/AbstractCassandraDaemon.java?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/AbstractCassandraDaemon.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/AbstractCassandraDaemon.java Wed Feb 9 21:22:01 2011
@@ -162,6 +162,15 @@ public abstract class AbstractCassandraD
Table.open(table);
}
+ try
+ {
+ GCInspector.instance.start();
+ }
+ catch (Throwable t)
+ {
+ logger.warn("Unable to start GCInspector (currently only supported on the Sun JVM)");
+ }
+
// replay the log if necessary and check for compaction candidates
CommitLog.recover();
CompactionManager.instance.checkAllColumnFamilies();
Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/GCInspector.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/GCInspector.java?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/GCInspector.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/GCInspector.java Wed Feb 9 21:22:01 2011
@@ -32,6 +32,7 @@ import javax.management.ObjectName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.utils.StatusLogger;
public class GCInspector
@@ -46,6 +47,7 @@ public class GCInspector
private HashMap<String, Long> gctimes = new HashMap<String, Long>();
List<Object> beans = new ArrayList<Object>(); // these are instances of com.sun.management.GarbageCollectorMXBean
+ private volatile boolean cacheSizesReduced;
public GCInspector()
{
@@ -87,13 +89,13 @@ public class GCInspector
{
public void run()
{
- logIntervalGCStats();
+ logGCResults();
}
};
StorageService.scheduledTasks.scheduleWithFixedDelay(t, INTERVAL_IN_MS, INTERVAL_IN_MS, TimeUnit.MILLISECONDS);
}
- private void logIntervalGCStats()
+ private void logGCResults()
{
for (Object gc : beans)
{
@@ -121,7 +123,7 @@ public class GCInspector
}
String st = String.format("GC for %s: %s ms, %s reclaimed leaving %s used; max is %s",
- gcw.getName(), gcw.getDuration(), previousMemoryUsed - memoryUsed, memoryUsed, memoryMax);
+ gcw.getName(), gcw.getDuration(), previousMemoryUsed - memoryUsed, memoryUsed, memoryMax);
if (gcw.getDuration() > MIN_DURATION)
logger.info(st);
else if (logger.isDebugEnabled())
@@ -129,6 +131,25 @@ public class GCInspector
if (gcw.getDuration() > MIN_DURATION_TPSTATS)
StatusLogger.log();
+
+ // if we just finished a full collection and we're still using a lot of memory, try to reduce the pressure
+ if (gcw.getName().equals("ConcurrentMarkSweep"))
+ {
+ double usage = (double) memoryUsed / memoryMax;
+
+ if (memoryUsed > DatabaseDescriptor.getReduceCacheSizesAt() * memoryMax && !cacheSizesReduced)
+ {
+ cacheSizesReduced = true;
+ logger.warn("Heap is " + usage + " full. You may need to reduce memtable and/or cache sizes. Cassandra is now reducing cache sizes to free up memory. Adjust reduce_cache_sizes_at threshold in cassandra.yaml if you don't want Cassandra to do this automatically");
+ StorageService.instance.reduceCacheSizes();
+ }
+
+ if (memoryUsed > DatabaseDescriptor.getFlushLargestMemtablesAt() * memoryMax)
+ {
+ logger.warn("Heap is " + usage + " full. You may need to reduce memtable and/or cache sizes. Cassandra will now flush up to the two largest memtables to free up memory. Adjust flush_largest_memtables_at threshold in cassandra.yaml if you don't want Cassandra to do this automatically");
+ StorageService.instance.flushLargestMemtables();
+ }
+ }
}
}
Modified: cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/StorageService.java
URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/StorageService.java?rev=1069119&r1=1069118&r2=1069119&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/StorageService.java (original)
+++ cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/service/StorageService.java Wed Feb 9 21:22:01 2011
@@ -363,15 +363,6 @@ public class StorageService implements I
initialized = true;
isClientMode = false;
- try
- {
- GCInspector.instance.start();
- }
- catch (Throwable t)
- {
- logger_.warn("Unable to start GCInspector (currently only supported on the Sun JVM)");
- }
-
if (Boolean.parseBoolean(System.getProperty("cassandra.load_ring_state", "true")))
{
logger_.info("Loading persisted ring state");
@@ -2176,4 +2167,39 @@ public class StorageService implements I
{
return efficientCrossDCWrites;
}
+
+ /**
+ * Flushes the column family store with the highest memtable column count and the one with the highest memtable throughput (a single flush when both are the same store); logs an error and returns if no column families exist
+ */
+ public void flushLargestMemtables()
+ {
+ ColumnFamilyStore largestByOps = null;
+ ColumnFamilyStore largestByThroughput = null;
+ for (ColumnFamilyStore cfs : ColumnFamilyStore.all())
+ {
+ if (largestByOps == null || cfs.getMemtableColumnsCount() > largestByOps.getMemtableColumnsCount())
+ largestByOps = cfs;
+ if (largestByThroughput == null || cfs.getMemtableThroughputInMB() > largestByThroughput.getMemtableThroughputInMB())
+ largestByThroughput = cfs;
+ }
+ if (largestByOps == null)
+ {
+ logger_.error("Unable to reduce heap usage since there are no column families defined");
+ return;
+ }
+
+ logger_.warn("Flushing " + largestByOps + " to relieve memory pressure");
+ largestByOps.forceFlush();
+ if (largestByThroughput != largestByOps)
+ {
+ logger_.warn("Flushing " + largestByThroughput + " to relieve memory pressure");
+ largestByThroughput.forceFlush();
+ }
+ }
+
+ public void reduceCacheSizes()
+ {
+ for (ColumnFamilyStore cfs : ColumnFamilyStore.all())
+ cfs.reduceCacheSizes();
+ }
}