You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2013/10/01 21:21:01 UTC
[2/3] git commit: warn, then drop queries exceeding a configurable
number of tombstones patch by jbellis;
reviewed by slebresne for CASSANDRA-6117
warn, then drop queries exceeding a configurable number of tombstones
patch by jbellis; reviewed by slebresne for CASSANDRA-6117
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/dc7221f0
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/dc7221f0
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/dc7221f0
Branch: refs/heads/trunk
Commit: dc7221f0bfd44dfb70d0f9256c9cb1fe91462c50
Parents: 8f88670
Author: Jonathan Ellis <jb...@apache.org>
Authored: Tue Oct 1 14:20:19 2013 -0500
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Tue Oct 1 14:20:44 2013 -0500
----------------------------------------------------------------------
CHANGES.txt | 1 +
NEWS.txt | 8 +++++++
conf/cassandra.yaml | 15 +++++++++----
.../org/apache/cassandra/config/Config.java | 3 ++-
.../cassandra/config/DatabaseDescriptor.java | 22 +++++++++++++-------
.../apache/cassandra/db/ReadVerbHandler.java | 12 ++++++++++-
.../db/columniterator/IdentityQueryFilter.java | 6 ++++++
.../cassandra/db/filter/SliceQueryFilter.java | 16 ++++++++++++--
.../filter/TombstoneOverwhelmingException.java | 5 +++++
.../service/RangeSliceVerbHandler.java | 5 +++++
.../cassandra/service/StorageService.java | 18 ++++++++++++----
.../cassandra/service/StorageServiceMBean.java | 13 ++++++++----
12 files changed, 100 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 128d7c4..9bd01ef 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
2.0.2
+ * drop queries exceeding a configurable number of tombstones (CASSANDRA-6117)
* Track and persist sstable read activity (CASSANDRA-5515)
* Fixes for speculative retry (CASSANDRA-5932)
* Improve memory usage of metadata min/max column names (CASSANDRA-6077)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/NEWS.txt
----------------------------------------------------------------------
diff --git a/NEWS.txt b/NEWS.txt
index 1f76277..747234f 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -22,6 +22,14 @@ New features
- Speculative retry defaults to 99th percentile
(See blog post at TODO)
+Upgrading
+---------
+ - tombstone_debug_threshold from 1.2.11 has been changed to
+ tombstone_warn_threshold and tombstone_failure_threshold.
+ Adjust these if your application relies on scanning a large
+ number of tombstones; see the comments in cassandra.yaml for why
+ this is dangerous.
+
2.0.1
=====
http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/conf/cassandra.yaml
----------------------------------------------------------------------
diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml
index e0f2e1e..5a0dfb6 100644
--- a/conf/cassandra.yaml
+++ b/conf/cassandra.yaml
@@ -404,10 +404,17 @@ snapshot_before_compaction: false
# lose data on truncation or drop.
auto_snapshot: true
-# Log a debug message if more than this many tombstones are scanned
-# in a single-partition query. Set the threshold on SliceQueryFilter
-# to debug to enable.
-tombstone_debug_threshold: 10000
+# When executing a scan, within or across a partition, we need to keep the
+# tombstones seen in memory so we can return them to the coordinator, which
+# will use them to make sure other replicas also know about the deleted rows.
+# With workloads that generate a lot of tombstones, this can cause performance
+# problems and even exhaust the server heap.
+# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
+# Adjust the thresholds here if you understand the dangers and want to
+# scan more tombstones anyway. These thresholds may also be adjusted at runtime
+# using the StorageService mbean.
+tombstone_warn_threshold: 1000
+tombstone_failure_threshold: 100000
# Add column indexes to a row after its contents reach this size.
# Increase if your column values are large, or if you have a very large
http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/config/Config.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java
index 131c9ef..8f0f22e 100644
--- a/src/java/org/apache/cassandra/config/Config.java
+++ b/src/java/org/apache/cassandra/config/Config.java
@@ -179,7 +179,8 @@ public class Config
private static boolean outboundBindAny = false;
- public volatile int tombstone_debug_threshold = 10000;
+ public volatile int tombstone_warn_threshold = 1000;
+ public volatile int tombstone_failure_threshold = 100000;
public static boolean getOutboundBindAny()
{
http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
index 14b932a..81c7308 100644
--- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
+++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
@@ -890,18 +890,24 @@ public class DatabaseDescriptor
return conf.commitlog_directory;
}
- /**
- * How many tombstones need to be scanned before we log a
- * debug message
- */
- public static int getTombstoneDebugThreshold()
+ public static int getTombstoneWarnThreshold()
+ {
+ return conf.tombstone_warn_threshold;
+ }
+
+ public static void setTombstoneWarnThreshold(int threshold)
+ {
+ conf.tombstone_warn_threshold = threshold;
+ }
+
+ public static int getTombstoneFailureThreshold()
{
- return conf.tombstone_debug_threshold;
+ return conf.tombstone_failure_threshold;
}
- public static void setTombstoneDebugThreshold(int tombstoneDebugThreshold)
+ public static void setTombstoneFailureThreshold(int threshold)
{
- conf.tombstone_debug_threshold = tombstoneDebugThreshold;
+ conf.tombstone_failure_threshold = threshold;
}
/**
http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/db/ReadVerbHandler.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/ReadVerbHandler.java b/src/java/org/apache/cassandra/db/ReadVerbHandler.java
index e811cc1..35082e6 100644
--- a/src/java/org/apache/cassandra/db/ReadVerbHandler.java
+++ b/src/java/org/apache/cassandra/db/ReadVerbHandler.java
@@ -20,6 +20,7 @@ package org.apache.cassandra.db;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.cassandra.db.filter.TombstoneOverwhelmingException;
import org.apache.cassandra.net.IVerbHandler;
import org.apache.cassandra.net.MessageIn;
import org.apache.cassandra.net.MessageOut;
@@ -40,7 +41,16 @@ public class ReadVerbHandler implements IVerbHandler<ReadCommand>
ReadCommand command = message.payload;
Keyspace keyspace = Keyspace.open(command.ksName);
- Row row = command.getRow(keyspace);
+ Row row;
+ try
+ {
+ row = command.getRow(keyspace);
+ }
+ catch (TombstoneOverwhelmingException e)
+ {
+ // error already logged. Drop the request
+ return;
+ }
MessageOut<ReadResponse> reply = new MessageOut<ReadResponse>(MessagingService.Verb.REQUEST_RESPONSE,
getResponse(command, row),
http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/db/columniterator/IdentityQueryFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/IdentityQueryFilter.java b/src/java/org/apache/cassandra/db/columniterator/IdentityQueryFilter.java
index efad926..7b1085a 100644
--- a/src/java/org/apache/cassandra/db/columniterator/IdentityQueryFilter.java
+++ b/src/java/org/apache/cassandra/db/columniterator/IdentityQueryFilter.java
@@ -29,4 +29,10 @@ public class IdentityQueryFilter extends SliceQueryFilter
{
super(ByteBufferUtil.EMPTY_BYTE_BUFFER, ByteBufferUtil.EMPTY_BYTE_BUFFER, false, Integer.MAX_VALUE);
}
+
+ @Override
+ protected boolean respectTombstoneFailures()
+ {
+ return false;
+ }
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/db/filter/SliceQueryFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/filter/SliceQueryFilter.java b/src/java/org/apache/cassandra/db/filter/SliceQueryFilter.java
index 443ff8e..9c67d71 100644
--- a/src/java/org/apache/cassandra/db/filter/SliceQueryFilter.java
+++ b/src/java/org/apache/cassandra/db/filter/SliceQueryFilter.java
@@ -194,12 +194,24 @@ public class SliceQueryFilter implements IDiskAtomFilter
if (columnCounter.live() > count)
break;
+ if (respectTombstoneFailures() && columnCounter.ignored() > DatabaseDescriptor.getTombstoneFailureThreshold())
+ {
+ Tracing.trace("Scanned over {} tombstones; query aborted (see tombstone_fail_threshold)", DatabaseDescriptor.getTombstoneFailureThreshold());
+ logger.error("Scanned over {} tombstones; query aborted (see tombstone_fail_threshold)", DatabaseDescriptor.getTombstoneFailureThreshold());
+ throw new TombstoneOverwhelmingException();
+ }
+
container.addIfRelevant(column, tester, gcBefore);
}
Tracing.trace("Read {} live and {} tombstoned cells", columnCounter.live(), columnCounter.ignored());
- if (columnCounter.ignored() > DatabaseDescriptor.getTombstoneDebugThreshold())
- logger.debug("Read {} live and {} tombstoned cells", columnCounter.live(), columnCounter.ignored());
+ if (columnCounter.ignored() > DatabaseDescriptor.getTombstoneWarnThreshold())
+ logger.warn("Read {} live and {} tombstoned cells (see tombstone_warn_threshold)", columnCounter.live(), columnCounter.ignored());
+ }
+
+ protected boolean respectTombstoneFailures()
+ {
+ return true;
}
public int getLiveCount(ColumnFamily cf, long now)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/db/filter/TombstoneOverwhelmingException.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/filter/TombstoneOverwhelmingException.java b/src/java/org/apache/cassandra/db/filter/TombstoneOverwhelmingException.java
new file mode 100644
index 0000000..5975463
--- /dev/null
+++ b/src/java/org/apache/cassandra/db/filter/TombstoneOverwhelmingException.java
@@ -0,0 +1,5 @@
+package org.apache.cassandra.db.filter;
+
+public class TombstoneOverwhelmingException extends RuntimeException
+{
+}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/service/RangeSliceVerbHandler.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/service/RangeSliceVerbHandler.java b/src/java/org/apache/cassandra/service/RangeSliceVerbHandler.java
index 22f904c..f1fd1f9 100644
--- a/src/java/org/apache/cassandra/service/RangeSliceVerbHandler.java
+++ b/src/java/org/apache/cassandra/service/RangeSliceVerbHandler.java
@@ -19,6 +19,7 @@ package org.apache.cassandra.service;
import org.apache.cassandra.db.AbstractRangeCommand;
import org.apache.cassandra.db.RangeSliceReply;
+import org.apache.cassandra.db.filter.TombstoneOverwhelmingException;
import org.apache.cassandra.net.IVerbHandler;
import org.apache.cassandra.net.MessageIn;
import org.apache.cassandra.net.MessagingService;
@@ -39,6 +40,10 @@ public class RangeSliceVerbHandler implements IVerbHandler<AbstractRangeCommand>
Tracing.trace("Enqueuing response to {}", message.from);
MessagingService.instance().sendReply(reply.createMessage(), id, message.from);
}
+ catch (TombstoneOverwhelmingException e)
+ {
+ // error already logged. Drop the request
+ }
catch (Exception ex)
{
throw new RuntimeException(ex);
http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/service/StorageService.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java
index 0f1c35c..d28fcb5 100644
--- a/src/java/org/apache/cassandra/service/StorageService.java
+++ b/src/java/org/apache/cassandra/service/StorageService.java
@@ -3654,13 +3654,23 @@ public class StorageService extends NotificationBroadcasterSupport implements IE
return DatabaseDescriptor.getPartitionerName();
}
- public int getTombstoneDebugThreshold()
+ public int getTombstoneWarnThreshold()
{
- return DatabaseDescriptor.getTombstoneDebugThreshold();
+ return DatabaseDescriptor.getTombstoneWarnThreshold();
}
- public void setTombstoneDebugThreshold(int tombstoneDebugThreshold)
+ public void setTombstoneWarnThreshold(int threshold)
{
- DatabaseDescriptor.setTombstoneDebugThreshold(tombstoneDebugThreshold);
+ DatabaseDescriptor.setTombstoneWarnThreshold(threshold);
+ }
+
+ public int getTombstoneFailureThreshold()
+ {
+ return DatabaseDescriptor.getTombstoneFailureThreshold();
+ }
+
+ public void setTombstoneFailureThreshold(int threshold)
+ {
+ DatabaseDescriptor.setTombstoneFailureThreshold(threshold);
}
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/service/StorageServiceMBean.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java
index 73e4124..2dd8b00 100644
--- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java
+++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java
@@ -477,8 +477,13 @@ public interface StorageServiceMBean extends NotificationEmitter
/** Returns the cluster partitioner */
public String getPartitionerName();
- /** Returns the threshold for returning debugging queries with many tombstones */
- public int getTombstoneDebugThreshold();
- /** Sets the threshold for returning debugging queries with many tombstones */
- public void setTombstoneDebugThreshold(int tombstoneDebugThreshold);
+ /** Returns the threshold for warning of queries with many tombstones */
+ public int getTombstoneWarnThreshold();
+ /** Sets the threshold for warning of queries with many tombstones */
+ public void setTombstoneWarnThreshold(int tombstoneDebugThreshold);
+
+ /** Returns the threshold for abandoning queries with many tombstones */
+ public int getTombstoneFailureThreshold();
+ /** Sets the threshold for abandoning queries with many tombstones */
+ public void setTombstoneFailureThreshold(int tombstoneDebugThreshold);
}