You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2013/10/01 21:21:01 UTC

[2/3] git commit: warn, then drop queries exceeding a configurable number of tombstones patch by jbellis; reviewed by slebresne for CASSANDRA-6117

warn, then drop queries exceeding a configurable number of tombstones
patch by jbellis; reviewed by slebresne for CASSANDRA-6117


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/dc7221f0
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/dc7221f0
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/dc7221f0

Branch: refs/heads/trunk
Commit: dc7221f0bfd44dfb70d0f9256c9cb1fe91462c50
Parents: 8f88670
Author: Jonathan Ellis <jb...@apache.org>
Authored: Tue Oct 1 14:20:19 2013 -0500
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Tue Oct 1 14:20:44 2013 -0500

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 NEWS.txt                                        |  8 +++++++
 conf/cassandra.yaml                             | 15 +++++++++----
 .../org/apache/cassandra/config/Config.java     |  3 ++-
 .../cassandra/config/DatabaseDescriptor.java    | 22 +++++++++++++-------
 .../apache/cassandra/db/ReadVerbHandler.java    | 12 ++++++++++-
 .../db/columniterator/IdentityQueryFilter.java  |  6 ++++++
 .../cassandra/db/filter/SliceQueryFilter.java   | 16 ++++++++++++--
 .../filter/TombstoneOverwhelmingException.java  |  5 +++++
 .../service/RangeSliceVerbHandler.java          |  5 +++++
 .../cassandra/service/StorageService.java       | 18 ++++++++++++----
 .../cassandra/service/StorageServiceMBean.java  | 13 ++++++++----
 12 files changed, 100 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 128d7c4..9bd01ef 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.0.2
+ * drop queries exceeding a configurable number of tombstones (CASSANDRA-6117)
  * Track and persist sstable read activity (CASSANDRA-5515)
  * Fixes for speculative retry (CASSANDRA-5932)
  * Improve memory usage of metadata min/max column names (CASSANDRA-6077)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/NEWS.txt
----------------------------------------------------------------------
diff --git a/NEWS.txt b/NEWS.txt
index 1f76277..747234f 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -22,6 +22,14 @@ New features
     - Speculative retry defaults to 99th percentile
       (See blog post at TODO)
 
+Upgrading
+---------
+    - tombstone_debug_threshold from 1.2.11 has been changed to
+      tombstone_warn_threshold and tombstone_failure_threshold.
+      Adjust these if your application relies on scanning a large
+      number of tombstones; see the comments in cassandra.yaml for why
+      this is dangerous.
+
 
 2.0.1
 =====

http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/conf/cassandra.yaml
----------------------------------------------------------------------
diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml
index e0f2e1e..5a0dfb6 100644
--- a/conf/cassandra.yaml
+++ b/conf/cassandra.yaml
@@ -404,10 +404,17 @@ snapshot_before_compaction: false
 # lose data on truncation or drop.
 auto_snapshot: true
 
-# Log a debug message if more than this many tombstones are scanned
-# in a single-partition query.  Set the threshold on SliceQueryFilter
-# to debug to enable.
-tombstone_debug_threshold: 10000
+# When executing a scan, within or across a partition, we need to keep the
+# tombstones seen in memory so we can return them to the coordinator, which
+# will use them to make sure other replicas also know about the deleted rows.
+# With workloads that generate a lot of tombstones, this can cause performance
+# problems and even exhaust the server heap.
+# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
+# Adjust the thresholds here if you understand the dangers and want to
+# scan more tombstones anyway.  These thresholds may also be adjusted at runtime
+# using the StorageService mbean.
+tombstone_warn_threshold: 1000
+tombstone_failure_threshold: 100000
 
 # Add column indexes to a row after its contents reach this size.
 # Increase if your column values are large, or if you have a very large

http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/config/Config.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java
index 131c9ef..8f0f22e 100644
--- a/src/java/org/apache/cassandra/config/Config.java
+++ b/src/java/org/apache/cassandra/config/Config.java
@@ -179,7 +179,8 @@ public class Config
 
     private static boolean outboundBindAny = false;
 
-    public volatile int tombstone_debug_threshold = 10000;
+    public volatile int tombstone_warn_threshold = 1000;
+    public volatile int tombstone_failure_threshold = 100000;
 
     public static boolean getOutboundBindAny()
     {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
index 14b932a..81c7308 100644
--- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
+++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java
@@ -890,18 +890,24 @@ public class DatabaseDescriptor
         return conf.commitlog_directory;
     }
 
-    /**
-     * How many tombstones need to be scanned before we log a
-     * debug message
-     */
-    public static int getTombstoneDebugThreshold()
+    public static int getTombstoneWarnThreshold()
+    {
+        return conf.tombstone_warn_threshold;
+    }
+
+    public static void setTombstoneWarnThreshold(int threshold)
+    {
+        conf.tombstone_warn_threshold = threshold;
+    }
+
+    public static int getTombstoneFailureThreshold()
     {
-        return conf.tombstone_debug_threshold;
+        return conf.tombstone_failure_threshold;
     }
 
-    public static void setTombstoneDebugThreshold(int tombstoneDebugThreshold)
+    public static void setTombstoneFailureThreshold(int threshold)
     {
-        conf.tombstone_debug_threshold = tombstoneDebugThreshold;
+        conf.tombstone_failure_threshold = threshold;
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/db/ReadVerbHandler.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/ReadVerbHandler.java b/src/java/org/apache/cassandra/db/ReadVerbHandler.java
index e811cc1..35082e6 100644
--- a/src/java/org/apache/cassandra/db/ReadVerbHandler.java
+++ b/src/java/org/apache/cassandra/db/ReadVerbHandler.java
@@ -20,6 +20,7 @@ package org.apache.cassandra.db;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.cassandra.db.filter.TombstoneOverwhelmingException;
 import org.apache.cassandra.net.IVerbHandler;
 import org.apache.cassandra.net.MessageIn;
 import org.apache.cassandra.net.MessageOut;
@@ -40,7 +41,16 @@ public class ReadVerbHandler implements IVerbHandler<ReadCommand>
 
         ReadCommand command = message.payload;
         Keyspace keyspace = Keyspace.open(command.ksName);
-        Row row = command.getRow(keyspace);
+        Row row;
+        try
+        {
+            row = command.getRow(keyspace);
+        }
+        catch (TombstoneOverwhelmingException e)
+        {
+            // error already logged.  Drop the request
+            return;
+        }
 
         MessageOut<ReadResponse> reply = new MessageOut<ReadResponse>(MessagingService.Verb.REQUEST_RESPONSE,
                                                                       getResponse(command, row),

http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/db/columniterator/IdentityQueryFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/IdentityQueryFilter.java b/src/java/org/apache/cassandra/db/columniterator/IdentityQueryFilter.java
index efad926..7b1085a 100644
--- a/src/java/org/apache/cassandra/db/columniterator/IdentityQueryFilter.java
+++ b/src/java/org/apache/cassandra/db/columniterator/IdentityQueryFilter.java
@@ -29,4 +29,10 @@ public class IdentityQueryFilter extends SliceQueryFilter
     {
         super(ByteBufferUtil.EMPTY_BYTE_BUFFER, ByteBufferUtil.EMPTY_BYTE_BUFFER, false, Integer.MAX_VALUE);
     }
+
+    @Override
+    protected boolean respectTombstoneFailures()
+    {
+        return false;
+    }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/db/filter/SliceQueryFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/filter/SliceQueryFilter.java b/src/java/org/apache/cassandra/db/filter/SliceQueryFilter.java
index 443ff8e..9c67d71 100644
--- a/src/java/org/apache/cassandra/db/filter/SliceQueryFilter.java
+++ b/src/java/org/apache/cassandra/db/filter/SliceQueryFilter.java
@@ -194,12 +194,24 @@ public class SliceQueryFilter implements IDiskAtomFilter
             if (columnCounter.live() > count)
                 break;
 
+            if (respectTombstoneFailures() && columnCounter.ignored() > DatabaseDescriptor.getTombstoneFailureThreshold())
+            {
+                Tracing.trace("Scanned over {} tombstones; query aborted (see tombstone_fail_threshold)", DatabaseDescriptor.getTombstoneFailureThreshold());
+                logger.error("Scanned over {} tombstones; query aborted (see tombstone_fail_threshold)", DatabaseDescriptor.getTombstoneFailureThreshold());
+                throw new TombstoneOverwhelmingException();
+            }
+
             container.addIfRelevant(column, tester, gcBefore);
         }
 
         Tracing.trace("Read {} live and {} tombstoned cells", columnCounter.live(), columnCounter.ignored());
-        if (columnCounter.ignored() > DatabaseDescriptor.getTombstoneDebugThreshold())
-            logger.debug("Read {} live and {} tombstoned cells", columnCounter.live(), columnCounter.ignored());
+        if (columnCounter.ignored() > DatabaseDescriptor.getTombstoneWarnThreshold())
+            logger.warn("Read {} live and {} tombstoned cells (see tombstone_warn_threshold)", columnCounter.live(), columnCounter.ignored());
+    }
+
+    protected boolean respectTombstoneFailures()
+    {
+        return true;
     }
 
     public int getLiveCount(ColumnFamily cf, long now)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/db/filter/TombstoneOverwhelmingException.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/filter/TombstoneOverwhelmingException.java b/src/java/org/apache/cassandra/db/filter/TombstoneOverwhelmingException.java
new file mode 100644
index 0000000..5975463
--- /dev/null
+++ b/src/java/org/apache/cassandra/db/filter/TombstoneOverwhelmingException.java
@@ -0,0 +1,5 @@
+package org.apache.cassandra.db.filter;
+
+public class TombstoneOverwhelmingException extends RuntimeException
+{
+}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/service/RangeSliceVerbHandler.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/service/RangeSliceVerbHandler.java b/src/java/org/apache/cassandra/service/RangeSliceVerbHandler.java
index 22f904c..f1fd1f9 100644
--- a/src/java/org/apache/cassandra/service/RangeSliceVerbHandler.java
+++ b/src/java/org/apache/cassandra/service/RangeSliceVerbHandler.java
@@ -19,6 +19,7 @@ package org.apache.cassandra.service;
 
 import org.apache.cassandra.db.AbstractRangeCommand;
 import org.apache.cassandra.db.RangeSliceReply;
+import org.apache.cassandra.db.filter.TombstoneOverwhelmingException;
 import org.apache.cassandra.net.IVerbHandler;
 import org.apache.cassandra.net.MessageIn;
 import org.apache.cassandra.net.MessagingService;
@@ -39,6 +40,10 @@ public class RangeSliceVerbHandler implements IVerbHandler<AbstractRangeCommand>
             Tracing.trace("Enqueuing response to {}", message.from);
             MessagingService.instance().sendReply(reply.createMessage(), id, message.from);
         }
+        catch (TombstoneOverwhelmingException e)
+        {
+            // error already logged.  Drop the request
+        }
         catch (Exception ex)
         {
             throw new RuntimeException(ex);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/service/StorageService.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java
index 0f1c35c..d28fcb5 100644
--- a/src/java/org/apache/cassandra/service/StorageService.java
+++ b/src/java/org/apache/cassandra/service/StorageService.java
@@ -3654,13 +3654,23 @@ public class StorageService extends NotificationBroadcasterSupport implements IE
         return DatabaseDescriptor.getPartitionerName();
     }
 
-    public int getTombstoneDebugThreshold()
+    public int getTombstoneWarnThreshold()
     {
-        return DatabaseDescriptor.getTombstoneDebugThreshold();
+        return DatabaseDescriptor.getTombstoneWarnThreshold();
     }
 
-    public void setTombstoneDebugThreshold(int tombstoneDebugThreshold)
+    public void setTombstoneWarnThreshold(int threshold)
     {
-        DatabaseDescriptor.setTombstoneDebugThreshold(tombstoneDebugThreshold);
+        DatabaseDescriptor.setTombstoneWarnThreshold(threshold);
+    }
+
+    public int getTombstoneFailureThreshold()
+    {
+        return DatabaseDescriptor.getTombstoneFailureThreshold();
+    }
+
+    public void setTombstoneFailureThreshold(int threshold)
+    {
+        DatabaseDescriptor.setTombstoneFailureThreshold(threshold);
     }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/dc7221f0/src/java/org/apache/cassandra/service/StorageServiceMBean.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/service/StorageServiceMBean.java b/src/java/org/apache/cassandra/service/StorageServiceMBean.java
index 73e4124..2dd8b00 100644
--- a/src/java/org/apache/cassandra/service/StorageServiceMBean.java
+++ b/src/java/org/apache/cassandra/service/StorageServiceMBean.java
@@ -477,8 +477,13 @@ public interface StorageServiceMBean extends NotificationEmitter
     /** Returns the cluster partitioner */
     public String getPartitionerName();
 
-    /** Returns the threshold for returning debugging queries with many tombstones */
-    public int getTombstoneDebugThreshold();
-    /** Sets the threshold for returning debugging queries with many tombstones */
-    public void setTombstoneDebugThreshold(int tombstoneDebugThreshold);
+    /** Returns the threshold for warning of queries with many tombstones */
+    public int getTombstoneWarnThreshold();
+    /** Sets the threshold for warning of queries with many tombstones */
+    public void setTombstoneWarnThreshold(int tombstoneDebugThreshold);
+
+    /** Returns the threshold for abandoning queries with many tombstones */
+    public int getTombstoneFailureThreshold();
+    /** Sets the threshold for abandoning queries with many tombstones */
+    public void setTombstoneFailureThreshold(int tombstoneDebugThreshold);
 }