You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by ru...@apache.org on 2018/02/08 18:55:24 UTC

[1/2] cassandra git commit: Count deleted rows scanned during reads for tracing and warning tombstone thresholds.

Repository: cassandra
Updated Branches:
  refs/heads/trunk 0f58f6c65 -> b2d20d4bb


Count deleted rows scanned during reads for tracing and warning tombstone thresholds.

If a row is read but is not live anymore (which happens with row level tombstones) it is not counted anywhere
in the metrics nor reported in tracing. Row tombstones themselves are not reported anywhere.
The consequence is that some delete heavy workloads will show no tombstone read but endure severe
performance issues. This commit counts deleted rows as standard tombstone cells.


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/419552a9
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/419552a9
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/419552a9

Branch: refs/heads/trunk
Commit: 419552a93f4e164dfce3cb6fea15328abade5d3f
Parents: 0ac5a17
Author: Alexander Dejanovski <al...@thelastpickle.com>
Authored: Mon Dec 18 18:27:44 2017 +0100
Committer: Jon Haddad <jo...@jonhaddad.com>
Committed: Thu Jan 11 11:24:59 2018 -0800

----------------------------------------------------------------------
 .../org/apache/cassandra/db/ReadCommand.java    |  26 ++-
 .../apache/cassandra/metrics/TableMetrics.java  |   2 +-
 .../apache/cassandra/db/ReadCommandTest.java    | 171 +++++++++++++++++++
 3 files changed, 193 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/419552a9/src/java/org/apache/cassandra/db/ReadCommand.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/ReadCommand.java b/src/java/org/apache/cassandra/db/ReadCommand.java
index 1d74c16..128f8f3 100644
--- a/src/java/org/apache/cassandra/db/ReadCommand.java
+++ b/src/java/org/apache/cassandra/db/ReadCommand.java
@@ -433,14 +433,26 @@ public abstract class ReadCommand extends MonitorableImpl implements ReadQuery
             @Override
             public Row applyToRow(Row row)
             {
-                if (row.hasLiveData(ReadCommand.this.nowInSec(), enforceStrictLiveness))
-                    ++liveRows;
-
+                boolean hasTombstones = false;
                 for (Cell cell : row.cells())
                 {
                     if (!cell.isLive(ReadCommand.this.nowInSec()))
+                    {
                         countTombstone(row.clustering());
+                        hasTombstones = true; // allows to avoid counting an extra tombstone if the whole row expired
+                    }
                 }
+
+                if (row.hasLiveData(ReadCommand.this.nowInSec(), enforceStrictLiveness))
+                    ++liveRows;
+                else if (!row.primaryKeyLivenessInfo().isLive(ReadCommand.this.nowInSec())
+                        && row.hasDeletion(ReadCommand.this.nowInSec())
+                        && !hasTombstones)
+                {
+                    // We're counting primary key deletions only here.
+                    countTombstone(row.clustering());
+                }
+
                 return row;
             }
 
@@ -474,7 +486,9 @@ public abstract class ReadCommand extends MonitorableImpl implements ReadQuery
                 boolean warnTombstones = tombstones > warningThreshold && respectTombstoneThresholds;
                 if (warnTombstones)
                 {
-                    String msg = String.format("Read %d live rows and %d tombstone cells for query %1.512s (see tombstone_warn_threshold)", liveRows, tombstones, ReadCommand.this.toCQLString());
+                    String msg = String.format(
+                            "Read %d live rows and %d tombstone cells for query %1.512s (see tombstone_warn_threshold)",
+                            liveRows, tombstones, ReadCommand.this.toCQLString());
                     ClientWarn.instance.warn(msg);
                     if (tombstones < failureThreshold)
                     {
@@ -484,7 +498,9 @@ public abstract class ReadCommand extends MonitorableImpl implements ReadQuery
                     logger.warn(msg);
                 }
 
-                Tracing.trace("Read {} live and {} tombstone cells{}", liveRows, tombstones, (warnTombstones ? " (see tombstone_warn_threshold)" : ""));
+                Tracing.trace("Read {} live rows and {} tombstone cells{}",
+                        liveRows, tombstones,
+                        (warnTombstones ? " (see tombstone_warn_threshold)" : ""));
             }
         };
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/419552a9/src/java/org/apache/cassandra/metrics/TableMetrics.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/metrics/TableMetrics.java b/src/java/org/apache/cassandra/metrics/TableMetrics.java
index 5c4a849..7ce2f16 100644
--- a/src/java/org/apache/cassandra/metrics/TableMetrics.java
+++ b/src/java/org/apache/cassandra/metrics/TableMetrics.java
@@ -125,7 +125,7 @@ public class TableMetrics
     public final Gauge<Double> keyCacheHitRate;
     /** Tombstones scanned in queries on this CF */
     public final TableHistogram tombstoneScannedHistogram;
-    /** Live cells scanned in queries on this CF */
+    /** Live rows scanned in queries on this CF */
     public final TableHistogram liveScannedHistogram;
     /** Column update time delta on this CF */
     public final TableHistogram colUpdateTimeDeltaHistogram;

http://git-wip-us.apache.org/repos/asf/cassandra/blob/419552a9/test/unit/org/apache/cassandra/db/ReadCommandTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/db/ReadCommandTest.java b/test/unit/org/apache/cassandra/db/ReadCommandTest.java
index 64ac627..61ce4bd 100644
--- a/test/unit/org/apache/cassandra/db/ReadCommandTest.java
+++ b/test/unit/org/apache/cassandra/db/ReadCommandTest.java
@@ -50,6 +50,8 @@ import org.apache.cassandra.exceptions.ConfigurationException;
 import org.apache.cassandra.io.util.DataInputBuffer;
 import org.apache.cassandra.io.util.DataOutputBuffer;
 import org.apache.cassandra.io.util.WrappedDataOutputStreamPlus;
+import org.apache.cassandra.metrics.TableMetrics;
+import org.apache.cassandra.metrics.TableMetrics.TableHistogram;
 import org.apache.cassandra.net.MessagingService;
 import org.apache.cassandra.schema.KeyspaceParams;
 import org.apache.cassandra.schema.TableMetadata;
@@ -340,4 +342,173 @@ public class ReadCommandTest
             count++;
         }
     }
+
+    @Test
+    public void testCountDeletedRows() throws Exception
+    {
+        ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(CF3);
+        // cfs = Mockito.spy(cfs);
+        // TableMetrics metricsMock = Mockito.mock(TableMetrics.class);
+        // Mockito.doReturn(metricsMock).when(cfs).metric;
+
+        String[][][] groups = new String[][][] {
+                new String[][] {
+                        new String[] { "1", "key1", "aa", "a" }, // "1" indicates to create the data, "-1" to delete the
+                                                                 // row
+                        new String[] { "1", "key2", "bb", "b" },
+                        new String[] { "1", "key3", "cc", "c" }
+                },
+                new String[][] {
+                        new String[] { "1", "key3", "dd", "d" },
+                        new String[] { "1", "key2", "ee", "e" },
+                        new String[] { "1", "key1", "ff", "f" }
+                },
+                new String[][] {
+                        new String[] { "1", "key6", "aa", "a" },
+                        new String[] { "1", "key5", "bb", "b" },
+                        new String[] { "1", "key4", "cc", "c" }
+                },
+                new String[][] {
+                        new String[] { "1", "key2", "aa", "a" },
+                        new String[] { "1", "key2", "cc", "c" },
+                        new String[] { "1", "key2", "dd", "d" }
+                },
+                new String[][] {
+                        new String[] { "-1", "key6", "aa", "a" },
+                        new String[] { "-1", "key2", "bb", "b" },
+                        new String[] { "-1", "key2", "ee", "e" },
+                        new String[] { "-1", "key2", "aa", "a" },
+                        new String[] { "-1", "key2", "cc", "c" },
+                        new String[] { "-1", "key2", "dd", "d" }
+                }
+        };
+
+        List<ByteBuffer> buffers = new ArrayList<>(groups.length);
+        int nowInSeconds = FBUtilities.nowInSeconds();
+        ColumnFilter columnFilter = ColumnFilter.allRegularColumnsBuilder(cfs.metadata()).build();
+        RowFilter rowFilter = RowFilter.create();
+        Slice slice = Slice.make(ClusteringBound.BOTTOM, ClusteringBound.TOP);
+        ClusteringIndexSliceFilter sliceFilter = new ClusteringIndexSliceFilter(
+                Slices.with(cfs.metadata().comparator, slice), false);
+
+        for (String[][] group : groups)
+        {
+            cfs.truncateBlocking();
+
+            List<SinglePartitionReadCommand> commands = new ArrayList<>(group.length);
+
+            for (String[] data : group)
+            {
+                if (data[0].equals("1"))
+                {
+                    new RowUpdateBuilder(cfs.metadata(), 0, ByteBufferUtil.bytes(data[1]))
+                            .clustering(data[2])
+                            .add(data[3], ByteBufferUtil.bytes("blah"))
+                            .build()
+                            .apply();
+                }
+                else
+                {
+                    RowUpdateBuilder.deleteRow(cfs.metadata(), FBUtilities.timestampMicros(),
+                            ByteBufferUtil.bytes(data[1]), data[2]).apply();
+                }
+                commands.add(SinglePartitionReadCommand.create(cfs.metadata(), nowInSeconds, columnFilter, rowFilter,
+                        DataLimits.NONE, Util.dk(data[1]), sliceFilter));
+            }
+
+            cfs.forceBlockingFlush();
+
+            ReadQuery query = new SinglePartitionReadCommand.Group(commands, DataLimits.NONE);
+
+            try (ReadExecutionController executionController = query.executionController();
+                    UnfilteredPartitionIterator iter = query.executeLocally(executionController);
+                    DataOutputBuffer buffer = new DataOutputBuffer())
+            {
+                UnfilteredPartitionIterators.serializerForIntraNode().serialize(iter,
+                        columnFilter,
+                        buffer,
+                        MessagingService.current_version);
+                buffers.add(buffer.buffer());
+            }
+        }
+
+        assertEquals(5, cfs.metric.tombstoneScannedHistogram.cf.getSnapshot().getMax());
+    }
+
+    @Test
+    public void testCountWithNoDeletedRow() throws Exception
+    {
+        ColumnFamilyStore cfs = Keyspace.open(KEYSPACE).getColumnFamilyStore(CF3);
+
+        String[][][] groups = new String[][][] {
+                new String[][] {
+                        new String[] { "1", "key1", "aa", "a" }, // "1" indicates to create the data, "-1" to delete the
+                                                                 // row
+                        new String[] { "1", "key2", "bb", "b" },
+                        new String[] { "1", "key3", "cc", "c" }
+                },
+                new String[][] {
+                        new String[] { "1", "key3", "dd", "d" },
+                        new String[] { "1", "key2", "ee", "e" },
+                        new String[] { "1", "key1", "ff", "f" }
+                },
+                new String[][] {
+                        new String[] { "1", "key6", "aa", "a" },
+                        new String[] { "1", "key5", "bb", "b" },
+                        new String[] { "1", "key4", "cc", "c" }
+                }
+        };
+
+        List<ByteBuffer> buffers = new ArrayList<>(groups.length);
+        int nowInSeconds = FBUtilities.nowInSeconds();
+        ColumnFilter columnFilter = ColumnFilter.allRegularColumnsBuilder(cfs.metadata()).build();
+        RowFilter rowFilter = RowFilter.create();
+        Slice slice = Slice.make(ClusteringBound.BOTTOM, ClusteringBound.TOP);
+        ClusteringIndexSliceFilter sliceFilter = new ClusteringIndexSliceFilter(
+                Slices.with(cfs.metadata().comparator, slice), false);
+
+        for (String[][] group : groups)
+        {
+            cfs.truncateBlocking();
+
+            List<SinglePartitionReadCommand> commands = new ArrayList<>(group.length);
+
+            for (String[] data : group)
+            {
+                if (data[0].equals("1"))
+                {
+                    new RowUpdateBuilder(cfs.metadata(), 0, ByteBufferUtil.bytes(data[1]))
+                            .clustering(data[2])
+                            .add(data[3], ByteBufferUtil.bytes("blah"))
+                            .build()
+                            .apply();
+                }
+                else
+                {
+                    RowUpdateBuilder.deleteRow(cfs.metadata(), FBUtilities.timestampMicros(),
+                            ByteBufferUtil.bytes(data[1]), data[2]).apply();
+                }
+                commands.add(SinglePartitionReadCommand.create(cfs.metadata(), nowInSeconds, columnFilter, rowFilter,
+                        DataLimits.NONE, Util.dk(data[1]), sliceFilter));
+            }
+
+            cfs.forceBlockingFlush();
+
+            ReadQuery query = new SinglePartitionReadCommand.Group(commands, DataLimits.NONE);
+
+            try (ReadExecutionController executionController = query.executionController();
+                    UnfilteredPartitionIterator iter = query.executeLocally(executionController);
+                    DataOutputBuffer buffer = new DataOutputBuffer())
+            {
+                UnfilteredPartitionIterators.serializerForIntraNode().serialize(iter,
+                        columnFilter,
+                        buffer,
+                        MessagingService.current_version);
+                buffers.add(buffer.buffer());
+            }
+        }
+
+        assertEquals(1, cfs.metric.tombstoneScannedHistogram.cf.getSnapshot().getMax());
+    }
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org


[2/2] cassandra git commit: Count deleted rows scanned during reads for tracing and warning tombstone thresholds.

Posted by ru...@apache.org.
Count deleted rows scanned during reads for tracing and warning tombstone thresholds.

If a row is read but is not live anymore (which happens with row level tombstones) it is not counted anywhere
in the metrics nor reported in tracing. Row tombstones themselves are not reported anywhere.
The consequence is that some delete heavy workloads will show no tombstone read but endure severe
performance issues. This commit counts deleted rows as standard tombstone cells.

Patch by Alexander Dejanovski; Reviewed by Jon Haddad for CASSANDRA-8527


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/b2d20d4b
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/b2d20d4b
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/b2d20d4b

Branch: refs/heads/trunk
Commit: b2d20d4bb10e73935a97d6fbd848e4cb649c105c
Parents: 0f58f6c 419552a
Author: Jon Haddad <jo...@jonhaddad.com>
Authored: Thu Feb 8 10:50:14 2018 -0800
Committer: Jon Haddad <jo...@jonhaddad.com>
Committed: Thu Feb 8 10:50:14 2018 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 .../org/apache/cassandra/db/ReadCommand.java    |  26 ++-
 .../apache/cassandra/metrics/TableMetrics.java  |   2 +-
 .../apache/cassandra/db/ReadCommandTest.java    | 169 +++++++++++++++++++
 4 files changed, 192 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/b2d20d4b/CHANGES.txt
----------------------------------------------------------------------
diff --cc CHANGES.txt
index 191caaf,bc2645f..c7c1b6f
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -186,6 -174,6 +186,7 @@@
   * Switch to client init for sstabledump (CASSANDRA-13683)
   * CQLSH: Don't pause when capturing data (CASSANDRA-13743)
   * nodetool clearsnapshot requires --all to clear all snapshots (CASSANDRA-13391)
++ * Correctly count range tombstones in traces and tombstone thresholds (CASSANDRA-8527)
  
  
  3.11.2

http://git-wip-us.apache.org/repos/asf/cassandra/blob/b2d20d4b/test/unit/org/apache/cassandra/db/ReadCommandTest.java
----------------------------------------------------------------------


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org