You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by sl...@apache.org on 2015/01/14 11:12:13 UTC
cassandra git commit: Fix potentially returning deleted row with
range tombstones
Repository: cassandra
Updated Branches:
refs/heads/cassandra-2.0 c5ccdb766 -> 2424dd133
Fix potentially returning deleted row with range tombstones
patch by slebresne; reviewed by thobbs for CASSANDRA-8558
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/2424dd13
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/2424dd13
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/2424dd13
Branch: refs/heads/cassandra-2.0
Commit: 2424dd133bd971de4b19e5030619d7be79e261e0
Parents: c5ccdb7
Author: Sylvain Lebresne <sy...@datastax.com>
Authored: Wed Jan 14 11:07:22 2015 +0100
Committer: Sylvain Lebresne <sy...@datastax.com>
Committed: Wed Jan 14 11:11:24 2015 +0100
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../db/columniterator/IndexedSliceReader.java | 36 ++++++++++++++++++--
2 files changed, 34 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/2424dd13/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index a711790..92bf422 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
2.0.12:
+ * Fix potentially returning deleted rows with range tombstone (CASSANDRA-8558)
* Make sure we unmark compacting after scrub/cleanup etc (CASSANDRA-8548)
* Check for available disk space before starting a compaction (CASSANDRA-8562)
* Fix DISTINCT queries with LIMITs or paging when some partitions
http://git-wip-us.apache.org/repos/asf/cassandra/blob/2424dd13/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
index b6aa085..ac48fc0 100644
--- a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
+++ b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
@@ -327,6 +327,10 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA
// If we read blocks in reversed disk order, we may have columns from the previous block to handle.
// Note that prefetched keeps columns in reversed disk order.
+ // Also note that Range Tombstone handling is a bit tricky, because we may run into range tombstones
+ // that cover a slice *after* we've moved to the previous slice. To keep it simple, we simply include
+ // every RT in prefetched: it's only slightly inefficient to do so and there are only so many RTs that
+ // can be mistakenly added this way.
if (reversed && !prefetched.isEmpty())
{
boolean gotSome = false;
@@ -340,8 +344,22 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA
if (isColumnBeforeSliceStart(prefetchedCol))
{
inSlice = false;
- if (!setNextSlice())
- return false;
+
+ // As explained above, we add RT unconditionally
+ if (prefetchedCol instanceof RangeTombstone)
+ {
+ blockColumns.addLast(prefetched.poll());
+ gotSome = true;
+ continue;
+ }
+
+ // Otherwise, we either move to the next slice or, if we have none (which can happen
+ // because we unwind prefetched no matter what due to RT), we skip the cell
+ if (hasMoreSlice())
+ setNextSlice();
+ else
+ prefetched.poll();
+
}
// col is within slice, all columns
// (we go in reverse, so as soon as we are in a slice, no need to check
@@ -416,12 +434,19 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA
// (If in slice, don't bother checking that until we change slice)
if (!inSlice && isColumnBeforeSliceStart(column))
{
- if (reversed)
+ // If it's a rangeTombstone, then we need to read it and include it unless its end
+ // stops before our slice start.
+ if (column instanceof RangeTombstone && !isBeforeSliceStart(((RangeTombstone)column).max))
+ {
+ addColumn(column);
+ }
+ else if (reversed)
{
// the next slice select columns that are before the current one, so it may
// match this column, so keep it around.
prefetched.addFirst(column);
}
+
column = null;
}
// col is within slice
@@ -492,6 +517,11 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA
// (If in slice, don't bother checking that until we change slice)
if (!inSlice && isColumnBeforeSliceStart(column))
{
+ // If it's a rangeTombstone, then we need to read it and include it unless its end
+ // stops before our slice start.
+ if (column instanceof RangeTombstone && !isBeforeSliceStart(((RangeTombstone)column).max))
+ addColumn(column);
+
column = null;
continue;
}