You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by sl...@apache.org on 2017/03/29 11:24:56 UTC
[2/6] cassandra git commit: Legacy deserializer can create empty range tombstones

Legacy deserializer can create empty range tombstones

patch by Sylvain Lebresne; reviewed by Branimir Lambov for CASSANDRA-13341


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/451fe9d8
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/451fe9d8
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/451fe9d8

Branch: refs/heads/cassandra-3.11
Commit: 451fe9d8ac567942f62852f542d28d7d1116f1a1
Parents: 6edc268
Author: Sylvain Lebresne <sy...@datastax.com>
Authored: Thu Mar 16 17:25:39 2017 +0100
Committer: Sylvain Lebresne <sy...@datastax.com>
Committed: Wed Mar 29 13:17:58 2017 +0200

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 .../cassandra/db/UnfilteredDeserializer.java    | 124 ++++++++++++++-----
 .../db/rows/RangeTombstoneBoundMarker.java      |   8 ++
 .../db/rows/RangeTombstoneBoundaryMarker.java   |   5 +
 .../cassandra/db/rows/RangeTombstoneMarker.java |   2 +
 .../cassandra/db/OldFormatDeserializerTest.java |  54 ++++++++
 6 files changed, 162 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/451fe9d8/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index b46eb50..c4293de 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.0.13
+ * Legacy deserializer can create empty range tombstones (CASSANDRA-13341)
  * Use the Kernel32 library to retrieve the PID on Windows and fix startup checks (CASSANDRA-13333)
  * Fix code to not exchange schema across major versions (CASSANDRA-13274)
  * Dropping column results in "corrupt" SSTable (CASSANDRA-13337)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/451fe9d8/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java b/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
index 7bbbfdb..92690e1 100644
--- a/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
+++ b/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
@@ -26,8 +26,6 @@ import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.PeekingIterator;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.cassandra.config.CFMetaData;
 import org.apache.cassandra.db.rows.*;
 import org.apache.cassandra.io.util.DataInputPlus;
@@ -43,8 +41,6 @@ import org.apache.cassandra.net.MessagingService;
  */
 public abstract class UnfilteredDeserializer
 {
-    private static final Logger logger = LoggerFactory.getLogger(UnfilteredDeserializer.class);
-
     protected final CFMetaData metadata;
     protected final DataInputPlus in;
     protected final SerializationHelper helper;
@@ -433,21 +429,31 @@ public abstract class UnfilteredDeserializer
                 {
                     if (atoms.hasNext())
                     {
+                        // If there is a range tombstone to open strictly before the next row/RT, we need to return that open (or boundary) marker first.
+                        if (tombstoneTracker.hasOpeningMarkerBefore(atoms.peek()))
+                        {
+                            next = tombstoneTracker.popOpeningMarker();
+                        }
                         // If a range tombstone closes strictly before the next row/RT, we need to return that close (or boundary) marker first.
-                        if (tombstoneTracker.hasClosingMarkerBefore(atoms.peek()))
+                        else if (tombstoneTracker.hasClosingMarkerBefore(atoms.peek()))
                         {
                             next = tombstoneTracker.popClosingMarker();
                         }
                         else
                         {
                             LegacyLayout.LegacyAtom atom = atoms.next();
-                            if (!tombstoneTracker.isShadowed(atom))
-                                next = isRow(atom) ? readRow(atom) : tombstoneTracker.openNew(atom.asRangeTombstone());
+                            if (tombstoneTracker.isShadowed(atom))
+                                continue;
+
+                            if (isRow(atom))
+                                next = readRow(atom);
+                            else
+                                tombstoneTracker.openNew(atom.asRangeTombstone());
                         }
                     }
                     else if (tombstoneTracker.hasOpenTombstones())
                     {
-                        next = tombstoneTracker.popClosingMarker();
+                        next = tombstoneTracker.popMarker();
                     }
                     else
                     {
@@ -562,11 +568,31 @@ public abstract class UnfilteredDeserializer
 
             /**
              * Tracks which range tombstones are open when deserializing the old format.
+             * <p>
+             * This is a bit tricky because in the old of format we could have duplicated tombstones, overlapping ones,
+             * shadowed ones, etc.., but we should generate from that a "flat" output where at most one non-shadoowed
+             * range is open at any given time and without empty range.
+             * <p>
+             * One consequence of that is that we have to be careful to not generate markers too soon. For instance,
+             * we might get a range tombstone [1, 1]@3 followed by [1, 10]@5. So if we generate an opening marker on
+             * the first tombstone (so INCL_START(1)@3), we're screwed when we get to the 2nd range tombstone: we really
+             * should ignore the first tombstone in that that and generate INCL_START(1)@5 (assuming obviously we don't
+             * have one more range tombstone starting at 1 in the stream). This is why we have the
+             * {@link #hasOpeningMarkerBefore} method: in practice, we remember when a marker should be opened, but only
+             * generate that opening marker when we're sure that we won't get anything shadowing that marker.
+             * <p>
+             * For closing marker, we also have a {@link #hasClosingMarkerBefore} because in the old format the closing
+             * markers comes with the opening one, but we should generate them "in order" in the new format.
              */
             private class TombstoneTracker
             {
                 private final DeletionTime partitionDeletion;
 
+                // As explained in the javadoc, we need to wait to generate an opening marker until we're sure we have
+                // seen anything that could shadow it. So this remember a marker that needs to be opened but hasn't
+                // been yet. This is truly returned when hasOpeningMarkerBefore tells us it's safe to.
+                private RangeTombstoneMarker openMarkerToReturn;
+
                 // Open tombstones sorted by their closing bound (i.e. first tombstone is the first to close).
                 // As we only track non-fully-shadowed ranges, the first range is necessarily the currently
                 // open tombstone (the one with the higher timestamp).
@@ -594,6 +620,23 @@ public abstract class UnfilteredDeserializer
                 }
 
                 /**
+                 * Whether there is an outstanding opening marker that should be returned before we process the provided row/RT.
+                 */
+                public boolean hasOpeningMarkerBefore(LegacyLayout.LegacyAtom atom)
+                {
+                    return openMarkerToReturn != null
+                           && metadata.comparator.compare(openMarkerToReturn.openBound(false), atom.clustering()) < 0;
+                }
+
+                public Unfiltered popOpeningMarker()
+                {
+                    assert openMarkerToReturn != null;
+                    Unfiltered toReturn = openMarkerToReturn;
+                    openMarkerToReturn = null;
+                    return toReturn;
+                }
+
+                /**
                  * Whether the currently open marker closes stricly before the provided row/RT.
                  */
                 public boolean hasClosingMarkerBefore(LegacyLayout.LegacyAtom atom)
@@ -622,35 +665,56 @@ public abstract class UnfilteredDeserializer
                     return RangeTombstoneBoundaryMarker.makeBoundary(false, first.stop.bound, first.stop.bound.invert(), first.deletionTime, next.deletionTime);
                 }
 
+                 /**
+                  * Pop whatever next marker needs to be popped. This should be called as many time as necessary (until
+                  * {@link #hasOpenTombstones} returns {@false}) when all atoms have been consumed to "empty" the tracker.
+                  */
+                 public Unfiltered popMarker()
+                 {
+                     assert hasOpenTombstones();
+                     return openMarkerToReturn == null ? popClosingMarker() : popOpeningMarker();
+                 }
+
                 /**
-                 * Update the tracker given the provided newly open tombstone. This return the Unfiltered corresponding to the opening
-                 * of said tombstone: this can be a simple open mark, a boundary (if there was an open tombstone superseded by this new one)
-                 * or even null (if the new tombstone start is supersedes by the currently open tombstone).
+                 * Update the tracker given the provided newly open tombstone. This potentially update openMarkerToReturn
+                 * to account for th new opening.
                  *
-                 * Note that this method assume the added tombstone is not fully shadowed, i.e. that !isShadowed(tombstone). It also
-                 * assumes no opened tombstone closes before that tombstone (so !hasClosingMarkerBefore(tombstone)).
+                 * Note that this method assumes that:
+                 +  1) the added tombstone is not fully shadowed: !isShadowed(tombstone).
+                 +  2) there is no marker to open that open strictly before this new tombstone: !hasOpeningMarkerBefore(tombstone).
+                 +  3) no opened tombstone closes before that tombstone: !hasClosingMarkerBefore(tombstone).
+                 + One can check that this is only called after the condition above have been checked in UnfilteredIterator.hasNext above.
                  */
-                public Unfiltered openNew(LegacyLayout.LegacyRangeTombstone tombstone)
+                public void openNew(LegacyLayout.LegacyRangeTombstone tombstone)
                 {
                     if (openTombstones.isEmpty())
                     {
+                        // If we have an openMarkerToReturn, the corresponding RT must be in openTombstones (or we wouldn't know when to close it)
+                        assert openMarkerToReturn == null;
                         openTombstones.add(tombstone);
-                        return new RangeTombstoneBoundMarker(tombstone.start.bound, tombstone.deletionTime);
+                        openMarkerToReturn = new RangeTombstoneBoundMarker(tombstone.start.bound, tombstone.deletionTime);
+                        return;
                     }
 
-                    // Add the new tombstone, and then check if it changes the currently open deletion or not.
-                    // Note: we grab the first tombstone (which represents the currently open deletion time) before adding
-                    // because add() can remove that first.
-                    Iterator<LegacyLayout.LegacyRangeTombstone> iter = openTombstones.iterator();
-                    LegacyLayout.LegacyRangeTombstone first = iter.next();
+                    if (openMarkerToReturn != null)
+                    {
+                        // If the new opening supersedes the one we're about to return, we need to update the one to return.
+                        if (tombstone.deletionTime.supersedes(openMarkerToReturn.openDeletionTime(false)))
+                            openMarkerToReturn = openMarkerToReturn.withNewOpeningDeletionTime(false, tombstone.deletionTime);
+                    }
+                    else
+                    {
+                        // We have no openMarkerToReturn set yet so set it now if needs be.
+                        // Since openTombstones isn't empty, it means we have a currently ongoing deletion. And if the new tombstone
+                        // supersedes that ongoing deletion, we need to close the opening  deletion and open with the new one.
+                        DeletionTime currentOpenDeletion = openTombstones.first().deletionTime;
+                        if (tombstone.deletionTime.supersedes(currentOpenDeletion))
+                            openMarkerToReturn = RangeTombstoneBoundaryMarker.makeBoundary(false, tombstone.start.bound.invert(), tombstone.start.bound, currentOpenDeletion, tombstone.deletionTime);
+                    }
 
+                    // In all cases, we know !isShadowed(tombstone) so we need to add the tombstone (note however that we may not have set openMarkerToReturn if the
+                    // new tombstone doesn't supersedes the current deletion _but_ extend past the marker currently open)
                     add(tombstone);
-
-                    // If the newly opened tombstone superseds the currently open one, we have to produce a boundary to change
-                    // the currently open deletion time, otherwise we have nothing to do.
-                    return tombstone.deletionTime.supersedes(first.deletionTime)
-                           ? RangeTombstoneBoundaryMarker.makeBoundary(false, tombstone.start.bound.invert(), tombstone.start.bound, first.deletionTime, tombstone.deletionTime)
-                           : null;
                 }
 
                 /**
@@ -680,16 +744,12 @@ public abstract class UnfilteredDeserializer
 
                 public boolean hasOpenTombstones()
                 {
-                    return !openTombstones.isEmpty();
-                }
-
-                private boolean formBoundary(LegacyLayout.LegacyRangeTombstone close, LegacyLayout.LegacyRangeTombstone open)
-                {
-                    return metadata.comparator.compare(close.stop.bound, open.start.bound) == 0;
+                    return openMarkerToReturn != null || !openTombstones.isEmpty();
                 }
 
                 public void clearState()
                 {
+                    openMarkerToReturn = null;
                     openTombstones.clear();
                 }
             }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/451fe9d8/src/java/org/apache/cassandra/db/rows/RangeTombstoneBoundMarker.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/RangeTombstoneBoundMarker.java b/src/java/org/apache/cassandra/db/rows/RangeTombstoneBoundMarker.java
index b35033d..6f4afa5 100644
--- a/src/java/org/apache/cassandra/db/rows/RangeTombstoneBoundMarker.java
+++ b/src/java/org/apache/cassandra/db/rows/RangeTombstoneBoundMarker.java
@@ -124,6 +124,14 @@ public class RangeTombstoneBoundMarker extends AbstractRangeTombstoneMarker
         return new RangeTombstoneBoundMarker(clustering().copy(allocator), deletion);
     }
 
+    public RangeTombstoneBoundMarker withNewOpeningDeletionTime(boolean reversed, DeletionTime newDeletionTime)
+    {
+        if (!isOpen(reversed))
+            throw new IllegalStateException();
+
+        return new RangeTombstoneBoundMarker(clustering(), newDeletionTime);
+    }
+
     public void digest(MessageDigest digest)
     {
         bound.digest(digest);

http://git-wip-us.apache.org/repos/asf/cassandra/blob/451fe9d8/src/java/org/apache/cassandra/db/rows/RangeTombstoneBoundaryMarker.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/RangeTombstoneBoundaryMarker.java b/src/java/org/apache/cassandra/db/rows/RangeTombstoneBoundaryMarker.java
index 06fbf87..0683d76 100644
--- a/src/java/org/apache/cassandra/db/rows/RangeTombstoneBoundaryMarker.java
+++ b/src/java/org/apache/cassandra/db/rows/RangeTombstoneBoundaryMarker.java
@@ -120,6 +120,11 @@ public class RangeTombstoneBoundaryMarker extends AbstractRangeTombstoneMarker
         return new RangeTombstoneBoundaryMarker(clustering().copy(allocator), endDeletion, startDeletion);
     }
 
+    public RangeTombstoneBoundaryMarker withNewOpeningDeletionTime(boolean reversed, DeletionTime newDeletionTime)
+    {
+        return new RangeTombstoneBoundaryMarker(clustering(), reversed ? newDeletionTime : endDeletion, reversed ? startDeletion : newDeletionTime);
+    }
+
     public static RangeTombstoneBoundaryMarker makeBoundary(boolean reversed, Slice.Bound close, Slice.Bound open, DeletionTime closeDeletion, DeletionTime openDeletion)
     {
         assert RangeTombstone.Bound.Kind.compare(close.kind(), open.kind()) == 0 : "Both bound don't form a boundary";

http://git-wip-us.apache.org/repos/asf/cassandra/blob/451fe9d8/src/java/org/apache/cassandra/db/rows/RangeTombstoneMarker.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/RangeTombstoneMarker.java b/src/java/org/apache/cassandra/db/rows/RangeTombstoneMarker.java
index c4c9f7f..dee7231 100644
--- a/src/java/org/apache/cassandra/db/rows/RangeTombstoneMarker.java
+++ b/src/java/org/apache/cassandra/db/rows/RangeTombstoneMarker.java
@@ -55,6 +55,8 @@ public interface RangeTombstoneMarker extends Unfiltered
         return false;
     }
 
+    public RangeTombstoneMarker withNewOpeningDeletionTime(boolean reversed, DeletionTime newDeletionTime);
+
     /**
      * Utility class to help merging range tombstone markers coming from multiple inputs (UnfilteredRowIterators).
      * <p>

http://git-wip-us.apache.org/repos/asf/cassandra/blob/451fe9d8/test/unit/org/apache/cassandra/db/OldFormatDeserializerTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/db/OldFormatDeserializerTest.java b/test/unit/org/apache/cassandra/db/OldFormatDeserializerTest.java
index 1060569..886b191 100644
--- a/test/unit/org/apache/cassandra/db/OldFormatDeserializerTest.java
+++ b/test/unit/org/apache/cassandra/db/OldFormatDeserializerTest.java
@@ -77,6 +77,60 @@ public class OldFormatDeserializerTest
          assertFalse(iterator.hasNext());
     }
 
+    @Test
+    public void testRangeTombstonesSameStart() throws Exception
+    {
+        CFMetaData metadata = CFMetaData.Builder.create("ks", "table")
+                                                .withPartitioner(Murmur3Partitioner.instance)
+                                                .addPartitionKey("k", Int32Type.instance)
+                                                .addClusteringColumn("v", Int32Type.instance)
+                                                .build();
+
+        // Multiple RT that have the same start (we _can_ get this in the legacy format!)
+        Supplier<LegacyLayout.LegacyAtom> atomSupplier = supplier(rt(1, 2, 3),
+                                                                  rt(1, 2, 5),
+                                                                  rt(1, 5, 4));
+
+        UnfilteredIterator iterator = new UnfilteredIterator(metadata,
+                                                             DeletionTime.LIVE,
+                                                             new SerializationHelper(metadata, MessagingService.current_version, SerializationHelper.Flag.LOCAL),
+                                                             atomSupplier);
+
+        // We should be entirely ignoring the first tombston (shadowed by 2nd one) so we should generate
+        // [1, 2]@5 (2, 5]@4 (but where both range actually form a boundary)
+
+        assertTrue(iterator.hasNext());
+
+        Unfiltered first = iterator.next();
+        System.out.println(">> " + first.toString(metadata));
+        assertTrue(first.isRangeTombstoneMarker());
+        RangeTombstoneMarker start = (RangeTombstoneMarker)first;
+        assertTrue(start.isOpen(false));
+        assertFalse(start.isClose(false));
+        assertEquals(1, toInt(start.openBound(false)));
+        assertEquals(5, start.openDeletionTime(false).markedForDeleteAt());
+
+        Unfiltered second = iterator.next();
+        assertTrue(second.isRangeTombstoneMarker());
+        RangeTombstoneMarker middle = (RangeTombstoneMarker)second;
+        assertTrue(middle.isClose(false));
+        assertTrue(middle.isOpen(false));
+        assertEquals(2, toInt(middle.closeBound(false)));
+        assertEquals(2, toInt(middle.openBound(false)));
+        assertEquals(5, middle.closeDeletionTime(false).markedForDeleteAt());
+        assertEquals(4, middle.openDeletionTime(false).markedForDeleteAt());
+
+        Unfiltered third = iterator.next();
+        assertTrue(third.isRangeTombstoneMarker());
+        RangeTombstoneMarker end = (RangeTombstoneMarker)third;
+        assertTrue(end.isClose(false));
+        assertFalse(end.isOpen(false));
+        assertEquals(5, toInt(end.closeBound(false)));
+        assertEquals(4, end.closeDeletionTime(false).markedForDeleteAt());
+
+        assertFalse(iterator.hasNext());
+    }
+
     private static int toInt(ClusteringPrefix prefix)
     {
         assertTrue(prefix.size() == 1);