You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by sl...@apache.org on 2017/03/27 10:04:42 UTC
[3/6] cassandra git commit: Dropping column results in "corrupt"
SSTable
Dropping column results in "corrupt" SSTable
patch by Sylvain Lebresne; reviewed by Alex Petrov for CASSANDRA-13337
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/5262bb17
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/5262bb17
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/5262bb17
Branch: refs/heads/trunk
Commit: 5262bb17b46fc8c02f9f836ddf9317d0de2698cd
Parents: 6311622
Author: Sylvain Lebresne <sy...@datastax.com>
Authored: Mon Mar 20 15:49:27 2017 +0100
Committer: Sylvain Lebresne <sy...@datastax.com>
Committed: Mon Mar 27 11:58:32 2017 +0200
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../db/columniterator/SSTableIterator.java | 78 ++++++++++++--------
.../columniterator/SSTableReversedIterator.java | 3 +-
.../cassandra/db/rows/RangeTombstoneMarker.java | 6 ++
.../apache/cassandra/db/rows/Unfiltered.java | 2 +
.../cassandra/db/rows/UnfilteredSerializer.java | 43 +++++++++--
.../cql3/validation/operations/AlterTest.java | 51 +++++++++++++
7 files changed, 144 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/5262bb17/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 2c5573a..0b1bb01 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
3.0.13
+ * Dropping column results in "corrupt" SSTable (CASSANDRA-13337)
* Bugs handling range tombstones in the sstable iterators (CASSANDRA-13340)
* Fix CONTAINS filtering for null collections (CASSANDRA-13246)
* Applying: Use a unique metric reservoir per test run when using Cassandra-wide metrics residing in MBeans (CASSANDRA-13216)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/5262bb17/src/java/org/apache/cassandra/db/columniterator/SSTableIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/SSTableIterator.java b/src/java/org/apache/cassandra/db/columniterator/SSTableIterator.java
index 9bcca48..fa337c0 100644
--- a/src/java/org/apache/cassandra/db/columniterator/SSTableIterator.java
+++ b/src/java/org/apache/cassandra/db/columniterator/SSTableIterator.java
@@ -123,20 +123,27 @@ public class SSTableIterator extends AbstractSSTableIterator
{
assert deserializer != null;
- // We use a same reasoning as in handlePreSliceData regarding the strictness of the inequality below.
- // We want to exclude deserialized unfiltered equal to end, because 1) we won't miss any rows since those
- // woudn't be equal to a slice bound and 2) a end bound can be equal to a start bound
- // (EXCL_END(x) == INCL_START(x) for instance) and in that case we don't want to return start bound because
- // it's fundamentally excluded. And if the bound is a end (for a range tombstone), it means it's exactly
- // our slice end, but in that case we will properly close the range tombstone anyway as part of our "close
- // an open marker" code in hasNextInterna
- if (!deserializer.hasNext() || deserializer.compareNextTo(end) >= 0)
- return null;
-
- Unfiltered next = deserializer.readNext();
- if (next.kind() == Unfiltered.Kind.RANGE_TOMBSTONE_MARKER)
- updateOpenMarker((RangeTombstoneMarker)next);
- return next;
+ while (true)
+ {
+ // We use a same reasoning as in handlePreSliceData regarding the strictness of the inequality below.
+ // We want to exclude deserialized unfiltered equal to end, because 1) we won't miss any rows since those
+ // woudn't be equal to a slice bound and 2) a end bound can be equal to a start bound
+ // (EXCL_END(x) == INCL_START(x) for instance) and in that case we don't want to return start bound because
+ // it's fundamentally excluded. And if the bound is a end (for a range tombstone), it means it's exactly
+ // our slice end, but in that case we will properly close the range tombstone anyway as part of our "close
+ // an open marker" code in hasNextInterna
+ if (!deserializer.hasNext() || deserializer.compareNextTo(end) >= 0)
+ return null;
+
+ Unfiltered next = deserializer.readNext();
+ // We may get empty row for the same reason expressed on UnfilteredSerializer.deserializeOne.
+ if (next.isEmpty())
+ continue;
+
+ if (next.kind() == Unfiltered.Kind.RANGE_TOMBSTONE_MARKER)
+ updateOpenMarker((RangeTombstoneMarker) next);
+ return next;
+ }
}
protected boolean hasNextInternal() throws IOException
@@ -256,24 +263,31 @@ public class SSTableIterator extends AbstractSSTableIterator
@Override
protected Unfiltered computeNext() throws IOException
{
- // Our previous read might have made us cross an index block boundary. If so, update our informations.
- // If we read from the beginning of the partition, this is also what will initialize the index state.
- indexState.updateBlock();
-
- // Return the next unfiltered unless we've reached the end, or we're beyond our slice
- // end (note that unless we're on the last block for the slice, there is no point
- // in checking the slice end).
- if (indexState.isDone()
- || indexState.currentBlockIdx() > lastBlockIdx
- || !deserializer.hasNext()
- || (indexState.currentBlockIdx() == lastBlockIdx && deserializer.compareNextTo(end) >= 0))
- return null;
-
-
- Unfiltered next = deserializer.readNext();
- if (next.kind() == Unfiltered.Kind.RANGE_TOMBSTONE_MARKER)
- updateOpenMarker((RangeTombstoneMarker)next);
- return next;
+ while (true)
+ {
+ // Our previous read might have made us cross an index block boundary. If so, update our informations.
+ // If we read from the beginning of the partition, this is also what will initialize the index state.
+ indexState.updateBlock();
+
+ // Return the next unfiltered unless we've reached the end, or we're beyond our slice
+ // end (note that unless we're on the last block for the slice, there is no point
+ // in checking the slice end).
+ if (indexState.isDone()
+ || indexState.currentBlockIdx() > lastBlockIdx
+ || !deserializer.hasNext()
+ || (indexState.currentBlockIdx() == lastBlockIdx && deserializer.compareNextTo(end) >= 0))
+ return null;
+
+
+ Unfiltered next = deserializer.readNext();
+ // We may get empty row for the same reason expressed on UnfilteredSerializer.deserializeOne.
+ if (next.isEmpty())
+ continue;
+
+ if (next.kind() == Unfiltered.Kind.RANGE_TOMBSTONE_MARKER)
+ updateOpenMarker((RangeTombstoneMarker) next);
+ return next;
+ }
}
}
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/5262bb17/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java b/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java
index 49dc82a..4bb7fe8 100644
--- a/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java
+++ b/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java
@@ -213,7 +213,8 @@ public class SSTableReversedIterator extends AbstractSSTableIterator
&& !stopReadingDisk())
{
Unfiltered unfiltered = deserializer.readNext();
- if (!isFirst || includeFirst)
+ // We may get empty row for the same reason expressed on UnfilteredSerializer.deserializeOne.
+ if (!unfiltered.isEmpty() && (!isFirst || includeFirst))
buffer.add(unfiltered);
isFirst = false;
http://git-wip-us.apache.org/repos/asf/cassandra/blob/5262bb17/src/java/org/apache/cassandra/db/rows/RangeTombstoneMarker.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/RangeTombstoneMarker.java b/src/java/org/apache/cassandra/db/rows/RangeTombstoneMarker.java
index 1cd5fb4..c4c9f7f 100644
--- a/src/java/org/apache/cassandra/db/rows/RangeTombstoneMarker.java
+++ b/src/java/org/apache/cassandra/db/rows/RangeTombstoneMarker.java
@@ -49,6 +49,12 @@ public interface RangeTombstoneMarker extends Unfiltered
public RangeTombstoneMarker copy(AbstractAllocator allocator);
+ default public boolean isEmpty()
+ {
+ // There is no such thing as an empty marker
+ return false;
+ }
+
/**
* Utility class to help merging range tombstone markers coming from multiple inputs (UnfilteredRowIterators).
* <p>
http://git-wip-us.apache.org/repos/asf/cassandra/blob/5262bb17/src/java/org/apache/cassandra/db/rows/Unfiltered.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/Unfiltered.java b/src/java/org/apache/cassandra/db/rows/Unfiltered.java
index 9d96137..9511eeb 100644
--- a/src/java/org/apache/cassandra/db/rows/Unfiltered.java
+++ b/src/java/org/apache/cassandra/db/rows/Unfiltered.java
@@ -55,6 +55,8 @@ public interface Unfiltered extends Clusterable
*/
public void validateData(CFMetaData metadata);
+ public boolean isEmpty();
+
public String toString(CFMetaData metadata);
public String toString(CFMetaData metadata, boolean fullDetails);
public String toString(CFMetaData metadata, boolean includeClusterKeys, boolean fullDetails);
http://git-wip-us.apache.org/repos/asf/cassandra/blob/5262bb17/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java b/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
index dc6f187..bdc8388 100644
--- a/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
+++ b/src/java/org/apache/cassandra/db/rows/UnfilteredSerializer.java
@@ -350,9 +350,44 @@ public class UnfilteredSerializer
return 1;
}
+ /**
+ * Deserialize an {@link Unfiltered} from the provided input.
+ *
+ * @param in the input from which to deserialize.
+ * @param header serialization header corresponding to the serialized data.
+ * @param helper the helper to use for deserialization.
+ * @param builder a row builder, passed here so we don't allocate a new one for every new row.
+ * @return the deserialized {@link Unfiltered} or {@code null} if we've read the end of a partition. This method is
+ * guaranteed to never return empty rows.
+ */
public Unfiltered deserialize(DataInputPlus in, SerializationHeader header, SerializationHelper helper, Row.Builder builder)
throws IOException
{
+ while (true)
+ {
+ Unfiltered unfiltered = deserializeOne(in, header, helper, builder);
+ if (unfiltered == null)
+ return null;
+
+ // Skip empty rows, see deserializeOne javadoc
+ if (!unfiltered.isEmpty())
+ return unfiltered;
+ }
+ }
+
+ /**
+ * Deserialize a single {@link Unfiltered} from the provided input.
+ * <p>
+ * <b>WARNING:</b> this can return an empty row because it's possible there is a row serialized, but that row only
+ * contains data for dropped columns, see CASSANDRA-13337. But as most code expect rows to not be empty, this isn't
+ * meant to be exposed publicly.
+ *
+ * But as {@link UnfilteredRowIterator} should not return empty
+ * rows, this mean consumer of this method should make sure to skip said empty rows.
+ */
+ private Unfiltered deserializeOne(DataInputPlus in, SerializationHeader header, SerializationHelper helper, Row.Builder builder)
+ throws IOException
+ {
// It wouldn't be wrong per-se to use an unsorted builder, but it would be inefficient so make sure we don't do it by mistake
assert builder.isSorted();
@@ -374,13 +409,7 @@ public class UnfilteredSerializer
throw new IOException("Corrupt flags value for unfiltered partition (isStatic flag set): " + flags);
builder.newRow(Clustering.serializer.deserialize(in, helper.version, header.clusteringTypes()));
- Row row = deserializeRowBody(in, header, helper, flags, extendedFlags, builder);
- // we do not write empty rows because Rows.collectStats(), called by BTW.applyToRow(), asserts that rows are not empty
- // if we don't throw here, then later the very same assertion in Rows.collectStats() will fail compactions
- // see BlackListingCompactionsTest and CASSANDRA-9530 for details
- if (row.isEmpty())
- throw new IOException("Corrupt empty row found in unfiltered partition");
- return row;
+ return deserializeRowBody(in, header, helper, flags, extendedFlags, builder);
}
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/5262bb17/test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java
index 245be30..c48ffe5 100644
--- a/test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java
+++ b/test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java
@@ -355,4 +355,55 @@ public class AlterTest extends CQLTester
assertEquals(errorMsg, e.getMessage());
}
}
+
+ /**
+ * Test for CASSANDRA-13337. Checks that dropping a column when a sstable contains only data for that column
+ * works properly.
+ */
+ @Test
+ public void testAlterDropEmptySSTable() throws Throwable
+ {
+ createTable("CREATE TABLE %s(k int PRIMARY KEY, x int, y int)");
+
+ execute("UPDATE %s SET x = 1 WHERE k = 0");
+
+ flush();
+
+ execute("UPDATE %s SET x = 1, y = 1 WHERE k = 0");
+
+ flush();
+
+ execute("ALTER TABLE %s DROP x");
+
+ compact();
+
+ assertRows(execute("SELECT * FROM %s"), row(0, 1));
+ }
+
+ /**
+ * Similarly to testAlterDropEmptySSTable, checks we don't return empty rows from queries (testAlterDropEmptySSTable
+ * tests the compaction case).
+ */
+ @Test
+ public void testAlterOnlyColumnBehaviorWithFlush() throws Throwable
+ {
+ testAlterOnlyColumnBehaviorWithFlush(true);
+ testAlterOnlyColumnBehaviorWithFlush(false);
+ }
+
+ private void testAlterOnlyColumnBehaviorWithFlush(boolean flushAfterInsert) throws Throwable
+ {
+ createTable("CREATE TABLE %s(k int PRIMARY KEY, x int, y int)");
+
+ execute("UPDATE %s SET x = 1 WHERE k = 0");
+
+ assertRows(execute("SELECT * FROM %s"), row(0, 1, null));
+
+ if (flushAfterInsert)
+ flush();
+
+ execute("ALTER TABLE %s DROP x");
+
+ assertEmpty(execute("SELECT * FROM %s"));
+ }
}