You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by al...@apache.org on 2018/09/25 16:05:32 UTC
[2/6] cassandra git commit: DESC order reads can fail to return the
last Unfiltered in the partition
DESC order reads can fail to return the last Unfiltered in the partition
patch by Aleksey Yeschenko; reviewed by Sam Tunnicliffe and Benedict
Elliott Smith for CASSANDRA-14766
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/45937def
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/45937def
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/45937def
Branch: refs/heads/cassandra-3.11
Commit: 45937def313bbb32024ae890f830e23bcc6ccae5
Parents: 322f7e9
Author: Aleksey Yeshchenko <al...@apple.com>
Authored: Tue Sep 18 13:12:11 2018 +0100
Committer: Aleksey Yeshchenko <al...@apple.com>
Committed: Tue Sep 25 17:02:06 2018 +0100
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../cassandra/db/UnfilteredDeserializer.java | 115 ++++++++++++-------
...bles-legacy_ka_14766-ka-1-CompressionInfo.db | Bin 0 -> 43 bytes
.../legacy_tables-legacy_ka_14766-ka-1-Data.db | Bin 0 -> 103 bytes
...gacy_tables-legacy_ka_14766-ka-1-Digest.sha1 | 1 +
...legacy_tables-legacy_ka_14766-ka-1-Filter.db | Bin 0 -> 16 bytes
.../legacy_tables-legacy_ka_14766-ka-1-Index.db | Bin 0 -> 134 bytes
...cy_tables-legacy_ka_14766-ka-1-Statistics.db | Bin 0 -> 4450 bytes
...egacy_tables-legacy_ka_14766-ka-1-Summary.db | Bin 0 -> 92 bytes
.../legacy_tables-legacy_ka_14766-ka-1-TOC.txt | 8 ++
.../cassandra/io/sstable/LegacySSTableTest.java | 27 ++++-
11 files changed, 112 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/45937def/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 195c97c..43628b2 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
3.0.18
+ * DESC order reads can fail to return the last Unfiltered in the partition (CASSANDRA-14766)
* Fix corrupted collection deletions for dropped columns in 3.0 <-> 2.{1,2} messages (CASSANDRA-14568)
* Fix corrupted static collection deletions in 3.0 <-> 2.{1,2} messages (CASSANDRA-14568)
* Handle failures in parallelAllSSTableOperation (cleanup/upgradesstables/etc) (CASSANDRA-14657)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/45937def/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java b/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
index 0aa5741..62ad76a 100644
--- a/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
+++ b/src/java/org/apache/cassandra/db/UnfilteredDeserializer.java
@@ -245,10 +245,14 @@ public abstract class UnfilteredDeserializer
// The next Unfiltered to return, computed by hasNext()
private Unfiltered next;
- // A temporary storage for an unfiltered that isn't returned next but should be looked at just afterwards
- private Unfiltered saved;
- private boolean isFirst = true;
+ // Saved position in the input after the next Unfiltered that will be consumed
+ private long nextConsumedPosition;
+
+ // A temporary storage for an Unfiltered that isn't returned next but should be looked at just afterwards
+ private Stash stash;
+
+ private boolean couldBeStartOfPartition = true;
// The Unfiltered as read from the old format input
private final UnfilteredIterator iterator;
@@ -258,7 +262,15 @@ public abstract class UnfilteredDeserializer
// Tracks the size of the last LegacyAtom read from disk, because this needs to be accounted
// for when marking lastConsumedPosition after readNext/skipNext
- private long bytesReadForNextAtom;
+ // Reading/skipping an Unfiltered consumes LegacyAtoms from the underlying legacy atom iterator
+ // e.g. hasNext() -> iterator.hasNext() -> iterator.readRow() -> atoms.next()
+ // The stop condition of the loop which groups legacy atoms into rows causes that AtomIterator
+ // to read in the first atom which doesn't belong in the row. So by that point, our position
+ // is actually past the end of the next Unfiltered. To compensate, we record the size of
+ // the last LegacyAtom read and subtract it from the current position when we calculate lastConsumedPosition.
+ // If we don't, then when reading an indexed block, we can over correct and may think that we've
+ // exhausted the block before we actually have.
+ private long bytesReadForNextAtom = 0L;
private OldFormatDeserializer(CFMetaData metadata,
DataInputPlus in,
@@ -313,27 +325,55 @@ public abstract class UnfilteredDeserializer
{
while (next == null)
{
- if (saved == null && !iterator.hasNext())
- return false;
-
- next = saved == null ? iterator.next() : saved;
- saved = null;
-
- // The sstable iterators assume that if there is one, the static row is the first thing this deserializer will return.
- // However, in the old format, a range tombstone with an empty start would sort before any static cell. So we should
- // detect that case and return the static parts first if necessary.
- if (isFirst && iterator.hasNext() && isStatic(iterator.peek()))
+ if (null != stash)
+ {
+ next = stash.unfiltered;
+ nextConsumedPosition = stash.consumedPosition;
+ stash = null;
+ }
+ else
{
- saved = next;
+ if (!iterator.hasNext())
+ return false;
next = iterator.next();
+ nextConsumedPosition = currentPosition() - bytesReadForNextAtom;
+ }
+
+ /*
+ * The sstable iterators assume that if there is one, the static row is the first thing this deserializer will return.
+ * However, in the old format, a range tombstone with an empty start would sort before any static cell. So we should
+ * detect that case and return the static parts first if necessary.
+ */
+ if (couldBeStartOfPartition && next.isRangeTombstoneMarker() && next.clustering().size() == 0 && iterator.hasNext())
+ {
+ Unfiltered unfiltered = iterator.next();
+ long consumedPosition = currentPosition() - bytesReadForNextAtom;
+
+ stash = new Stash(unfiltered, consumedPosition);
+
+ /*
+ * reorder next and stash (see the comment above that explains why), but retain their positions
+ * it's ok to do so since consumedPosition value is only used to determine if we have gone past
+ * the end of the index ‘block’; since the edge case requires that the first value be the ‘bottom’
+ * RT bound (i.e. with no byte buffers), this has a small and well-defined size, and it must be
+ * the case that both unfiltered are in the same index ‘block’ if we began at the beginning of it.
+ * if we don't do this, however, we risk aborting early and not returning the BOTTOM rt bound,
+ * if the static row is large enough to cross block boundaries.
+ */
+ if (isStatic(unfiltered))
+ {
+ stash.unfiltered = next;
+ next = unfiltered;
+ }
}
- isFirst = false;
+ couldBeStartOfPartition = false;
// When reading old tables, we sometimes want to skip static data (due to how staticly defined column of compact
// tables are handled).
if (skipStatic && isStatic(next))
next = null;
}
+
return true;
}
catch (IOError e)
@@ -376,18 +416,17 @@ public abstract class UnfilteredDeserializer
throw new IllegalStateException();
Unfiltered toReturn = next;
next = null;
- lastConsumedPosition = currentPosition() - bytesReadForNextAtom();
+ lastConsumedPosition = nextConsumedPosition;
return toReturn;
}
public void skipNext() throws IOException
{
- if (!hasNext())
- throw new UnsupportedOperationException();
- next = null;
- lastConsumedPosition = currentPosition() - bytesReadForNextAtom();
+ readNext();
}
+ // in case we had to reorder an empty RT bound with a static row, this won't be returning the precise unconsumed size,
+ // that corresponds to the last returned Unfiltered, but use the natural order in the sstable instead
public long bytesReadForUnconsumedData()
{
if (!(in instanceof FileDataInput))
@@ -396,28 +435,26 @@ public abstract class UnfilteredDeserializer
return currentPosition() - lastConsumedPosition;
}
- // Reading/skipping an Unfiltered consumes LegacyAtoms from the underlying legacy atom iterator
- // e.g. hasNext() -> iterator.hasNext() -> iterator.readRow() -> atoms.next()
- // The stop condition of the loop which groups legacy atoms into rows causes that AtomIterator
- // to read in the first atom which doesn't belong in the row. So by that point, our position
- // is actually past the end of the next Unfiltered. To compensate, we record the size of
- // the last LegacyAtom read and subtract it from the current position when we calculate lastConsumedPosition.
- // If we don't, then when reading an indexed block, we can over correct and may think that we've
- // exhausted the block before we actually have.
- private long bytesReadForNextAtom()
- {
- // If we've read anything at all then we will have recorded this in bytesReadForNextAtom,
- // but being extra careful here just incase this method is called before any reads happen.
- return iterator.atoms.next == null ? 0 : bytesReadForNextAtom;
- }
-
public void clearState()
{
next = null;
- saved = null;
+ stash = null;
+ couldBeStartOfPartition = true;
iterator.clearState();
lastConsumedPosition = currentPosition();
- bytesReadForNextAtom = 0;
+ bytesReadForNextAtom = 0L;
+ }
+
+ private static final class Stash
+ {
+ private Unfiltered unfiltered;
+ long consumedPosition;
+
+ private Stash(Unfiltered unfiltered, long consumedPosition)
+ {
+ this.unfiltered = unfiltered;
+ this.consumedPosition = consumedPosition;
+ }
}
// Groups atoms from the input into proper Unfiltered.
@@ -543,7 +580,7 @@ public abstract class UnfilteredDeserializer
// Wraps the input of the deserializer to provide an iterator (and skip shadowed atoms).
// Note: this could use guava AbstractIterator except that we want to be able to clear
// the internal state of the iterator so it's cleaner to do it ourselves.
- private class AtomIterator implements PeekingIterator<LegacyLayout.LegacyAtom>
+ private static class AtomIterator implements PeekingIterator<LegacyLayout.LegacyAtom>
{
private final Supplier<LegacyLayout.LegacyAtom> atomReader;
private boolean isDone;
http://git-wip-us.apache.org/repos/asf/cassandra/blob/45937def/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-CompressionInfo.db
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-CompressionInfo.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-CompressionInfo.db
new file mode 100644
index 0000000..b5b5246
Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-CompressionInfo.db differ
http://git-wip-us.apache.org/repos/asf/cassandra/blob/45937def/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Data.db
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Data.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Data.db
new file mode 100644
index 0000000..18cf478
Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Data.db differ
http://git-wip-us.apache.org/repos/asf/cassandra/blob/45937def/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Digest.sha1
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Digest.sha1 b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Digest.sha1
new file mode 100644
index 0000000..f37a2b3
--- /dev/null
+++ b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Digest.sha1
@@ -0,0 +1 @@
+1576541413
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/cassandra/blob/45937def/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Filter.db
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Filter.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Filter.db
new file mode 100644
index 0000000..7a31048
Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Filter.db differ
http://git-wip-us.apache.org/repos/asf/cassandra/blob/45937def/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Index.db
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Index.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Index.db
new file mode 100644
index 0000000..5e4995c
Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Index.db differ
http://git-wip-us.apache.org/repos/asf/cassandra/blob/45937def/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Statistics.db
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Statistics.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Statistics.db
new file mode 100644
index 0000000..d4b0526
Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Statistics.db differ
http://git-wip-us.apache.org/repos/asf/cassandra/blob/45937def/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Summary.db
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Summary.db b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Summary.db
new file mode 100644
index 0000000..38cc933
Binary files /dev/null and b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-Summary.db differ
http://git-wip-us.apache.org/repos/asf/cassandra/blob/45937def/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-TOC.txt
----------------------------------------------------------------------
diff --git a/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-TOC.txt b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-TOC.txt
new file mode 100644
index 0000000..db5ac46
--- /dev/null
+++ b/test/data/legacy-sstables/ka/legacy_tables/legacy_ka_14766/legacy_tables-legacy_ka_14766-ka-1-TOC.txt
@@ -0,0 +1,8 @@
+Data.db
+TOC.txt
+Digest.sha1
+Filter.db
+Statistics.db
+CompressionInfo.db
+Summary.db
+Index.db
http://git-wip-us.apache.org/repos/asf/cassandra/blob/45937def/test/unit/org/apache/cassandra/io/sstable/LegacySSTableTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/io/sstable/LegacySSTableTest.java b/test/unit/org/apache/cassandra/io/sstable/LegacySSTableTest.java
index ede4ab6..f10114b 100644
--- a/test/unit/org/apache/cassandra/io/sstable/LegacySSTableTest.java
+++ b/test/unit/org/apache/cassandra/io/sstable/LegacySSTableTest.java
@@ -188,7 +188,32 @@ public class LegacySSTableTest
}
@Test
- public void verifyOldSSTables() throws Exception
+ public void test14766() throws Exception
+ {
+ /*
+ * During upgrades from 2.1 to 3.0, reading from old sstables in reverse order could omit the very last row if the
+ * last indexed block had only two Unfiltered-s. See CASSANDRA-14766 for details.
+ *
+ * The sstable used here has two indexed blocks, with 2 cells/rows of ~500 bytes each, with column index interval of 1kb.
+ * Without the fix SELECT * returns 4 rows in ASC order, but only 3 rows in DESC order, omitting the last one.
+ */
+
+ QueryProcessor.executeInternal("CREATE TABLE legacy_tables.legacy_ka_14766 (pk int, ck int, value text, PRIMARY KEY (pk, ck));");
+ loadLegacyTable("legacy_%s_14766%s", "ka", "");
+
+ UntypedResultSet rs;
+
+ // read all rows in ASC order, expect all 4 to be returned
+ rs = QueryProcessor.executeInternal("SELECT * FROM legacy_tables.legacy_ka_14766 WHERE pk = 0 ORDER BY ck ASC;");
+ Assert.assertEquals(4, rs.size());
+
+ // read all rows in DESC order, expect all 4 to be returned
+ rs = QueryProcessor.executeInternal("SELECT * FROM legacy_tables.legacy_ka_14766 WHERE pk = 0 ORDER BY ck DESC;");
+ Assert.assertEquals(4, rs.size());
+ }
+
+ @Test
+ public void testVerifyOldSSTables() throws Exception
{
for (String legacyVersion : legacyVersions)
{
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org