You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by sl...@apache.org on 2012/11/13 08:57:40 UTC
[2/2] git commit: Optimize mostRecentTomstone check in
CC.collectAllData
Optimize mostRecentTomstone check in CC.collectAllData
patch by slebresne; reviewed by jbellis for CASSANDRA-4883
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/53943180
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/53943180
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/53943180
Branch: refs/heads/cassandra-1.2.0
Commit: 53943180a4a41ecc5e145d1e08b0ea4a9d849c8c
Parents: 9118764
Author: Sylvain Lebresne <sy...@datastax.com>
Authored: Tue Nov 13 08:54:37 2012 +0100
Committer: Sylvain Lebresne <sy...@datastax.com>
Committed: Tue Nov 13 08:54:37 2012 +0100
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../org/apache/cassandra/config/CFMetaData.java | 3 +-
.../apache/cassandra/db/CollationController.java | 33 +++++++--------
src/java/org/apache/cassandra/db/DataTracker.java | 31 ++++++--------
.../cassandra/db/CollationControllerTest.java | 20 +++++----
5 files changed, 43 insertions(+), 45 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/53943180/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 43fc53a..77cc8a8 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -8,6 +8,7 @@
* Separate tracing from Log4J (CASSANDRA-4861)
* Exclude gcable tombstones from merkle-tree computation (CASSANDRA-4905)
* Better printing of AbstractBounds for tracing (CASSANDRA-4931)
+ * Optimize mostRecentTomstone check in CC.collectAllData (CASSANDRA-4883)
1.2-beta2
http://git-wip-us.apache.org/repos/asf/cassandra/blob/53943180/src/java/org/apache/cassandra/config/CFMetaData.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/config/CFMetaData.java b/src/java/org/apache/cassandra/config/CFMetaData.java
index 921242a..c3a00b1 100644
--- a/src/java/org/apache/cassandra/config/CFMetaData.java
+++ b/src/java/org/apache/cassandra/config/CFMetaData.java
@@ -181,7 +181,8 @@ public final class CFMetaData
+ "thrift_version text,"
+ "cql_version text,"
+ "data_center text,"
- + "rack text"
+ + "rack text,"
+ + "partitioner text,"
+ ") WITH COMMENT='information about the local node'");
public static final CFMetaData TraceSessionsCf = compile(14, "CREATE TABLE " + Tracing.SESSIONS_CF + " ("
http://git-wip-us.apache.org/repos/asf/cassandra/blob/53943180/src/java/org/apache/cassandra/db/CollationController.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/CollationController.java b/src/java/org/apache/cassandra/db/CollationController.java
index 80d0a2c..73c675f 100644
--- a/src/java/org/apache/cassandra/db/CollationController.java
+++ b/src/java/org/apache/cassandra/db/CollationController.java
@@ -241,13 +241,27 @@ public class CollationController
}
}
+ /*
+ * We can't eliminate full sstables based on the timestamp of what we've already read like
+ * in collectTimeOrderedData, but we still want to eliminate sstable whose maxTimestamp < mostRecentTombstone
+ * we've read. We still rely on the sstable ordering by maxTimestamp since if
+ * maxTimestamp_s1 > maxTimestamp_s0,
+ * we're guaranteed that s1 cannot have a row tombstone such that
+ * timestamp(tombstone) > maxTimestamp_s0
+ * since we necessarily have
+ * timestamp(tombstone) <= maxTimestamp_s1
+ * In othere words, iterating in maxTimestamp order allow to do our mostRecentTombstone elimination
+ * in one pass, and minimize the number of sstables for which we read a rowTombstone.
+ */
+ Collections.sort(view.sstables, SSTable.maxTimestampComparator);
+
long mostRecentRowTombstone = Long.MIN_VALUE;
for (SSTableReader sstable : view.sstables)
{
// if we've already seen a row tombstone with a timestamp greater
// than the most recent update to this sstable, we can skip it
if (sstable.getMaxTimestamp() < mostRecentRowTombstone)
- continue;
+ break;
OnDiskAtomIterator iter = filter.getSSTableColumnIterator(sstable);
iterators.add(iter);
@@ -262,23 +276,6 @@ public class CollationController
}
}
- // If we saw a row tombstone, do a second pass through the iterators we
- // obtained from the sstables and drop any whose maxTimestamp < that of the
- // row tombstone
- {
- Iterator<OnDiskAtomIterator> it = iterators.iterator();
- while (it.hasNext())
- {
- OnDiskAtomIterator iter = it.next();
- if ((iter instanceof ISSTableColumnIterator)
- && ((ISSTableColumnIterator) iter).getSStable().getMaxTimestamp() < mostRecentRowTombstone)
- {
- FileUtils.closeQuietly(iter);
- it.remove();
- }
- }
- }
-
// we need to distinguish between "there is no data at all for this row" (BF will let us rebuild that efficiently)
// and "there used to be data, but it's gone now" (we should cache the empty CF so we don't need to rebuild that slower)
if (iterators.isEmpty())
http://git-wip-us.apache.org/repos/asf/cassandra/blob/53943180/src/java/org/apache/cassandra/db/DataTracker.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/DataTracker.java b/src/java/org/apache/cassandra/db/DataTracker.java
index 85da449..7e8887f 100644
--- a/src/java/org/apache/cassandra/db/DataTracker.java
+++ b/src/java/org/apache/cassandra/db/DataTracker.java
@@ -64,7 +64,7 @@ public class DataTracker
return view.get().memtablesPendingFlush;
}
- public List<SSTableReader> getSSTables()
+ public Set<SSTableReader> getSSTables()
{
return view.get().sstables;
}
@@ -300,7 +300,7 @@ public class DataTracker
{
view.set(new View(new Memtable(cfstore),
Collections.<Memtable>emptySet(),
- Collections.<SSTableReader>emptyList(),
+ Collections.<SSTableReader>emptySet(),
Collections.<SSTableReader>emptySet(),
SSTableIntervalTree.empty()));
}
@@ -456,15 +456,10 @@ public class DataTracker
public final Memtable memtable;
public final Set<Memtable> memtablesPendingFlush;
public final Set<SSTableReader> compacting;
- // We can't use a SortedSet here because "the ordering maintained by a sorted set (whether or not an
- // explicit comparator is provided) must be <i>consistent with equals</i>." In particular,
- // ImmutableSortedSet will ignore any objects that compare equally with an existing Set member.
- // Obviously, dropping sstables whose max column timestamp happens to be equal to another's
- // is not acceptable for us. So, we use a List instead.
- public final List<SSTableReader> sstables;
+ public final Set<SSTableReader> sstables;
public final SSTableIntervalTree intervalTree;
- View(Memtable memtable, Set<Memtable> pendingFlush, List<SSTableReader> sstables, Set<SSTableReader> compacting, SSTableIntervalTree intervalTree)
+ View(Memtable memtable, Set<Memtable> pendingFlush, Set<SSTableReader> sstables, Set<SSTableReader> compacting, SSTableIntervalTree intervalTree)
{
this.memtable = memtable;
this.memtablesPendingFlush = pendingFlush;
@@ -492,18 +487,18 @@ public class DataTracker
public View replaceFlushed(Memtable flushedMemtable, SSTableReader newSSTable)
{
Set<Memtable> newPending = ImmutableSet.copyOf(Sets.difference(memtablesPendingFlush, Collections.singleton(flushedMemtable)));
- List<SSTableReader> newSSTables = newSSTable == null
- ? Collections.<SSTableReader>emptyList()
+ Set<SSTableReader> newSSTables = newSSTable == null
+ ? Collections.<SSTableReader>emptySet()
: newSSTables(newSSTable);
SSTableIntervalTree intervalTree = buildIntervalTree(newSSTables);
- return new View(memtable, newPending, Collections.unmodifiableList(newSSTables), compacting, intervalTree);
+ return new View(memtable, newPending, newSSTables, compacting, intervalTree);
}
public View replace(Collection<SSTableReader> oldSSTables, Iterable<SSTableReader> replacements)
{
- List<SSTableReader> newSSTables = newSSTables(oldSSTables, replacements);
+ Set<SSTableReader> newSSTables = newSSTables(oldSSTables, replacements);
SSTableIntervalTree intervalTree = buildIntervalTree(newSSTables);
- return new View(memtable, memtablesPendingFlush, Collections.unmodifiableList(newSSTables), compacting, intervalTree);
+ return new View(memtable, memtablesPendingFlush, newSSTables, compacting, intervalTree);
}
public View markCompacting(Collection<SSTableReader> tomark)
@@ -518,19 +513,19 @@ public class DataTracker
return new View(memtable, memtablesPendingFlush, sstables, compactingNew, intervalTree);
}
- private List<SSTableReader> newSSTables(SSTableReader newSSTable)
+ private Set<SSTableReader> newSSTables(SSTableReader newSSTable)
{
assert newSSTable != null;
// not performance-sensitive, don't obsess over doing a selection merge here
return newSSTables(Collections.<SSTableReader>emptyList(), Collections.singletonList(newSSTable));
}
- private List<SSTableReader> newSSTables(Collection<SSTableReader> oldSSTables, Iterable<SSTableReader> replacements)
+ private Set<SSTableReader> newSSTables(Collection<SSTableReader> oldSSTables, Iterable<SSTableReader> replacements)
{
ImmutableSet<SSTableReader> oldSet = ImmutableSet.copyOf(oldSSTables);
int newSSTablesSize = sstables.size() - oldSSTables.size() + Iterables.size(replacements);
assert newSSTablesSize >= Iterables.size(replacements) : String.format("Incoherent new size %d replacing %s by %s in %s", newSSTablesSize, oldSSTables, replacements, this);
- List<SSTableReader> newSSTables = new ArrayList<SSTableReader>(newSSTablesSize);
+ Set<SSTableReader> newSSTables = new HashSet<SSTableReader>(newSSTablesSize);
for (SSTableReader sstable : sstables)
{
if (!oldSet.contains(sstable))
@@ -538,7 +533,7 @@ public class DataTracker
}
Iterables.addAll(newSSTables, replacements);
assert newSSTables.size() == newSSTablesSize : String.format("Expecting new size of %d, got %d while replacing %s by %s in %s", newSSTablesSize, newSSTables.size(), oldSSTables, replacements, this);
- return newSSTables;
+ return ImmutableSet.copyOf(newSSTables);
}
@Override
http://git-wip-us.apache.org/repos/asf/cassandra/blob/53943180/test/unit/org/apache/cassandra/db/CollationControllerTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/db/CollationControllerTest.java b/test/unit/org/apache/cassandra/db/CollationControllerTest.java
index f469639..4a8e426 100644
--- a/test/unit/org/apache/cassandra/db/CollationControllerTest.java
+++ b/test/unit/org/apache/cassandra/db/CollationControllerTest.java
@@ -30,6 +30,8 @@ import org.apache.cassandra.db.filter.QueryPath;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.junit.Test;
+import org.apache.cassandra.io.sstable.SSTableReader;
+
public class CollationControllerTest extends SchemaLoader
{
@Test
@@ -61,20 +63,22 @@ public class CollationControllerTest extends SchemaLoader
store.forceBlockingFlush();
+ // add yet one more mutation
+ rm = new RowMutation("Keyspace1", dk.key);
+ rm.add(path, ByteBufferUtil.bytes("foobar"), 30);
+ rm.apply();
+ store.forceBlockingFlush();
+
// A NamesQueryFilter goes down one code path (through collectTimeOrderedData())
+ // It should only iterate the last flushed sstable, since it probably contains the most recent value for Column1
QueryFilter filter = QueryFilter.getNamesFilter(dk, path, ByteBufferUtil.bytes("Column1"));
CollationController controller = new CollationController(store, false, filter, Integer.MIN_VALUE);
controller.getTopLevelColumns();
assertEquals(1, controller.getSstablesIterated());
-
+
// SliceQueryFilter goes down another path (through collectAllData())
- // Add another mutation, with a lower timestamp then force another flush
- // so we can assert that we're not reading every sstable
- rm = new RowMutation("Keyspace1", dk.key);
- rm.add(path, ByteBufferUtil.bytes("asdf"), 5);
- rm.apply();
- store.forceBlockingFlush();
-
+ // We will read "only" the last sstable in that case, but because the 2nd sstable has a tombstone that is more
+ // recent than the maxTimestamp of the very first sstable we flushed, we should only read the 2 first sstables.
filter = QueryFilter.getIdentityFilter(dk, path);
controller = new CollationController(store, false, filter, Integer.MIN_VALUE);
controller.getTopLevelColumns();