You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2013/11/28 18:55:32 UTC
git commit: Make flush single-pass when partition tombstones are present
patch by jbellis; reviewed by slebresne for CASSANDRA-6417
Updated Branches:
refs/heads/trunk 122bb6b20 -> 58bcdd493
Make flush single-pass when partition tombstones are present
patch by jbellis; reviewed by slebresne for CASSANDRA-6417
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/58bcdd49
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/58bcdd49
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/58bcdd49
Branch: refs/heads/trunk
Commit: 58bcdd493280252d69151361676e60ce8974736d
Parents: 122bb6b
Author: Jonathan Ellis <jb...@apache.org>
Authored: Thu Nov 28 11:55:04 2013 -0600
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Thu Nov 28 11:55:04 2013 -0600
----------------------------------------------------------------------
.../org/apache/cassandra/db/ColumnFamilyStore.java | 16 ++--------------
src/java/org/apache/cassandra/db/ColumnIndex.java | 16 +++++++++++-----
src/java/org/apache/cassandra/db/DeletionTime.java | 4 ++--
src/java/org/apache/cassandra/db/Memtable.java | 10 ----------
4 files changed, 15 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/58bcdd49/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
index ccc15ab..86bb7b4 100644
--- a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
+++ b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
@@ -873,7 +873,7 @@ public class ColumnFamilyStore implements ColumnFamilyStoreMBean
}
/*
- This is complicated because we need to preserve deleted columns, supercolumns, and columnfamilies
+ This is complicated because we need to preserve deleted columns and columnfamilies
until they have been deleted for at least GC_GRACE_IN_SECONDS. But, we do not need to preserve
their contents; just the object itself as a "tombstone" that can be used to repair other
replicas that do not know about the deletion.
@@ -885,16 +885,9 @@ public class ColumnFamilyStore implements ColumnFamilyStoreMBean
return null;
}
- removeDeletedColumnsOnly(cf, gcBefore, indexer);
- return removeDeletedCF(cf, gcBefore);
- }
-
- private static long removeDeletedColumnsOnly(ColumnFamily cf, int gcBefore, SecondaryIndexManager.Updater indexer)
- {
Iterator<Column> iter = cf.iterator();
DeletionInfo.InOrderTester tester = cf.inOrderDeletionTester();
boolean hasDroppedColumns = !cf.metadata.getDroppedColumns().isEmpty();
- long removedBytes = 0;
while (iter.hasNext())
{
Column c = iter.next();
@@ -906,15 +899,10 @@ public class ColumnFamilyStore implements ColumnFamilyStoreMBean
{
iter.remove();
indexer.remove(c);
- removedBytes += c.dataSize();
}
}
- return removedBytes;
- }
- public static long removeDeletedColumnsOnly(ColumnFamily cf, int gcBefore)
- {
- return removeDeletedColumnsOnly(cf, gcBefore, SecondaryIndexManager.nullUpdater);
+ return removeDeletedCF(cf, gcBefore);
}
// returns true if
http://git-wip-us.apache.org/repos/asf/cassandra/blob/58bcdd49/src/java/org/apache/cassandra/db/ColumnIndex.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/ColumnIndex.java b/src/java/org/apache/cassandra/db/ColumnIndex.java
index 6dd2028..8729238 100644
--- a/src/java/org/apache/cassandra/db/ColumnIndex.java
+++ b/src/java/org/apache/cassandra/db/ColumnIndex.java
@@ -119,18 +119,25 @@ public class ColumnIndex
public ColumnIndex build(ColumnFamily cf) throws IOException
{
// cf has disentangled the columns and range tombstones, we need to re-interleave them in comparator order
+ Comparator<ByteBuffer> comparator = cf.getComparator();
+ DeletionInfo.InOrderTester tester = cf.deletionInfo().inOrderTester();
Iterator<RangeTombstone> rangeIter = cf.deletionInfo().rangeIterator();
RangeTombstone tombstone = rangeIter.hasNext() ? rangeIter.next() : null;
- Comparator<ByteBuffer> comparator = cf.getComparator();
for (Column c : cf)
{
while (tombstone != null && comparator.compare(c.name(), tombstone.min) >= 0)
{
- add(tombstone);
+ // skip range tombstones that are shadowed by partition tombstones
+ if (!cf.deletionInfo().getTopLevelDeletion().isDeleted(tombstone))
+ add(tombstone);
tombstone = rangeIter.hasNext() ? rangeIter.next() : null;
}
- add(c);
+
+ // We can skip any cell if it's shadowed by a tombstone already. This is a more
+ // general case than was handled by CASSANDRA-2589.
+ if (!tester.isDeleted(c))
+ add(c);
}
while (tombstone != null)
@@ -158,9 +165,8 @@ public class ColumnIndex
OnDiskAtom c = columns.next();
add(c);
}
- ColumnIndex index = build();
- return index;
+ return build();
}
public void add(OnDiskAtom column) throws IOException
http://git-wip-us.apache.org/repos/asf/cassandra/blob/58bcdd49/src/java/org/apache/cassandra/db/DeletionTime.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/DeletionTime.java b/src/java/org/apache/cassandra/db/DeletionTime.java
index 3d6fad4..79244a6 100644
--- a/src/java/org/apache/cassandra/db/DeletionTime.java
+++ b/src/java/org/apache/cassandra/db/DeletionTime.java
@@ -83,9 +83,9 @@ public class DeletionTime implements Comparable<DeletionTime>
return localDeletionTime < gcBefore;
}
- public boolean isDeleted(Column column)
+ public boolean isDeleted(OnDiskAtom atom)
{
- return column.timestamp() <= markedForDeleteAt;
+ return atom.maxTimestamp() <= markedForDeleteAt;
}
public long memorySize()
http://git-wip-us.apache.org/repos/asf/cassandra/blob/58bcdd49/src/java/org/apache/cassandra/db/Memtable.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/Memtable.java b/src/java/org/apache/cassandra/db/Memtable.java
index 6819cda..064851a 100644
--- a/src/java/org/apache/cassandra/db/Memtable.java
+++ b/src/java/org/apache/cassandra/db/Memtable.java
@@ -351,16 +351,6 @@ public class Memtable
// See CASSANDRA-4667.
if (cfs.name.equals(SystemKeyspace.BATCHLOG_CF) && cfs.keyspace.getName().equals(Keyspace.SYSTEM_KS) && !(cf.getColumnCount() == 0))
continue;
-
- // Pedantically, you could purge column level tombstones that are past GcGRace when writing to the SSTable.
- // But it can result in unexpected behaviour where deletes never make it to disk,
- // as they are lost and so cannot override existing column values. So we only remove deleted columns if there
- // is a CF level tombstone to ensure the delete makes it into an SSTable.
- // We also shouldn't be dropping any columns obsoleted by partition and/or range tombstones in case
- // the table has secondary indexes, or else the stale entries wouldn't be cleaned up during compaction,
- // and will only be dropped during 2i query read-repair, if at all.
- if (!cfs.indexManager.hasIndexes())
- currentSize.addAndGet(-ColumnFamilyStore.removeDeletedColumnsOnly(cf, Integer.MIN_VALUE));
}
if (cf.getColumnCount() > 0 || cf.isMarkedForDelete())