You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by xe...@apache.org on 2012/01/10 18:41:12 UTC
[4/4] git commit: Fix assertionError for CF with gc_grace = 0
Fix assertionError for CF with gc_grace = 0
patch by slebresne and jbellis for CASSANDRA-3579
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/89e54091
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/89e54091
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/89e54091
Branch: refs/heads/trunk
Commit: 89e54091fd2b55363f3a4917d0883d688c9e8a9c
Parents: 621bc0c
Author: Sylvain Lebresne <sy...@datastax.com>
Authored: Mon Jan 9 17:56:00 2012 +0100
Committer: Sylvain Lebresne <sy...@datastax.com>
Committed: Mon Jan 9 17:56:00 2012 +0100
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../cassandra/db/AbstractColumnContainer.java | 2 +-
.../org/apache/cassandra/db/ColumnFamilyStore.java | 13 +++++++------
.../org/apache/cassandra/db/ExpiringColumn.java | 12 ++++++++++++
.../apache/cassandra/db/filter/QueryFilter.java | 2 +-
5 files changed, 22 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/89e54091/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 859401a..cadd3cc 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -17,6 +17,7 @@
* Reset min/max compaction threshold when creating size tiered compaction
strategy (CASSANDRA-3666)
* Don't ignore IOException during compaction (CASSANDRA-3655)
+ * Fix assertion error for CF with gc_grace=0 (CASSANDRA-3579)
Merged from 0.8:
* avoid logging (harmless) exception when GC takes < 1ms (CASSANDRA-3656)
* prevent new nodes from thinking down nodes are up forever (CASSANDRA-3626)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/89e54091/src/java/org/apache/cassandra/db/AbstractColumnContainer.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/AbstractColumnContainer.java b/src/java/org/apache/cassandra/db/AbstractColumnContainer.java
index c184f9f..87e75eb 100644
--- a/src/java/org/apache/cassandra/db/AbstractColumnContainer.java
+++ b/src/java/org/apache/cassandra/db/AbstractColumnContainer.java
@@ -102,7 +102,7 @@ public abstract class AbstractColumnContainer implements IColumnContainer, IIter
// Stop if either we don't need to change the deletion info (it's
// still MIN_VALUE or not expired yet) or we've succesfully changed it
if (current.localDeletionTime == Integer.MIN_VALUE
- || current.localDeletionTime > gcBefore
+ || current.localDeletionTime >= gcBefore
|| deletionInfo.compareAndSet(current, new DeletionInfo()))
{
break;
http://git-wip-us.apache.org/repos/asf/cassandra/blob/89e54091/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
index 983adc9..151564c 100644
--- a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
+++ b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
@@ -823,9 +823,7 @@ public class ColumnFamilyStore implements ColumnFamilyStoreMBean
public static ColumnFamily removeDeletedCF(ColumnFamily cf, int gcBefore)
{
- // in case of a timestamp tie, tombstones get priority over non-tombstones.
- // (we want this to be deterministic to avoid confusion.)
- if (cf.getColumnCount() == 0 && cf.getLocalDeletionTime() <= gcBefore)
+ if (cf.getColumnCount() == 0 && cf.getLocalDeletionTime() < gcBefore)
return null;
cf.maybeResetDeletionTimes(gcBefore);
@@ -867,7 +865,10 @@ public class ColumnFamilyStore implements ColumnFamilyStoreMBean
// remove columns if
// (a) the column itself is tombstoned or
// (b) the CF is tombstoned and the column is not newer than it
- if ((c.isMarkedForDelete() && c.getLocalDeletionTime() <= gcBefore)
+ //
+ // Note that we need the inequality below for case (a) to be strict for expiring columns
+ // to work correctly -- see the comment in ExpiringColumn.isMarkedForDelete().
+ if ((c.isMarkedForDelete() && c.getLocalDeletionTime() < gcBefore)
|| c.timestamp() <= cf.getMarkedForDeleteAt())
{
iter.remove();
@@ -893,12 +894,12 @@ public class ColumnFamilyStore implements ColumnFamilyStoreMBean
// (a) the subcolumn itself is tombstoned or
// (b) the supercolumn is tombstoned and the subcolumn is not newer than it
if (subColumn.timestamp() <= minTimestamp
- || (subColumn.isMarkedForDelete() && subColumn.getLocalDeletionTime() <= gcBefore))
+ || (subColumn.isMarkedForDelete() && subColumn.getLocalDeletionTime() < gcBefore))
{
subIter.remove();
}
}
- if (c.getSubColumns().isEmpty() && c.getLocalDeletionTime() <= gcBefore)
+ if (c.getSubColumns().isEmpty() && c.getLocalDeletionTime() < gcBefore)
{
iter.remove();
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/89e54091/src/java/org/apache/cassandra/db/ExpiringColumn.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/ExpiringColumn.java b/src/java/org/apache/cassandra/db/ExpiringColumn.java
index cb79590..2340ef0 100644
--- a/src/java/org/apache/cassandra/db/ExpiringColumn.java
+++ b/src/java/org/apache/cassandra/db/ExpiringColumn.java
@@ -77,6 +77,18 @@ public class ExpiringColumn extends Column
@Override
public boolean isMarkedForDelete()
{
+ /*
+ * For compaction, we need to ensure that at all time if
+ * localExpirationTime < gcbefore, then isMarkedForDelete() == true
+ * (otherwise LCR may expire columns between it's two phases compaction -- see #3579).
+ *
+ * Since during compaction we know that at all time, gcbefore <= now
+ * (the = is important in case where gc_grace=0), it follows that to
+ * ensure the propery above we need for isMarkedForDelete to be
+ * now > localExpirationTime (*not* now >= localExpiration). For the
+ * same reason, compaction should consider a column tomstoned if
+ * getLocalDeletionTime() < gcbefore, *not* if getLocalDeletionTime() <= gcbefore.
+ */
return (int) (System.currentTimeMillis() / 1000 ) > localExpirationTime;
}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/89e54091/src/java/org/apache/cassandra/db/filter/QueryFilter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/filter/QueryFilter.java b/src/java/org/apache/cassandra/db/filter/QueryFilter.java
index c27d636..3405c46 100644
--- a/src/java/org/apache/cassandra/db/filter/QueryFilter.java
+++ b/src/java/org/apache/cassandra/db/filter/QueryFilter.java
@@ -155,7 +155,7 @@ public class QueryFilter
// and if its container is deleted, the column must be changed more recently than the container tombstone (2)
// (since otherwise, the only thing repair cares about is the container tombstone)
long maxChange = column.mostRecentLiveChangeAt();
- return (!column.isMarkedForDelete() || column.getLocalDeletionTime() > gcBefore || maxChange > column.getMarkedForDeleteAt()) // (1)
+ return (!column.isMarkedForDelete() || column.getLocalDeletionTime() >= gcBefore || maxChange > column.getMarkedForDeleteAt()) // (1)
&& (!container.isMarkedForDelete() || maxChange > container.getMarkedForDeleteAt()); // (2)
}