You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by ma...@apache.org on 2015/08/17 08:55:23 UTC
cassandra git commit: Make getFullyExpiredSSTables less expensive.
Repository: cassandra
Updated Branches:
refs/heads/cassandra-2.0 50e5802a7 -> f53aacba8
Make getFullyExpiredSSTables less expensive.
Patch by marcuse; reviewed by yukim for CASSANDRA-9882
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/f53aacba
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/f53aacba
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/f53aacba
Branch: refs/heads/cassandra-2.0
Commit: f53aacba80de3c3441e1524be9498f6d15c5c411
Parents: 50e5802
Author: Marcus Eriksson <ma...@apache.org>
Authored: Mon Aug 17 08:29:45 2015 +0200
Committer: Marcus Eriksson <ma...@apache.org>
Committed: Mon Aug 17 08:29:45 2015 +0200
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../apache/cassandra/db/ColumnFamilyStore.java | 55 +++++++++++++++++---
.../DateTieredCompactionStrategy.java | 13 ++++-
.../DateTieredCompactionStrategyTest.java | 1 +
4 files changed, 60 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/f53aacba/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 7d84538..905445e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
2.0.17
+ * Make getFullyExpiredSSTables less expensive (CASSANDRA-9882)
* Add tool to find why expired sstables are not getting dropped (CASSANDRA-10015)
* Remove erroneous pending HH tasks from tpstats/jmx (CASSANDRA-9129)
* Don't cast expected bf size to an int (CASSANDRA-9959)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/f53aacba/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
index c125cf0..eb688f7 100644
--- a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
+++ b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
@@ -1028,17 +1028,56 @@ public class ColumnFamilyStore implements ColumnFamilyStoreMBean
if (sstables.isEmpty())
return ImmutableSet.of();
- DataTracker.SSTableIntervalTree tree = data.getView().intervalTree;
-
- Set<SSTableReader> results = null;
- for (SSTableReader sstable : sstables)
+ List<SSTableReader> sortedByFirst = new ArrayList<>(sstables);
+ Collections.sort(sortedByFirst, new Comparator<SSTableReader>()
{
- Set<SSTableReader> overlaps = ImmutableSet.copyOf(tree.search(Interval.<RowPosition, SSTableReader>create(sstable.first, sstable.last)));
- results = results == null ? overlaps : Sets.union(results, overlaps).immutableCopy();
+ @Override
+ public int compare(SSTableReader o1, SSTableReader o2)
+ {
+ return o1.first.compareTo(o2.first);
+ }
+ });
+ List<Interval<RowPosition, SSTableReader>> intervals = new ArrayList<>();
+ DecoratedKey first = null, last = null;
+ /*
+ normalize the intervals covered by the sstables
+ assume we have sstables like this (brackets representing first/last key in the sstable);
+ [ ] [ ] [ ] [ ]
+ [ ] [ ]
+ then we can, instead of searching the interval tree 6 times, normalize the intervals and
+ only query the tree 2 times, for these intervals;
+ [ ] [ ]
+ */
+ for (SSTableReader sstable : sortedByFirst)
+ {
+ if (first == null)
+ {
+ first = sstable.first;
+ last = sstable.last;
+ }
+ else
+ {
+ if (sstable.first.compareTo(last) <= 0) // we do overlap
+ {
+ if (sstable.last.compareTo(last) > 0)
+ last = sstable.last;
+ }
+ else
+ {
+ intervals.add(Interval.<RowPosition, SSTableReader>create(first, last));
+ first = sstable.first;
+ last = sstable.last;
+ }
+ }
}
- results = Sets.difference(results, ImmutableSet.copyOf(sstables));
+ intervals.add(Interval.<RowPosition, SSTableReader>create(first, last));
+ DataTracker.SSTableIntervalTree tree = data.getView().intervalTree;
+ Set<SSTableReader> results = new HashSet<>();
+
+ for (Interval<RowPosition, SSTableReader> interval : intervals)
+ results.addAll(tree.search(interval));
- return results;
+ return Sets.difference(results, ImmutableSet.copyOf(sstables));
}
/**
http://git-wip-us.apache.org/repos/asf/cassandra/blob/f53aacba/src/java/org/apache/cassandra/db/compaction/DateTieredCompactionStrategy.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/compaction/DateTieredCompactionStrategy.java b/src/java/org/apache/cassandra/db/compaction/DateTieredCompactionStrategy.java
index df28bd4..fea4995 100644
--- a/src/java/org/apache/cassandra/db/compaction/DateTieredCompactionStrategy.java
+++ b/src/java/org/apache/cassandra/db/compaction/DateTieredCompactionStrategy.java
@@ -18,6 +18,7 @@
package org.apache.cassandra.db.compaction;
import java.util.*;
+import java.util.concurrent.TimeUnit;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Predicate;
@@ -37,6 +38,8 @@ public class DateTieredCompactionStrategy extends AbstractCompactionStrategy
protected DateTieredCompactionStrategyOptions options;
protected volatile int estimatedRemainingTasks;
+ @VisibleForTesting
+ long lastExpiredCheck;
public DateTieredCompactionStrategy(ColumnFamilyStore cfs, Map<String, String> options)
{
@@ -83,8 +86,14 @@ public class DateTieredCompactionStrategy extends AbstractCompactionStrategy
Set<SSTableReader> uncompacting = cfs.getUncompactingSSTables();
- // Find fully expired SSTables. Those will be included no matter what.
- Set<SSTableReader> expired = CompactionController.getFullyExpiredSSTables(cfs, uncompacting, cfs.getOverlappingSSTables(uncompacting), gcBefore);
+ Set<SSTableReader> expired = Collections.emptySet();
+ // we only check for expired sstables every 10 minutes due to it being an expensive operation
+ if (System.currentTimeMillis() - lastExpiredCheck > TimeUnit.MINUTES.toMillis(10))
+ {
+ // Find fully expired SSTables. Those will be included no matter what.
+ expired = CompactionController.getFullyExpiredSSTables(cfs, uncompacting, cfs.getOverlappingSSTables(uncompacting), gcBefore);
+ lastExpiredCheck = System.currentTimeMillis();
+ }
Set<SSTableReader> candidates = Sets.newHashSet(filterSuspectSSTables(uncompacting));
List<SSTableReader> compactionCandidates = new ArrayList<>(getNextNonExpiredSSTables(Sets.difference(candidates, expired), gcBefore));
http://git-wip-us.apache.org/repos/asf/cassandra/blob/f53aacba/test/unit/org/apache/cassandra/db/compaction/DateTieredCompactionStrategyTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/db/compaction/DateTieredCompactionStrategyTest.java b/test/unit/org/apache/cassandra/db/compaction/DateTieredCompactionStrategyTest.java
index d7caf6b..0084a16 100644
--- a/test/unit/org/apache/cassandra/db/compaction/DateTieredCompactionStrategyTest.java
+++ b/test/unit/org/apache/cassandra/db/compaction/DateTieredCompactionStrategyTest.java
@@ -308,6 +308,7 @@ public class DateTieredCompactionStrategyTest extends SchemaLoader
DateTieredCompactionStrategy dtcs = new DateTieredCompactionStrategy(cfs, options);
dtcs.startup();
assertNull(dtcs.getNextBackgroundTask((int) (System.currentTimeMillis() / 1000)));
+ dtcs.lastExpiredCheck = 0;
Thread.sleep(2000);
AbstractCompactionTask t = dtcs.getNextBackgroundTask((int) (System.currentTimeMillis()/1000));
assertNotNull(t);