You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by jb...@apache.org on 2012/06/15 00:40:40 UTC
[2/3] git commit: avoid promoting tiny sstables patch by jbellis;
reviewed by yukim for CASSANDRA-4341
avoid promoting tiny sstables
patch by jbellis; reviewed by yukim for CASSANDRA-4341
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/a4fab90a
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/a4fab90a
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/a4fab90a
Branch: refs/heads/trunk
Commit: a4fab90af0063110e1118fb69b9c5923a415b9d3
Parents: 33f1bac
Author: Jonathan Ellis <jb...@apache.org>
Authored: Thu Jun 14 14:30:36 2012 -0500
Committer: Jonathan Ellis <jb...@apache.org>
Committed: Thu Jun 14 17:40:15 2012 -0500
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../cassandra/db/compaction/LeveledManifest.java | 71 +++++++++++----
2 files changed, 55 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cassandra/blob/a4fab90a/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index fcfff37..b0e667d 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
1.1.2
+ * LCS no longer promotes tiny sstables out of L0 (CASSANDRA-4341)
* skip tombstones during hint replay (CASSANDRA-4320)
* fix NPE in compactionstats (CASSANDRA-4318)
* enforce 1m min keycache for auto (CASSANDRA-4306)
http://git-wip-us.apache.org/repos/asf/cassandra/blob/a4fab90a/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java b/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java
index b2ba857..4ed5fac 100644
--- a/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java
+++ b/src/java/org/apache/cassandra/db/compaction/LeveledManifest.java
@@ -63,12 +63,12 @@ public class LeveledManifest
private final List<SSTableReader>[] generations;
private final Map<SSTableReader, Integer> sstableGenerations;
private final RowPosition[] lastCompactedKeys;
- private final int maxSSTableSizeInMB;
+ private final int maxSSTableSizeInBytes;
private LeveledManifest(ColumnFamilyStore cfs, int maxSSTableSizeInMB)
{
this.cfs = cfs;
- this.maxSSTableSizeInMB = maxSSTableSizeInMB;
+ this.maxSSTableSizeInBytes = maxSSTableSizeInMB * 1024 * 1024;
// allocate enough generations for a PB of data
int n = (int) Math.log10(1000 * 1000 * 1000 / maxSSTableSizeInMB);
@@ -178,9 +178,18 @@ public class LeveledManifest
if (!added.iterator().hasNext())
return;
- int newLevel = minimumLevel == maximumLevel ? maximumLevel + 1 : maximumLevel;
- newLevel = skipLevels(newLevel, added);
- assert newLevel > 0;
+ int newLevel;
+ if (minimumLevel == 0 && maximumLevel == 0 && SSTable.getTotalBytes(removed) < maxSSTableSizeInBytes)
+ {
+ // special case for tiny L0 sstables; see CASSANDRA-4341
+ newLevel = 0;
+ }
+ else
+ {
+ newLevel = minimumLevel == maximumLevel ? maximumLevel + 1 : maximumLevel;
+ newLevel = skipLevels(newLevel, added);
+ assert newLevel > 0;
+ }
if (logger.isDebugEnabled())
logger.debug("Adding [{}] at L{}", toString(added), newLevel);
@@ -225,8 +234,8 @@ public class LeveledManifest
private long maxBytesForLevel(int level)
{
if (level == 0)
- return 4L * maxSSTableSizeInMB * 1024 * 1024;
- double bytes = Math.pow(10, level) * maxSSTableSizeInMB * 1024 * 1024;
+ return 4L * maxSSTableSizeInBytes;
+ double bytes = Math.pow(10, level) * maxSSTableSizeInBytes;
if (bytes > Long.MAX_VALUE)
throw new RuntimeException("At most " + Long.MAX_VALUE + " bytes may be in a compaction level; your maxSSTableSize must be absurdly high to compute " + bytes);
return (long) bytes;
@@ -265,7 +274,7 @@ public class LeveledManifest
// L0 gets a special case that if we don't have anything more important to do,
// we'll go ahead and compact even just one sstable
- if (score > 1.001 || i == 0)
+ if (score > 1.001 || (i == 0 && sstables.size() > 1))
{
Collection<SSTableReader> candidates = getCandidatesFor(i);
@@ -368,17 +377,45 @@ public class LeveledManifest
if (level == 0)
{
- // because L0 files may overlap each other, we treat compactions there specially:
- // a L0 compaction also checks other L0 files for overlap.
+ // L0 is the dumping ground for new sstables which thus may overlap each other.
+ //
+ // We treat L0 compactions specially:
+ // 1a. add sstables to the candidate set until we have at least maxSSTableSizeInMB
+ // 1b. prefer choosing older sstables as candidates, to newer ones
+ // 1c. any L0 sstables that overlap a candidate, will also become candidates
+ // 2. At most MAX_COMPACTING_L0 sstables will be compacted at once
+ // 3. If total candidate size is less than maxSSTableSizeInMB, we won't bother compacting with L1,
+ // and the result of the compaction will stay in L0 instead of being promoted (see promote())
Set<SSTableReader> candidates = new HashSet<SSTableReader>();
- // pick the oldest sstable from L0, and any that overlap with it
+ Set<SSTableReader> remaining = new HashSet<SSTableReader>(generations[0]);
List<SSTableReader> ageSortedSSTables = new ArrayList<SSTableReader>(generations[0]);
Collections.sort(ageSortedSSTables, SSTable.maxTimestampComparator);
- List<SSTableReader> L0 = overlapping(ageSortedSSTables.get(0), generations[0]);
- L0 = L0.size() > MAX_COMPACTING_L0 ? L0.subList(0, MAX_COMPACTING_L0) : L0;
- // add the overlapping ones from L1
- for (SSTableReader sstable : L0)
- candidates.addAll(overlapping(sstable, generations[1]));
+ for (SSTableReader sstable : ageSortedSSTables)
+ {
+ if (candidates.contains(sstable))
+ continue;
+
+ List<SSTableReader> newCandidates = overlapping(sstable, remaining);
+ candidates.addAll(newCandidates);
+ remaining.removeAll(newCandidates);
+
+ if (candidates.size() > MAX_COMPACTING_L0)
+ {
+ // limit to only the MAX_COMPACTING_L0 oldest candidates
+ List<SSTableReader> ageSortedCandidates = new ArrayList<SSTableReader>(candidates);
+ Collections.sort(ageSortedCandidates, SSTable.maxTimestampComparator);
+ return ageSortedCandidates.subList(0, MAX_COMPACTING_L0);
+ }
+
+ if (SSTable.getTotalBytes(candidates) > maxSSTableSizeInBytes)
+ {
+ // add sstables from L1 that overlap candidates
+ for (SSTableReader candidate : new ArrayList<SSTableReader>(candidates))
+ candidates.addAll(overlapping(candidate, generations[1]));
+ break;
+ }
+ }
+
return candidates;
}
@@ -470,7 +507,7 @@ public class LeveledManifest
for (int i = generations.length - 1; i >= 0; i--)
{
List<SSTableReader> sstables = generations[i];
- estimated[i] = Math.max(0L, SSTableReader.getTotalBytes(sstables) - maxBytesForLevel(i)) / (maxSSTableSizeInMB * 1024L * 1024);
+ estimated[i] = Math.max(0L, SSTableReader.getTotalBytes(sstables) - maxBytesForLevel(i)) / maxSSTableSizeInBytes;
tasks += estimated[i];
}