You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by md...@apache.org on 2016/04/20 12:05:15 UTC
svn commit: r1740098 - in /jackrabbit/oak/trunk/oak-segment-next/src:
main/java/org/apache/jackrabbit/oak/plugins/segment/
main/java/org/apache/jackrabbit/oak/plugins/segment/file/
test/java/org/apache/jackrabbit/oak/plugins/segment/
Author: mduerig
Date: Wed Apr 20 10:05:15 2016
New Revision: 1740098
URL: http://svn.apache.org/viewvc?rev=1740098&view=rev
Log:
OAK-3348: Cross gc sessions might introduce references to pre-compacted segments
Implement generation based cleanup strategy: remove data segments that are older than 2 generations and remove bulk segments that are not reachable anymore (mark and sweep).
Modified:
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java
jackrabbit/oak/trunk/oak-segment-next/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionAndCleanupIT.java
Modified: jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java?rev=1740098&r1=1740097&r2=1740098&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java (original)
+++ jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java Wed Apr 20 10:05:15 2016
@@ -120,7 +120,7 @@ public class Segment {
static final int BLOBREF_COUNT_OFFSET = 8;
- static final int GC_GEN_OFFSET = 10;
+ public static final int GC_GEN_OFFSET = 10;
private final SegmentTracker tracker;
@@ -312,6 +312,7 @@ public class Segment {
return data.getShort(ROOT_COUNT_OFFSET) & 0xffff;
}
+ // could we make it part of the tar file index!?
public int getGcGen() {
return data.getInt(GC_GEN_OFFSET);
}
Modified: jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java?rev=1740098&r1=1740097&r2=1740098&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java (original)
+++ jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java Wed Apr 20 10:05:15 2016
@@ -32,6 +32,7 @@ import static java.util.concurrent.TimeU
import static java.util.concurrent.TimeUnit.MINUTES;
import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount;
import static org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE;
+import static org.apache.jackrabbit.oak.plugins.segment.SegmentId.isDataSegmentId;
import static org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy.NO_COMPACTION;
import java.io.Closeable;
@@ -837,7 +838,7 @@ public class FileStore implements Segmen
public List<File> cleanup() throws IOException {
Stopwatch watch = Stopwatch.createStarted();
long initialSize = size();
- Set<UUID> referencedIds = newHashSet();
+ Set<UUID> bulkRefs = newHashSet();
Map<TarReader, TarReader> cleaned = newLinkedHashMap();
fileStoreLock.writeLock().lock();
@@ -853,9 +854,10 @@ public class FileStore implements Segmen
System.gc();
for (SegmentId id : tracker.getReferencedSegmentIds()) {
- referencedIds.add(id.asUUID());
+ if (!isDataSegmentId(id.getLeastSignificantBits())) {
+ bulkRefs.add(id.asUUID());
+ }
}
- writer.collectReferences(referencedIds);
for (TarReader reader : readers) {
cleaned.put(reader, reader);
}
@@ -865,11 +867,18 @@ public class FileStore implements Segmen
// Do actual cleanup outside of the lock to prevent blocking
// concurrent writers for a long time
- includeForwardReferences(cleaned.keySet(), referencedIds);
- LinkedList<File> toRemove = newLinkedList();
- Set<UUID> cleanedIds = newHashSet();
+ int generation = getGcGen() - 1;
+ Set<UUID> reclaim = newHashSet();
for (TarReader reader : cleaned.keySet()) {
- cleaned.put(reader, reader.cleanup(referencedIds, cleanedIds));
+ reader.mark(bulkRefs, reclaim, generation);
+ log.info("Size of bulk references/reclaim set {}/{}", bulkRefs.size(), reclaim.size());
+ if (shutdown) {
+ gcMonitor.info("TarMK GC #{}: cleanup interrupted", gcCount);
+ break;
+ }
+ }
+ for (TarReader reader : cleaned.keySet()) {
+ cleaned.put(reader, reader.sweep(reclaim));
if (shutdown) {
gcMonitor.info("TarMK GC #{}: cleanup interrupted", gcCount);
break;
@@ -902,6 +911,7 @@ public class FileStore implements Segmen
// Close old readers *after* setting readers to the new readers to avoid accessing
// a closed reader from readSegment()
+ LinkedList<File> toRemove = newLinkedList();
for (TarReader oldReader : oldReaders) {
closeAndLogOnFail(oldReader);
File file = oldReader.getFile();
@@ -922,28 +932,6 @@ public class FileStore implements Segmen
}
/**
- * Include the ids of all segments transitively reachable through forward references from
- * {@code referencedIds}. See OAK-3864.
- */
- private void includeForwardReferences(Iterable<TarReader> readers, Set<UUID> referencedIds)
- throws IOException {
- Set<UUID> fRefs = newHashSet(referencedIds);
- do {
- // Add direct forward references
- for (TarReader reader : readers) {
- reader.calculateForwardReferences(fRefs);
- if (fRefs.isEmpty()) {
- break; // Optimisation: bail out if no references left
- }
- }
- if (!fRefs.isEmpty()) {
- gcMonitor.info("TarMK GC #{}: cleanup found forward references to {}", gcCount, fRefs);
- }
- // ... as long as new forward references are found.
- } while (referencedIds.addAll(fRefs));
- }
-
- /**
* Returns the cancellation policy for the compaction phase. If the disk
* space was considered insufficient at least once during compaction (or if
* the space was never sufficient to begin with), compaction is considered
@@ -1351,7 +1339,7 @@ public class FileStore implements Segmen
for (UUID uuid : reader.getUUIDs()) {
graph.put(uuid, null);
}
- Map<UUID, List<UUID>> g = reader.getGraph();
+ Map<UUID, List<UUID>> g = reader.getGraph(false);
if (g != null) {
graph.putAll(g);
}
@@ -1419,6 +1407,25 @@ public class FileStore implements Segmen
}
/**
+ * Include the ids of all segments transitively reachable through forward references from
+ * {@code referencedIds}. See OAK-3864.
+ */
+ private static void includeForwardReferences(Iterable<TarReader> readers, Set<UUID> referencedIds)
+ throws IOException {
+ Set<UUID> fRefs = newHashSet(referencedIds);
+ do {
+ // Add direct forward references
+ for (TarReader reader : readers) {
+ reader.calculateForwardReferences(fRefs);
+ if (fRefs.isEmpty()) {
+ break; // Optimisation: bail out if no references left
+ }
+ }
+ // ... as long as new forward references are found.
+ } while (referencedIds.addAll(fRefs));
+ }
+
+ /**
* Build the graph of segments reachable from an initial set of segments
* @param roots the initial set of segments
* @param visitor visitor receiving call back while following the segment graph
@@ -1429,7 +1436,7 @@ public class FileStore implements Segmen
@Nonnull SegmentGraphVisitor visitor) throws IOException {
List<TarReader> readers = super.readers;
- super.includeForwardReferences(readers, roots);
+ includeForwardReferences(readers, roots);
for (TarReader reader : readers) {
reader.traverseSegmentGraph(checkNotNull(roots), checkNotNull(visitor));
}
Modified: jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java?rev=1740098&r1=1740097&r2=1740098&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java (original)
+++ jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java Wed Apr 20 10:05:15 2016
@@ -26,6 +26,7 @@ import static com.google.common.collect.
import static com.google.common.collect.Sets.newHashSet;
import static com.google.common.collect.Sets.newHashSetWithExpectedSize;
import static java.util.Collections.singletonList;
+import static org.apache.jackrabbit.oak.plugins.segment.Segment.GC_GEN_OFFSET;
import static org.apache.jackrabbit.oak.plugins.segment.Segment.REF_COUNT_OFFSET;
import static org.apache.jackrabbit.oak.plugins.segment.SegmentId.isDataSegmentId;
import static org.apache.jackrabbit.oak.plugins.segment.file.TarWriter.GRAPH_MAGIC;
@@ -47,7 +48,6 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.CRC32;
-import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import org.apache.commons.io.FileUtils;
@@ -506,6 +506,8 @@ class TarReader implements Closeable {
private volatile boolean closed;
+ private volatile boolean hasGraph;
+
private TarReader(File file, FileAccess access, ByteBuffer index) {
this.file = file;
this.access = access;
@@ -650,10 +652,11 @@ class TarReader implements Closeable {
return entries;
}
- @CheckForNull
+ @Nonnull
private List<UUID> getReferences(TarEntry entry, UUID id, Map<UUID, List<UUID>> graph) throws IOException {
if (graph != null) {
- return graph.get(id);
+ List<UUID> uuids = graph.get(id);
+ return uuids == null ? Collections.<UUID>emptyList() : uuids;
} else {
// a pre-compiled graph is not available, so read the
// references directly from this segment
@@ -684,7 +687,7 @@ class TarReader implements Closeable {
@Nonnull SegmentGraphVisitor visitor) throws IOException {
checkNotNull(roots);
checkNotNull(visitor);
- Map<UUID, List<UUID>> graph = getGraph();
+ Map<UUID, List<UUID>> graph = getGraph(false);
TarEntry[] entries = getEntries();
for (int i = entries.length - 1; i >= 0; i--) {
@@ -692,14 +695,9 @@ class TarReader implements Closeable {
UUID id = new UUID(entry.msb(), entry.lsb());
if (roots.remove(id) && isDataSegmentId(entry.lsb())) {
// this is a referenced data segment, so follow the graph
- List<UUID> refIds = getReferences(entry, id, graph);
- if (refIds != null) {
- for (UUID refId : refIds) {
- visitor.accept(id, refId);
- roots.add(refId);
- }
- } else {
- visitor.accept(id, null);
+ for (UUID refId : getReferences(entry, id, graph)) {
+ visitor.accept(id, refId);
+ roots.add(refId);
}
} else {
// this segment is not referenced anywhere
@@ -717,65 +715,70 @@ class TarReader implements Closeable {
*
* @throws IOException
*/
+ // FIXME OAK-3348 reference from utilities only. Can we move this somewhere else?
void calculateForwardReferences(Set<UUID> referencedIds) throws IOException {
- Map<UUID, List<UUID>> graph = getGraph();
+ Map<UUID, List<UUID>> graph = getGraph(false);
TarEntry[] entries = getEntries();
for (int i = entries.length - 1; i >= 0; i--) {
TarEntry entry = entries[i];
UUID id = new UUID(entry.msb(), entry.lsb());
if (referencedIds.remove(id)) {
if (isDataSegmentId(entry.lsb())) {
- // this is a referenced data segment, so follow the graph
- List<UUID> refIds = getReferences(entry, id, graph);
- if (refIds != null) {
- referencedIds.addAll(refIds);
+ referencedIds.addAll(getReferences(entry, id, graph));
+ }
+ }
+ }
+ }
+
+ private int getGCGeneration(TarEntry entry) throws IOException {
+ ByteBuffer segment = access.read(entry.offset(), GC_GEN_OFFSET + 4);
+ return segment.getInt(GC_GEN_OFFSET);
+ }
+
+ void mark(Set<UUID> bulkRefs, Set<UUID> reclaim, int generation) throws IOException {
+ Map<UUID, List<UUID>> graph = getGraph(true);
+ TarEntry[] entries = getEntries();
+ for (int i = entries.length - 1; i >= 0; i--) {
+ TarEntry entry = entries[i];
+ UUID id = new UUID(entry.msb(), entry.lsb());
+ if ((!isDataSegmentId(entry.lsb()) && !bulkRefs.remove(id)) ||
+ (isDataSegmentId(entry.lsb()) && getGCGeneration(entry) < generation)) {
+ // non-referenced bulk segment or old data segment
+ reclaim.add(id);
+ } else {
+ if (isDataSegmentId(entry.lsb())) {
+ for (UUID refId : getReferences(entry, id, graph)) {
+ if (!isDataSegmentId(refId.getLeastSignificantBits())) {
+ // keep the extra check for bulk segments for the case where a
+ // pre-compiled graph is not available and getReferences also
+ // includes data references
+ if (!reclaim.remove(id)) {
+ bulkRefs.add(refId);
+ }
+ }
}
}
}
}
}
- /**
- * Garbage collects segments in this file. First it collects the set of
- * segments that are referenced / reachable, then (if more than 25% is
- * garbage) creates a new generation of the file.
- * <p>
- * The old generation files are not removed (they can't easily be removed,
- * for memory mapped files).
- *
- * @param referencedIds the referenced segment ids (input and output).
- * @param removed a set which will receive the uuids of all segments that
- * have been cleaned.
- * @return this (if the file is kept as is), or the new generation file, or
- * null if the file is fully garbage
- */
- synchronized TarReader cleanup(Set<UUID> referencedIds, Set<UUID> removed) throws IOException {
+ TarReader sweep(Set<UUID> reclaim) throws IOException {
String name = file.getName();
log.debug("Cleaning up {}", name);
Set<UUID> cleaned = newHashSet();
- Map<UUID, List<UUID>> graph = getGraph();
- TarEntry[] entries = getEntries();
-
int size = 0;
int count = 0;
- for (int i = entries.length - 1; i >= 0; i--) {
+ TarEntry[] entries = getEntries();
+ for (int i = 0; i < entries.length; i++) {
TarEntry entry = entries[i];
UUID id = new UUID(entry.msb(), entry.lsb());
- if (!referencedIds.remove(id)) {
- // this segment is not referenced anywhere
+ if (reclaim.contains(id)) {
cleaned.add(id);
entries[i] = null;
} else {
size += getEntrySize(entry.size());
count += 1;
- if (isDataSegmentId(entry.lsb())) {
- // this is a referenced data segment, so follow the graph
- List<UUID> refIds = getReferences(entry, id, graph);
- if (refIds != null) {
- referencedIds.addAll(refIds);
- }
- }
}
}
size += getEntrySize(24 * count + 16);
@@ -783,10 +786,10 @@ class TarReader implements Closeable {
if (count == 0) {
log.debug("None of the entries of {} are referenceable.", name);
- removed.addAll(cleaned);
logCleanedSegments(cleaned);
return null;
- } else if (size >= access.length() * 3 / 4 && graph != null) {
+ }
+ if (size >= access.length() * 3 / 4 && hasGraph()) {
// the space savings are not worth it at less than 25%,
// unless this tar file lacks a pre-compiled segment graph
// in which case we'll always generate a new tar file with
@@ -823,7 +826,6 @@ class TarReader implements Closeable {
singletonList(newFile), access.isMemoryMapped());
if (reader != null) {
logCleanedSegments(cleaned);
- removed.addAll(cleaned);
return reader;
} else {
log.warn("Failed to open cleaned up tar file {}", file);
@@ -872,15 +874,24 @@ class TarReader implements Closeable {
* @return the parsed graph, or {@code null} if one was not found
* @throws IOException if the tar file could not be read
*/
- Map<UUID, List<UUID>> getGraph() throws IOException {
+ Map<UUID, List<UUID>> getGraph(boolean bulkOnly) throws IOException {
ByteBuffer graph = loadGraph();
if (graph == null) {
return null;
} else {
- return parseGraph(graph);
+ return parseGraph(graph, bulkOnly);
}
}
+ private boolean hasGraph() {
+ if (!hasGraph) {
+ try {
+ loadGraph();
+ } catch (IOException ignore) { }
+ }
+ return hasGraph;
+ }
+
/**
* Loads the optional pre-compiled graph entry from the given tar file.
*
@@ -921,10 +932,11 @@ class TarReader implements Closeable {
return null; // checksum mismatch
}
+ hasGraph = true;
return graph;
}
- private static Map<UUID, List<UUID>> parseGraph(ByteBuffer graphByteBuffer) {
+ private static Map<UUID, List<UUID>> parseGraph(ByteBuffer graphByteBuffer, boolean bulkOnly) {
int count = graphByteBuffer.getInt(graphByteBuffer.limit() - 12);
ByteBuffer buffer = graphByteBuffer.duplicate();
@@ -941,7 +953,10 @@ class TarReader implements Closeable {
List<UUID> list = newArrayList();
int refid = buffer.getInt();
while (refid != -1) {
- list.add(uuids.get(refid));
+ UUID ref = uuids.get(refid);
+ if (!bulkOnly || !isDataSegmentId(ref.getLeastSignificantBits())) {
+ list.add(ref);
+ }
refid = buffer.getInt();
}
graph.put(uuid, list);
Modified: jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java?rev=1740098&r1=1740097&r2=1740098&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java (original)
+++ jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java Wed Apr 20 10:05:15 2016
@@ -474,6 +474,8 @@ class TarWriter implements Closeable {
return header;
}
+ // to offer an alternative cleanup strategy based on reachability, in which
+ // case it will still be needed
/**
* Add all segment ids that are reachable from {@code referencedIds} via
* this writer's segment graph and subsequently remove those segment ids
Modified: jackrabbit/oak/trunk/oak-segment-next/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionAndCleanupIT.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-next/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionAndCleanupIT.java?rev=1740098&r1=1740097&r2=1740098&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-next/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionAndCleanupIT.java (original)
+++ jackrabbit/oak/trunk/oak-segment-next/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionAndCleanupIT.java Wed Apr 20 10:05:15 2016
@@ -374,6 +374,8 @@ public class CompactionAndCleanupIT {
nodeStore.merge(preGCBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
}
+ // (currently hard coded to 2);
+ fileStore.compact();
fileStore.compact();
// case 2: merge above changes after compact
@@ -390,11 +392,7 @@ public class CompactionAndCleanupIT {
// Re-initialise the file store to simulate off-line gc
fileStore = FileStore.builder(repoDir).withMaxFileSize(2).build();
try {
- // The 1M blob should get gc-ed. This works for case 1.
- // However it doesn't for case 2 as merging after compaction
- // apparently creates references from the current segment
- // to the pre-compacted segment to which above changes have
- // been pre-written.
+ // The 1M blob should get gc-ed
fileStore.cleanup();
assertTrue(ref + " repository size " + fileStore.size() + " < " + 1024 * 1024,
fileStore.size() < 1024 * 1024);