Posted to oak-commits@jackrabbit.apache.org by md...@apache.org on 2016/04/20 12:05:15 UTC

svn commit: r1740098 - in /jackrabbit/oak/trunk/oak-segment-next/src: main/java/org/apache/jackrabbit/oak/plugins/segment/ main/java/org/apache/jackrabbit/oak/plugins/segment/file/ test/java/org/apache/jackrabbit/oak/plugins/segment/

Author: mduerig
Date: Wed Apr 20 10:05:15 2016
New Revision: 1740098

URL: http://svn.apache.org/viewvc?rev=1740098&view=rev
Log:
OAK-3348: Cross gc sessions might introduce references to pre-compacted segments
Implement a generation-based cleanup strategy: remove data segments that are older than 2 generations and bulk segments that are no longer reachable (mark and sweep).
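
In outline: every data segment carries the gc generation it was written in;
cleanup reclaims data segments that have fallen more than one full generation
behind, plus bulk segments that no retained data segment references. A minimal
standalone sketch of that policy (illustrative names only, not the Oak code
changed below):

    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;
    import java.util.UUID;

    /** Toy model of the cleanup policy above; not the Oak implementation. */
    final class GenerationCleanupSketch {

        static final class Entry {
            final UUID id;
            final boolean isData;       // data segment vs. bulk segment
            final int gcGeneration;     // generation stamp (data segments)
            final List<UUID> bulkRefs;  // referenced bulk segments (data segments)

            Entry(UUID id, boolean isData, int gcGeneration, List<UUID> bulkRefs) {
                this.id = id;
                this.isData = isData;
                this.gcGeneration = gcGeneration;
                this.bulkRefs = bulkRefs;
            }
        }

        // Mark phase: walk entries newest to oldest (referrers before their
        // targets, matching tar append order) and collect the ids to reclaim.
        static Set<UUID> mark(List<Entry> entries, int currentGeneration) {
            Set<UUID> reclaim = new HashSet<>();
            Set<UUID> referencedBulk = new HashSet<>();
            for (int i = entries.size() - 1; i >= 0; i--) {
                Entry e = entries.get(i);
                if (e.isData) {
                    if (e.gcGeneration < currentGeneration - 1) {
                        reclaim.add(e.id);               // older than 2 generations
                    } else {
                        referencedBulk.addAll(e.bulkRefs);
                    }
                } else if (!referencedBulk.remove(e.id)) {
                    reclaim.add(e.id);                   // unreferenced bulk segment
                }
            }
            return reclaim;
        }

        // Sweep phase: drop everything marked for reclamation.
        static List<Entry> sweep(List<Entry> entries, Set<UUID> reclaim) {
            List<Entry> kept = new ArrayList<>();
            for (Entry e : entries) {
                if (!reclaim.contains(e.id)) {
                    kept.add(e);
                }
            }
            return kept;
        }
    }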

Modified:
    jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
    jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
    jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java
    jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java
    jackrabbit/oak/trunk/oak-segment-next/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionAndCleanupIT.java

Modified: jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java?rev=1740098&r1=1740097&r2=1740098&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java (original)
+++ jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java Wed Apr 20 10:05:15 2016
@@ -120,7 +120,7 @@ public class Segment {
 
     static final int BLOBREF_COUNT_OFFSET = 8;
 
-    static final int GC_GEN_OFFSET = 10;
+    public static final int GC_GEN_OFFSET = 10;
 
     private final SegmentTracker tracker;
 
@@ -312,6 +312,7 @@ public class Segment {
         return data.getShort(ROOT_COUNT_OFFSET) & 0xffff;
     }
 
+    // could we make it part of the tar file index!?
     public int getGcGen() {
         return data.getInt(GC_GEN_OFFSET);
     }

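Making GC_GEN_OFFSET public lets the tar layer read a segment's gc generation
straight out of the raw entry bytes, without materialising a Segment; the new
TarReader.getGCGeneration below does exactly that. A minimal sketch
(hypothetical helper; assumes the buffer holds at least the segment header):

    import java.nio.ByteBuffer;

    import org.apache.jackrabbit.oak.plugins.segment.Segment;

    final class GcGenerations {
        // The gc generation is a 4-byte int at offset GC_GEN_OFFSET (10)
        // of the segment header.
        static int gcGenerationOf(ByteBuffer segmentBytes) {
            return segmentBytes.getInt(Segment.GC_GEN_OFFSET);
        }
    }
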
Modified: jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java?rev=1740098&r1=1740097&r2=1740098&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java (original)
+++ jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java Wed Apr 20 10:05:15 2016
@@ -32,6 +32,7 @@ import static java.util.concurrent.TimeU
 import static java.util.concurrent.TimeUnit.MINUTES;
 import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount;
 import static org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE;
+import static org.apache.jackrabbit.oak.plugins.segment.SegmentId.isDataSegmentId;
 import static org.apache.jackrabbit.oak.plugins.segment.compaction.CompactionStrategy.NO_COMPACTION;
 
 import java.io.Closeable;
@@ -837,7 +838,7 @@ public class FileStore implements Segmen
     public List<File> cleanup() throws IOException {
         Stopwatch watch = Stopwatch.createStarted();
         long initialSize = size();
-        Set<UUID> referencedIds = newHashSet();
+        Set<UUID> bulkRefs = newHashSet();
         Map<TarReader, TarReader> cleaned = newLinkedHashMap();
 
         fileStoreLock.writeLock().lock();
@@ -853,9 +854,10 @@ public class FileStore implements Segmen
             System.gc();
 
             for (SegmentId id : tracker.getReferencedSegmentIds()) {
-                referencedIds.add(id.asUUID());
+                if (!isDataSegmentId(id.getLeastSignificantBits())) {
+                    bulkRefs.add(id.asUUID());
+                }
             }
-            writer.collectReferences(referencedIds);
             for (TarReader reader : readers) {
                 cleaned.put(reader, reader);
             }
@@ -865,11 +867,18 @@ public class FileStore implements Segmen
 
         // Do actual cleanup outside of the lock to prevent blocking
         // concurrent writers for a long time
-        includeForwardReferences(cleaned.keySet(), referencedIds);
-        LinkedList<File> toRemove = newLinkedList();
-        Set<UUID> cleanedIds = newHashSet();
+        int generation = getGcGen() - 1;
+        Set<UUID> reclaim = newHashSet();
         for (TarReader reader : cleaned.keySet()) {
-            cleaned.put(reader, reader.cleanup(referencedIds, cleanedIds));
+            reader.mark(bulkRefs, reclaim, generation);
+            log.info("Size of bulk references/reclaim set {}/{}", bulkRefs.size(), reclaim.size());
+            if (shutdown) {
+                gcMonitor.info("TarMK GC #{}: cleanup interrupted", gcCount);
+                break;
+            }
+        }
+        for (TarReader reader : cleaned.keySet()) {
+            cleaned.put(reader, reader.sweep(reclaim));
             if (shutdown) {
                 gcMonitor.info("TarMK GC #{}: cleanup interrupted", gcCount);
                 break;
@@ -902,6 +911,7 @@ public class FileStore implements Segmen
 
         // Close old readers *after* setting readers to the new readers to avoid accessing
         // a closed reader from readSegment()
+        LinkedList<File> toRemove = newLinkedList();
         for (TarReader oldReader : oldReaders) {
             closeAndLogOnFail(oldReader);
             File file = oldReader.getFile();
@@ -922,28 +932,6 @@ public class FileStore implements Segmen
     }
 
     /**
-     * Include the ids of all segments transitively reachable through forward references from
-     * {@code referencedIds}. See OAK-3864.
-     */
-    private void includeForwardReferences(Iterable<TarReader> readers, Set<UUID> referencedIds)
-            throws IOException {
-        Set<UUID> fRefs = newHashSet(referencedIds);
-        do {
-            // Add direct forward references
-            for (TarReader reader : readers) {
-                reader.calculateForwardReferences(fRefs);
-                if (fRefs.isEmpty()) {
-                    break;  // Optimisation: bail out if no references left
-                }
-            }
-            if (!fRefs.isEmpty()) {
-                gcMonitor.info("TarMK GC #{}: cleanup found forward references to {}", gcCount, fRefs);
-            }
-            // ... as long as new forward references are found.
-        } while (referencedIds.addAll(fRefs));
-    }
-
-    /**
      * Returns the cancellation policy for the compaction phase. If the disk
      * space was considered insufficient at least once during compaction (or if
      * the space was never sufficient to begin with), compaction is considered
@@ -1351,7 +1339,7 @@ public class FileStore implements Segmen
                 for (UUID uuid : reader.getUUIDs()) {
                     graph.put(uuid, null);
                 }
-                Map<UUID, List<UUID>> g = reader.getGraph();
+                Map<UUID, List<UUID>> g = reader.getGraph(false);
                 if (g != null) {
                     graph.putAll(g);
                 }
@@ -1419,6 +1407,25 @@ public class FileStore implements Segmen
         }
 
         /**
+         * Include the ids of all segments transitively reachable through forward references from
+         * {@code referencedIds}. See OAK-3864.
+         */
+        private static void includeForwardReferences(Iterable<TarReader> readers, Set<UUID> referencedIds)
+            throws IOException {
+            Set<UUID> fRefs = newHashSet(referencedIds);
+            do {
+                // Add direct forward references
+                for (TarReader reader : readers) {
+                    reader.calculateForwardReferences(fRefs);
+                    if (fRefs.isEmpty()) {
+                        break;  // Optimisation: bail out if no references left
+                    }
+                }
+                // ... as long as new forward references are found.
+            } while (referencedIds.addAll(fRefs));
+        }
+
+        /**
          * Build the graph of segments reachable from an initial set of segments
          * @param roots     the initial set of segments
          * @param visitor   visitor receiving call back while following the segment graph
@@ -1429,7 +1436,7 @@ public class FileStore implements Segmen
             @Nonnull SegmentGraphVisitor visitor) throws IOException {
 
             List<TarReader> readers = super.readers;
-            super.includeForwardReferences(readers, roots);
+            includeForwardReferences(readers, roots);
             for (TarReader reader : readers) {
                 reader.traverseSegmentGraph(checkNotNull(roots), checkNotNull(visitor));
             }

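The relocated includeForwardReferences computes a fixpoint: it keeps folding in
direct forward references until a pass adds no new ids (Set.addAll returning
false terminates the loop). A toy standalone version of the same idea over a
plain in-memory graph (illustrative only, not the Oak code):

    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;
    import java.util.UUID;

    final class ForwardRefsSketch {
        static Set<UUID> transitiveRefs(Map<UUID, List<UUID>> graph, Set<UUID> roots) {
            Set<UUID> referenced = new HashSet<>(roots);
            Set<UUID> frontier = new HashSet<>(roots);
            do {
                Set<UUID> next = new HashSet<>();
                for (UUID id : frontier) {
                    List<UUID> refs = graph.get(id);
                    if (refs != null) {
                        next.addAll(refs);
                    }
                }
                frontier = next;
                // loop while a pass still adds previously unseen ids
            } while (referenced.addAll(frontier));
            return referenced;
        }
    }
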
Modified: jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java?rev=1740098&r1=1740097&r2=1740098&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java (original)
+++ jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java Wed Apr 20 10:05:15 2016
@@ -26,6 +26,7 @@ import static com.google.common.collect.
 import static com.google.common.collect.Sets.newHashSet;
 import static com.google.common.collect.Sets.newHashSetWithExpectedSize;
 import static java.util.Collections.singletonList;
+import static org.apache.jackrabbit.oak.plugins.segment.Segment.GC_GEN_OFFSET;
 import static org.apache.jackrabbit.oak.plugins.segment.Segment.REF_COUNT_OFFSET;
 import static org.apache.jackrabbit.oak.plugins.segment.SegmentId.isDataSegmentId;
 import static org.apache.jackrabbit.oak.plugins.segment.file.TarWriter.GRAPH_MAGIC;
@@ -47,7 +48,6 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.zip.CRC32;
 
-import javax.annotation.CheckForNull;
 import javax.annotation.Nonnull;
 
 import org.apache.commons.io.FileUtils;
@@ -506,6 +506,8 @@ class TarReader implements Closeable {
 
     private volatile boolean closed;
 
+    private volatile boolean hasGraph;
+
     private TarReader(File file, FileAccess access, ByteBuffer index) {
         this.file = file;
         this.access = access;
@@ -650,10 +652,11 @@ class TarReader implements Closeable {
         return entries;
     }
 
-    @CheckForNull
+    @Nonnull
     private List<UUID> getReferences(TarEntry entry, UUID id, Map<UUID, List<UUID>> graph) throws IOException {
         if (graph != null) {
-            return graph.get(id);
+            List<UUID> uuids = graph.get(id);
+            return uuids == null ? Collections.<UUID>emptyList() : uuids;
         } else {
             // a pre-compiled graph is not available, so read the
             // references directly from this segment
@@ -684,7 +687,7 @@ class TarReader implements Closeable {
         @Nonnull SegmentGraphVisitor visitor) throws IOException {
         checkNotNull(roots);
         checkNotNull(visitor);
-        Map<UUID, List<UUID>> graph = getGraph();
+        Map<UUID, List<UUID>> graph = getGraph(false);
 
         TarEntry[] entries = getEntries();
         for (int i = entries.length - 1; i >= 0; i--) {
@@ -692,14 +695,9 @@ class TarReader implements Closeable {
             UUID id = new UUID(entry.msb(), entry.lsb());
             if (roots.remove(id) && isDataSegmentId(entry.lsb())) {
                 // this is a referenced data segment, so follow the graph
-                List<UUID> refIds = getReferences(entry, id, graph);
-                if (refIds != null) {
-                    for (UUID refId : refIds) {
-                        visitor.accept(id, refId);
-                        roots.add(refId);
-                    }
-                } else {
-                    visitor.accept(id, null);
+                for (UUID refId : getReferences(entry, id, graph)) {
+                    visitor.accept(id, refId);
+                    roots.add(refId);
                 }
             } else {
                 // this segment is not referenced anywhere
@@ -717,65 +715,70 @@ class TarReader implements Closeable {
      *
      * @throws IOException
      */
+    // FIXME OAK-3348 referenced from utilities only. Can we move this somewhere else?
     void calculateForwardReferences(Set<UUID> referencedIds) throws IOException {
-        Map<UUID, List<UUID>> graph = getGraph();
+        Map<UUID, List<UUID>> graph = getGraph(false);
         TarEntry[] entries = getEntries();
         for (int i = entries.length - 1; i >= 0; i--) {
             TarEntry entry = entries[i];
             UUID id = new UUID(entry.msb(), entry.lsb());
             if (referencedIds.remove(id)) {
                 if (isDataSegmentId(entry.lsb())) {
-                    // this is a referenced data segment, so follow the graph
-                    List<UUID> refIds = getReferences(entry, id, graph);
-                    if (refIds != null) {
-                        referencedIds.addAll(refIds);
+                    referencedIds.addAll(getReferences(entry, id, graph));
+                }
+            }
+        }
+    }
+
+    private int getGCGeneration(TarEntry entry) throws IOException {
+        ByteBuffer segment = access.read(entry.offset(), GC_GEN_OFFSET + 4);
+        return segment.getInt(GC_GEN_OFFSET);
+    }
+
+    void mark(Set<UUID> bulkRefs, Set<UUID> reclaim, int generation) throws IOException {
+        Map<UUID, List<UUID>> graph = getGraph(true);
+        TarEntry[] entries = getEntries();
+        for (int i = entries.length - 1; i >= 0; i--) {
+            TarEntry entry = entries[i];
+            UUID id = new UUID(entry.msb(), entry.lsb());
+            if ((!isDataSegmentId(entry.lsb()) && !bulkRefs.remove(id)) ||
+                (isDataSegmentId(entry.lsb()) && getGCGeneration(entry) < generation)) {
+                // non-referenced bulk segment or old data segment
+                reclaim.add(id);
+            } else {
+                if (isDataSegmentId(entry.lsb())) {
+                    for (UUID refId : getReferences(entry, id, graph)) {
+                        if (!isDataSegmentId(refId.getLeastSignificantBits())) {
+                            // keep the extra check for bulk segments for the case where a
+                            // pre-compiled graph is not available and getReferences also
+                            // includes data references
+                            if (!reclaim.remove(id)) {
+                                bulkRefs.add(refId);
+                            }
+                        }
                     }
                 }
             }
         }
     }
 
-    /**
-     * Garbage collects segments in this file. First it collects the set of
-     * segments that are referenced / reachable, then (if more than 25% is
-     * garbage) creates a new generation of the file.
-     * <p>
-     * The old generation files are not removed (they can't easily be removed,
-     * for memory mapped files).
-     * 
-     * @param referencedIds the referenced segment ids (input and output).
-     * @param removed a set which will receive the uuids of all segments that
-     *                have been cleaned.
-     * @return this (if the file is kept as is), or the new generation file, or
-     *         null if the file is fully garbage
-     */
-    synchronized TarReader cleanup(Set<UUID> referencedIds, Set<UUID> removed) throws IOException {
+    TarReader sweep(Set<UUID> reclaim) throws IOException {
         String name = file.getName();
         log.debug("Cleaning up {}", name);
 
         Set<UUID> cleaned = newHashSet();
-        Map<UUID, List<UUID>> graph = getGraph();
-        TarEntry[] entries = getEntries();
-
         int size = 0;
         int count = 0;
-        for (int i = entries.length - 1; i >= 0; i--) {
+        TarEntry[] entries = getEntries();
+        for (int i = 0; i < entries.length; i++) {
             TarEntry entry = entries[i];
             UUID id = new UUID(entry.msb(), entry.lsb());
-            if (!referencedIds.remove(id)) {
-                // this segment is not referenced anywhere
+            if (reclaim.contains(id)) {
                 cleaned.add(id);
                 entries[i] = null;
             } else {
                 size += getEntrySize(entry.size());
                 count += 1;
-                if (isDataSegmentId(entry.lsb())) {
-                    // this is a referenced data segment, so follow the graph
-                    List<UUID> refIds = getReferences(entry, id, graph);
-                    if (refIds != null) {
-                        referencedIds.addAll(refIds);
-                    }
-                }
             }
         }
         size += getEntrySize(24 * count + 16);
@@ -783,10 +786,10 @@ class TarReader implements Closeable {
 
         if (count == 0) {
             log.debug("None of the entries of {} are referenceable.", name);
-            removed.addAll(cleaned);
             logCleanedSegments(cleaned);
             return null;
-        } else if (size >= access.length() * 3 / 4 && graph != null) {
+        }
+        if (size >= access.length() * 3 / 4 && hasGraph()) {
             // the space savings are not worth it at less than 25%,
             // unless this tar file lacks a pre-compiled segment graph
             // in which case we'll always generate a new tar file with
@@ -823,7 +826,6 @@ class TarReader implements Closeable {
                 singletonList(newFile), access.isMemoryMapped());
         if (reader != null) {
             logCleanedSegments(cleaned);
-            removed.addAll(cleaned);
             return reader;
         } else {
             log.warn("Failed to open cleaned up tar file {}", file);
@@ -872,15 +874,24 @@ class TarReader implements Closeable {
      * @return the parsed graph, or {@code null} if one was not found
      * @throws IOException if the tar file could not be read
      */
-    Map<UUID, List<UUID>> getGraph() throws IOException {
+    Map<UUID, List<UUID>> getGraph(boolean bulkOnly) throws IOException {
         ByteBuffer graph = loadGraph();
         if (graph == null) {
             return null;
         } else {
-            return parseGraph(graph);
+            return parseGraph(graph, bulkOnly);
         }
     }
 
+    private boolean hasGraph() {
+        if (!hasGraph) {
+            try {
+                loadGraph();
+            } catch (IOException ignore) { }
+        }
+        return hasGraph;
+    }
+
     /**
      * Loads the optional pre-compiled graph entry from the given tar file.
      *
@@ -921,10 +932,11 @@ class TarReader implements Closeable {
             return null; // checksum mismatch
         }
 
+        hasGraph = true;
         return graph;
     }
 
-    private static Map<UUID, List<UUID>> parseGraph(ByteBuffer graphByteBuffer) {
+    private static Map<UUID, List<UUID>> parseGraph(ByteBuffer graphByteBuffer, boolean bulkOnly) {
         int count = graphByteBuffer.getInt(graphByteBuffer.limit() - 12);
 
         ByteBuffer buffer = graphByteBuffer.duplicate();
@@ -941,7 +953,10 @@ class TarReader implements Closeable {
             List<UUID> list = newArrayList();
             int refid = buffer.getInt();
             while (refid != -1) {
-                list.add(uuids.get(refid));
+                UUID ref = uuids.get(refid);
+                if (!bulkOnly || !isDataSegmentId(ref.getLeastSignificantBits())) {
+                    list.add(ref);
+                }
                 refid = buffer.getInt();
             }
             graph.put(uuid, list);

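The new boolean on getGraph selects between the full pre-compiled graph and a
bulk-only view, so mark() never chases data-to-data edges it does not need. A
toy equivalent of the bulkOnly filtering in parseGraph (the isDataSegmentId
check is assumed to mirror SegmentId's version-nibble test; it is part of this
sketch, not of the diff):

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.UUID;

    final class BulkOnlyFilterSketch {
        // Assumed to match SegmentId.isDataSegmentId: data segment ids carry
        // 0xA in the top nibble of the least significant bits.
        static boolean isDataSegmentId(long lsb) {
            return (lsb >>> 60) == 0xAL;
        }

        // Keep only references that point at bulk segments.
        static Map<UUID, List<UUID>> bulkOnly(Map<UUID, List<UUID>> graph) {
            Map<UUID, List<UUID>> filtered = new HashMap<>();
            for (Map.Entry<UUID, List<UUID>> e : graph.entrySet()) {
                List<UUID> bulk = new ArrayList<>();
                for (UUID ref : e.getValue()) {
                    if (!isDataSegmentId(ref.getLeastSignificantBits())) {
                        bulk.add(ref);
                    }
                }
                filtered.put(e.getKey(), bulk);
            }
            return filtered;
        }
    }
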
Modified: jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java?rev=1740098&r1=1740097&r2=1740098&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java (original)
+++ jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java Wed Apr 20 10:05:15 2016
@@ -474,6 +474,8 @@ class TarWriter implements Closeable {
         return header;
     }
 
+    // Kept to offer an alternative cleanup strategy based on reachability,
+    // in which case this method will still be needed
     /**
      * Add all segment ids that are reachable from {@code referencedIds} via
      * this writer's segment graph and subsequently remove those segment ids

Modified: jackrabbit/oak/trunk/oak-segment-next/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionAndCleanupIT.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-next/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionAndCleanupIT.java?rev=1740098&r1=1740097&r2=1740098&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-next/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionAndCleanupIT.java (original)
+++ jackrabbit/oak/trunk/oak-segment-next/src/test/java/org/apache/jackrabbit/oak/plugins/segment/CompactionAndCleanupIT.java Wed Apr 20 10:05:15 2016
@@ -374,6 +374,8 @@ public class CompactionAndCleanupIT {
                     nodeStore.merge(preGCBuilder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
                 }
 
+                // Compact twice: the number of retained generations is currently hard coded to 2
+                fileStore.compact();
                 fileStore.compact();
 
                 // case 2: merge above changes after compact
@@ -390,11 +392,7 @@ public class CompactionAndCleanupIT {
             // Re-initialise the file store to simulate off-line gc
             fileStore = FileStore.builder(repoDir).withMaxFileSize(2).build();
             try {
-                // The 1M blob should get gc-ed. This works for case 1.
-                // However it doesn't for case 2 as merging after compaction
-                // apparently creates references from the current segment
-                // to the pre-compacted segment to which above changes have
-                // been pre-written.
+                // The 1M blob should get gc-ed
                 fileStore.cleanup();
                 assertTrue(ref + " repository size " + fileStore.size() + " < " + 1024 * 1024,
                         fileStore.size() < 1024 * 1024);