Posted to oak-commits@jackrabbit.apache.org by ju...@apache.org on 2014/04/01 17:40:00 UTC

svn commit: r1583717 - in /jackrabbit/oak/trunk/oak-core/src: main/java/org/apache/jackrabbit/oak/plugins/segment/file/ test/java/org/apache/jackrabbit/oak/plugins/segment/file/

Author: jukka
Date: Tue Apr  1 15:39:59 2014
New Revision: 1583717

URL: http://svn.apache.org/r1583717
Log:
OAK-631: SegmentMK: Implement garbage collection

Use just a single sequence of tar files to simplify GC ordering
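
The change replaces the separate "bulk" and "data" tar sequences with a single
"data" sequence: new files are always written with FILE_NAME_FORMAT, while
FILE_NAME_PATTERN still recognizes old-style "bulk" files so an existing store
can be upgraded on startup (see collectFiles() in the diff below). The following
standalone sketch is not part of this commit and uses an illustrative class
name; it only exercises those two constants:

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    // Illustrative only: the two constants are copied from the FileStore diff below.
    public class TarNamingSketch {

        private static final Pattern FILE_NAME_PATTERN =
                Pattern.compile("(data|bulk)((0|[1-9][0-9]*)[0-9]{4})(a?).tar");

        private static final String FILE_NAME_FORMAT = "data%05d%s.tar";

        public static void main(String[] args) {
            // New segments always go into the single "data" sequence.
            System.out.println(String.format(FILE_NAME_FORMAT, 7, "a")); // data00007a.tar

            // Old-style bulk files are still matched so they can be renamed on startup.
            Matcher matcher = FILE_NAME_PATTERN.matcher("bulk00002.tar");
            if (matcher.matches()) {
                System.out.println(matcher.group(1));                   // bulk
                System.out.println(Integer.parseInt(matcher.group(2))); // 2
            }
        }
    }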

Modified:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStoreTest.java

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java?rev=1583717&r1=1583716&r2=1583717&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java Tue Apr  1 15:39:59 2014
@@ -19,8 +19,12 @@ package org.apache.jackrabbit.oak.plugin
 import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.base.Preconditions.checkState;
 import static com.google.common.collect.Lists.newArrayList;
+import static com.google.common.collect.Lists.newArrayListWithCapacity;
 import static com.google.common.collect.Lists.newCopyOnWriteArrayList;
 import static com.google.common.collect.Lists.newLinkedList;
+import static com.google.common.collect.Maps.newHashMap;
+import static com.google.common.collect.Maps.newTreeMap;
+import static java.lang.String.format;
 import static java.util.concurrent.TimeUnit.SECONDS;
 import static org.apache.jackrabbit.oak.plugins.memory.EmptyNodeState.EMPTY_NODE;
 
@@ -28,12 +32,17 @@ import java.io.File;
 import java.io.IOException;
 import java.io.RandomAccessFile;
 import java.nio.ByteBuffer;
+import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
+import java.util.SortedMap;
 import java.util.UUID;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.plugins.blob.BlobStoreBlob;
@@ -55,7 +64,10 @@ public class FileStore implements Segmen
 
     private static final int MB = 1024 * 1024;
 
-    private static final String FILE_NAME_FORMAT = "%s%05d%s.tar";
+    private static final Pattern FILE_NAME_PATTERN =
+            Pattern.compile("(data|bulk)((0|[1-9][0-9]*)[0-9]{4})(a?).tar");
+
+    private static final String FILE_NAME_FORMAT = "data%05d%s.tar";
 
     private static final String JOURNAL_FILE_NAME = "journal.log";
 
@@ -72,9 +84,7 @@ public class FileStore implements Segmen
 
     private final boolean memoryMapping;
 
-    private final List<TarFile> bulkFiles = newCopyOnWriteArrayList();
-
-    private final List<TarFile> dataFiles = newCopyOnWriteArrayList();
+    private final List<TarFile> files;
 
     private final RandomAccessFile journalFile;
 
@@ -136,33 +146,15 @@ public class FileStore implements Segmen
         this.maxFileSize = maxFileSizeMB * MB;
         this.memoryMapping = memoryMapping;
 
-        for (int i = 0; true; i++) {
-            String name = String.format(FILE_NAME_FORMAT, "bulk", i, "a");
-            File file = new File(directory, name);
-            if (!file.isFile()) {
-                name = String.format(FILE_NAME_FORMAT, "bulk", i, "");
-                file = new File(directory, name);
-            }
-            if (file.isFile()) {
-                bulkFiles.add(new TarFile(file, maxFileSize, memoryMapping));
-            } else {
-                break;
-            }
-        }
-
-        for (int i = 0; true; i++) {
-            String name = String.format(FILE_NAME_FORMAT, "data", i, "a");
-            File file = new File(directory, name);
-            if (!file.isFile()) {
-                name = String.format(FILE_NAME_FORMAT, "data", i, "");
-                file = new File(directory, name);
-            }
-            if (file.isFile()) {
-                dataFiles.add(new TarFile(file, maxFileSize, memoryMapping));
-            } else {
-                break;
-            }
+        Map<Integer, File> map = collectFiles(directory);
+        List<TarFile> list = newArrayListWithCapacity(map.size());
+        Integer[] indices = map.keySet().toArray(new Integer[map.size()]);
+        Arrays.sort(indices);
+        for (Integer index : indices) {
+            File file = map.get(index);
+            list.add(new TarFile(file, maxFileSize, memoryMapping));
         }
+        this.files = newCopyOnWriteArrayList(list);
 
         journalFile = new RandomAccessFile(
                 new File(directory, JOURNAL_FILE_NAME), "rw");
@@ -180,7 +172,10 @@ public class FileStore implements Segmen
         RecordId id = null;
         while (id == null && !heads.isEmpty()) {
             RecordId last = heads.removeLast();
-            if (containsSegment(last.getSegmentId(), dataFiles)) {
+            SegmentId segmentId = last.getSegmentId();
+            if (containsSegment(
+                    segmentId.getMostSignificantBits(),
+                    segmentId.getLeastSignificantBits())) {
                 id = last;
             } else {
                 log.warn("Unable to committed revision {}, rewinding...", last);
@@ -223,6 +218,62 @@ public class FileStore implements Segmen
         flushThread.start();
     }
 
+    static SortedMap<Integer, File> collectFiles(File directory) throws IOException {
+        SortedMap<Integer, File> dataFiles = newTreeMap();
+        Map<Integer, File> bulkFiles = newHashMap();
+
+        for (File file : directory.listFiles()) {
+            Matcher matcher = FILE_NAME_PATTERN.matcher(file.getName());
+            if (matcher.matches()) {
+                Integer index = Integer.parseInt(matcher.group(2));
+                if ("data".equals(matcher.group(1))) {
+                    checkState(dataFiles.put(index, file) == null);
+                } else {
+                    checkState(bulkFiles.put(index, file) == null);
+                }
+            }
+        }
+
+        if (!bulkFiles.isEmpty()) {
+            log.info("Upgrading TarMK file names in {}", directory);
+
+            if (!dataFiles.isEmpty()) {
+                // first put all the data segments at the end of the list
+                Integer[] indices =
+                        dataFiles.keySet().toArray(new Integer[dataFiles.size()]);
+                int position = Math.max(
+                        indices[indices.length - 1] + 1,
+                        bulkFiles.size());
+                for (Integer index : indices) {
+                    File file = dataFiles.remove(index);
+                    Integer newIndex = position++;
+                    File newFile = new File(
+                            directory, format(FILE_NAME_FORMAT, newIndex, "a"));
+                    log.info("Renaming {} to {}", file, newFile);
+                    file.renameTo(newFile);
+                    dataFiles.put(newIndex, newFile);
+                }
+            }
+
+            // then add all the bulk segments at the beginning of the list
+            Integer[] indices =
+                    bulkFiles.keySet().toArray(new Integer[bulkFiles.size()]);
+            Arrays.sort(indices);
+            int position = 0;
+            for (Integer index : indices) {
+                File file = bulkFiles.remove(index);
+                Integer newIndex = position++;
+                File newFile = new File(
+                        directory, format(FILE_NAME_FORMAT, newIndex, "a"));
+                log.info("Renaming {} to {}", file, newFile);
+                file.renameTo(newFile);
+                dataFiles.put(newIndex, newFile);
+            }
+        }
+
+        return dataFiles;
+    }
+
     public void flush() throws IOException {
         synchronized (persistedHead) {
             RecordId before = persistedHead.get();
@@ -234,10 +285,7 @@ public class FileStore implements Segmen
 
                 synchronized (this) {
                     boolean success = true;
-                    for (TarFile file : bulkFiles) {
-                        success = success && file.flush();
-                    }
-                    for (TarFile file : dataFiles) {
+                    for (TarFile file : files) {
                         success = success && file.flush();
                     }
                     if (!success) {
@@ -256,14 +304,7 @@ public class FileStore implements Segmen
 
     public Iterable<SegmentId> getSegmentIds() {
         List<SegmentId> ids = newArrayList();
-        for (TarFile file : dataFiles) {
-            for (UUID uuid : file.getUUIDs()) {
-                ids.add(tracker.getSegmentId(
-                        uuid.getMostSignificantBits(),
-                        uuid.getLeastSignificantBits()));
-            }
-        }
-        for (TarFile file : bulkFiles) {
+        for (TarFile file : files) {
             for (UUID uuid : file.getUUIDs()) {
                 ids.add(tracker.getSegmentId(
                         uuid.getMostSignificantBits(),
@@ -307,14 +348,10 @@ public class FileStore implements Segmen
 
                 journalFile.close();
 
-                for (TarFile file : bulkFiles) {
+                for (TarFile file : files) {
                     file.close();
                 }
-                bulkFiles.clear();
-                for (TarFile file : dataFiles) {
-                    file.close();
-                }
-                dataFiles.clear();
+                files.clear();
 
                 System.gc(); // for any memory-mappings that are no longer used
             }
@@ -328,42 +365,32 @@ public class FileStore implements Segmen
     public boolean containsSegment(SegmentId id) {
         if (id.getTracker() == tracker) {
             return true;
-        } else if (id.isDataSegmentId()) {
-            return containsSegment(id, dataFiles);
-        } else {
-            return containsSegment(id, bulkFiles);
         }
-    }
 
-    @Override
-    public Segment readSegment(SegmentId id) {
-        if (id.isBulkSegmentId()) {
-            return loadSegment(id, bulkFiles);
-        } else {
-            return loadSegment(id, dataFiles);
-        }
-    }
-
-    private boolean containsSegment(SegmentId id, List<TarFile> files) {
         long msb = id.getMostSignificantBits();
         long lsb = id.getLeastSignificantBits();
-        for (TarFile file : files) {
+        return containsSegment(msb, lsb);
+    }
+
+    private boolean containsSegment(long msb, long lsb) {
+        for (TarFile file : files.toArray(new TarFile[0])) {
             try {
                 ByteBuffer buffer = file.readEntry(msb, lsb);
                 if (buffer != null) {
                     return true;
                 }
             } catch (IOException e) {
-                throw new RuntimeException(
-                        "Failed to access file " + file, e);
+                log.warn("Failed to access file " + file, e);
             }
         }
         return false;
     }
 
-    private Segment loadSegment(SegmentId id, List<TarFile> files) {
+    @Override
+    public Segment readSegment(SegmentId id) {
         long msb = id.getMostSignificantBits();
         long lsb = id.getLeastSignificantBits();
+
         for (TarFile file : files) {
             try {
                 ByteBuffer buffer = file.readEntry(msb, lsb);
@@ -371,8 +398,7 @@ public class FileStore implements Segmen
                     return new Segment(tracker, id, buffer);
                 }
             } catch (IOException e) {
-                throw new RuntimeException(
-                        "Failed to access file " + file, e);
+                log.warn("Failed to access file " + file, e);
             }
         }
 
@@ -382,22 +408,13 @@ public class FileStore implements Segmen
     @Override
     public synchronized void writeSegment(
             SegmentId id, byte[] data, int offset, int length) {
-        // select whether to write a data or a bulk segment
-        List<TarFile> files = dataFiles;
-        String base = "data";
-        if (id.isBulkSegmentId()) {
-            files = bulkFiles;
-            base = "bulk";
-        }
-
         try {
             UUID uuid = new UUID(
                     id.getMostSignificantBits(),
                     id.getLeastSignificantBits());
             if (files.isEmpty() || !files.get(files.size() - 1).writeEntry(
                     uuid, data, offset, length)) {
-                String name = String.format(
-                        FILE_NAME_FORMAT, base, files.size(), "a");
+                String name = format(FILE_NAME_FORMAT, files.size(), "a");
                 File file = new File(directory, name);
                 TarFile last = new TarFile(file, maxFileSize, memoryMapping);
                 checkState(last.writeEntry(uuid, data, offset, length));
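
For clarity, the renaming that collectFiles() performs on an old-style store can
be traced with a small standalone model. It is illustrative only and not part of
the commit: plain strings and TreeMaps stand in for the actual File handling, but
the index arithmetic matches the method above for the layout used by the new test
below.

    import java.util.SortedMap;
    import java.util.TreeMap;

    // Simplified model of the collectFiles() upgrade; illustrative only.
    public class CollectFilesModel {

        public static void main(String[] args) {
            // Old layout, as in testRearrangeOldData below:
            // data files at indices 0, 10, 30 and bulk files at indices 2, 5.
            SortedMap<Integer, String> dataFiles = new TreeMap<>();
            dataFiles.put(0, "data00000.tar");
            dataFiles.put(10, "data00010a.tar");
            dataFiles.put(30, "data00030.tar");
            SortedMap<Integer, String> bulkFiles = new TreeMap<>();
            bulkFiles.put(2, "bulk00002.tar");
            bulkFiles.put(5, "bulk00005a.tar");

            SortedMap<Integer, String> renamed = new TreeMap<>();

            // Data files are pushed past both the highest existing data index and
            // the number of bulk files: position = max(30 + 1, 2) = 31,
            // so they end up at indices 31, 32, 33.
            int position = Math.max(dataFiles.lastKey() + 1, bulkFiles.size());
            for (String oldName : dataFiles.values()) {
                renamed.put(position, oldName + " -> " + String.format("data%05da.tar", position));
                position++;
            }

            // Bulk files are then renumbered from 0 into the same data sequence: 0 and 1.
            position = 0;
            for (String oldName : bulkFiles.values()) {
                renamed.put(position, oldName + " -> " + String.format("data%05da.tar", position));
                position++;
            }

            // Prints {0=bulk00002.tar -> data00000a.tar, 1=bulk00005a.tar -> data00001a.tar,
            //         31=data00000.tar -> data00031a.tar, 32=data00010a.tar -> data00032a.tar,
            //         33=data00030.tar -> data00033a.tar}
            System.out.println(renamed);
        }
    }

The resulting index set [0, 1, 31, 32, 33] and the dataNNNNNa.tar names are
exactly what the new testRearrangeOldData test at the end of this commit asserts.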

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java?rev=1583717&r1=1583716&r2=1583717&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java Tue Apr  1 15:39:59 2014
@@ -43,7 +43,7 @@ class TarFile {
 
     /** Magic byte sequence at the end of the index block. */
     private static final int INDEX_MAGIC =
-            '\n' << 24 + '0' << 16 + 'K' << 8 + '\n';
+            ('\n' << 24) + ('0' << 16) + ('K' << 8) + '\n';
 
     /** The tar file block size. */
     private static final int BLOCK_SIZE = 512;
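
The TarFile change above is a pure operator-precedence fix: in Java the additive
'+' binds tighter than the shift '<<', so the unparenthesized expression shifted
by sums of character values instead of packing four bytes into one int. A minimal
standalone demo (illustrative class name, not part of the commit):

    // Illustrative demo of the precedence fix.
    public class IndexMagicDemo {

        public static void main(String[] args) {
            // Old expression: parsed as (('\n' << (24 + '0')) << (16 + 'K')) << (8 + '\n');
            // with int shift counts taken modulo 32 this happens to evaluate to 0.
            int broken = '\n' << 24 + '0' << 16 + 'K' << 8 + '\n';

            // Fixed expression from the diff above: four bytes packed into one int.
            int fixed = ('\n' << 24) + ('0' << 16) + ('K' << 8) + '\n';

            System.out.printf("broken = 0x%08x%n", broken); // broken = 0x00000000
            System.out.printf("fixed  = 0x%08x%n", fixed);  // fixed  = 0x0a304b0a
        }
    }

With the parentheses in place the constant is 0x0a304b0a, i.e. the byte sequence
'\n' '0' 'K' '\n' described by the surrounding "magic byte sequence" comment.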

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStoreTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStoreTest.java?rev=1583717&r1=1583716&r2=1583717&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStoreTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStoreTest.java Tue Apr  1 15:39:59 2014
@@ -16,12 +16,16 @@
  */
 package org.apache.jackrabbit.oak.plugins.segment.file;
 
+import static com.google.common.collect.Lists.newArrayList;
 import static junit.framework.Assert.assertEquals;
 import static junit.framework.Assert.assertFalse;
+import static junit.framework.Assert.assertTrue;
 
 import java.io.File;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.RandomAccessFile;
+import java.util.Map;
 
 import org.apache.jackrabbit.oak.plugins.segment.SegmentNodeBuilder;
 import org.apache.jackrabbit.oak.plugins.segment.SegmentNodeState;
@@ -82,4 +86,25 @@ public class FileStoreTest {
         store.close();
     }
 
+    @Test
+    public void testRearrangeOldData() throws IOException {
+        new FileOutputStream(new File(directory, "data00000.tar")).close();
+        new FileOutputStream(new File(directory, "data00010a.tar")).close();
+        new FileOutputStream(new File(directory, "data00030.tar")).close();
+        new FileOutputStream(new File(directory, "bulk00002.tar")).close();
+        new FileOutputStream(new File(directory, "bulk00005a.tar")).close();
+
+        Map<Integer, File> files = FileStore.collectFiles(directory);
+        assertEquals(newArrayList(0, 1, 31, 32, 33), newArrayList(files.keySet()));
+
+        assertTrue(new File(directory, "data00000a.tar").isFile());
+        assertTrue(new File(directory, "data00001a.tar").isFile());
+        assertTrue(new File(directory, "data00031a.tar").isFile());
+        assertTrue(new File(directory, "data00032a.tar").isFile());
+        assertTrue(new File(directory, "data00033a.tar").isFile());
+
+        files = FileStore.collectFiles(directory);
+        assertEquals(newArrayList(0, 1, 31, 32, 33), newArrayList(files.keySet()));
+    }
+
 }