You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by fr...@apache.org on 2016/07/26 14:05:56 UTC

svn commit: r1754132 - in /jackrabbit/oak/trunk/oak-segment-tar/src: main/java/org/apache/jackrabbit/oak/segment/ main/java/org/apache/jackrabbit/oak/segment/file/ test/java/org/apache/jackrabbit/oak/segment/ test/java/org/apache/jackrabbit/oak/segment...

Author: frm
Date: Tue Jul 26 14:05:56 2016
New Revision: 1754132

URL: http://svn.apache.org/viewvc?rev=1754132&view=rev
Log:
OAK-4603 - Group binary references by owning segment in the index

Knowing which segment a binary reference belongs to is crucial during
compaction. When a new generation of a TAR file is created and some segments
are garbage collected in the process, the corresponding binary references
should be removed from the index, too.

Modified:
    jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumer.java
    jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumers.java
    jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentWriter.java
    jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
    jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java
    jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarWriter.java
    jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java
    jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/TarFileTest.java

Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumer.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumer.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumer.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumer.java Tue Jul 26 14:05:56 2016
@@ -17,6 +17,8 @@
 
 package org.apache.jackrabbit.oak.segment;
 
+import java.util.UUID;
+
 /**
  * A consumer for references to external binaries. An implementor of this
  * interface is called every time an external binary reference is written in the
@@ -29,9 +31,10 @@ public interface BinaryReferenceConsumer
      *
      * @param generation      The generation of the record referencing the
      *                        binary.
+     * @param segmentId       The ID of the segment this reference belongs to.
      * @param binaryReference The opaque string representation of the binary
      *                        reference.
      */
-    void consume(int generation, String binaryReference);
+    void consume(int generation, UUID segmentId, String binaryReference);
 
 }

Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumers.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumers.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumers.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumers.java Tue Jul 26 14:05:56 2016
@@ -17,6 +17,8 @@
 
 package org.apache.jackrabbit.oak.segment;
 
+import java.util.UUID;
+
 /**
  * Utility methods to work with {@link BinaryReferenceConsumer} instances.
  */
@@ -32,7 +34,7 @@ public class BinaryReferenceConsumers {
         return new BinaryReferenceConsumer() {
 
             @Override
-            public void consume(int generation, String binaryReference) {
+            public void consume(int generation, UUID segmentId, String binaryReference) {
                 // Discard the binary reference
             }
 

Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentWriter.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentWriter.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentWriter.java Tue Jul 26 14:05:56 2016
@@ -808,7 +808,7 @@ public class SegmentWriter {
                 recordId = RecordWriters.newBlobIdWriter(writeString(blobId)).write(writer);
             }
 
-            binaryReferenceConsumer.consume(writer.getGeneration(), blobId);
+            binaryReferenceConsumer.consume(writer.getGeneration(), recordId.asUUID(), blobId);
 
             return recordId;
         }

Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java Tue Jul 26 14:05:56 2016
@@ -246,10 +246,10 @@ public class FileStore implements Segmen
         this.binaryReferenceConsumer = new BinaryReferenceConsumer() {
 
             @Override
-            public void consume(int generation, String binaryReference) {
+            public void consume(int generation, UUID segmentId, String binaryReference) {
                 fileStoreLock.writeLock().lock();
                 try {
-                    tarWriter.addBinaryReference(generation, binaryReference);
+                    tarWriter.addBinaryReference(generation, segmentId, binaryReference);
                 } finally {
                     fileStoreLock.writeLock().unlock();
                 }
@@ -1335,6 +1335,7 @@ public class FileStore implements Segmen
                     directory,
                     String.format(FILE_NAME_FORMAT, writeNumber, "a"));
             tarWriter = new TarWriter(writeFile, stats);
+            log.info("New TAR writer {}", tarWriter);
         }
     }
 

Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java Tue Jul 26 14:05:56 2016
@@ -58,6 +58,7 @@ import javax.annotation.Nonnull;
 
 import com.google.common.base.Charsets;
 import com.google.common.base.Predicate;
+import com.google.common.collect.Sets;
 import org.apache.commons.io.FileUtils;
 import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
 import org.apache.jackrabbit.oak.segment.SegmentGraph.SegmentGraphVisitor;
@@ -740,19 +741,21 @@ class TarReader implements Closeable {
      * @param minGeneration
      */
     void collectBlobReferences(ReferenceCollector collector, int minGeneration) {
-        Map<Integer, Set<String>> references = getBinaryReferences();
+        Map<Integer, Map<UUID, Set<String>>> generations = getBinaryReferences();
 
-        if (references == null) {
+        if (generations == null) {
             return;
         }
 
-        for (Entry<Integer, Set<String>> entry : references.entrySet()) {
+        for (Entry<Integer, Map<UUID, Set<String>>> entry : generations.entrySet()) {
             if (entry.getKey() < minGeneration) {
                 continue;
             }
 
-            for (String reference : entry.getValue()) {
-                collector.addReference(reference, null);
+            for (Set<String> references : entry.getValue().values()) {
+                for (String reference : references) {
+                    collector.addReference(reference, null);
+                }
             }
         }
     }
@@ -961,7 +964,7 @@ class TarReader implements Closeable {
         return getEntrySize(buffer.getInt(buffer.limit() - 8));
     }
 
-    Map<Integer, Set<String>> getBinaryReferences() {
+    Map<Integer, Map<UUID, Set<String>>> getBinaryReferences() {
         ByteBuffer buffer;
 
         try {
@@ -1015,27 +1018,37 @@ class TarReader implements Closeable {
         return buffer;
     }
 
-    private Map<Integer, Set<String>> parseBinaryReferences(ByteBuffer buffer) {
+    private Map<Integer, Map<UUID, Set<String>>> parseBinaryReferences(ByteBuffer buffer) {
         int nGenerations = buffer.getInt(buffer.limit() - 12);
 
-        Map<Integer, Set<String>> binaryReferences = newHashMapWithExpectedSize(nGenerations);
+        Map<Integer, Map<UUID, Set<String>>> binaryReferences = newHashMapWithExpectedSize(nGenerations);
 
         for (int i = 0; i < nGenerations; i++) {
             int generation = buffer.getInt();
-            int nReferences = buffer.getInt();
+            int segmentCount = buffer.getInt();
+
+            Map<UUID, Set<String>> segments = newHashMapWithExpectedSize(segmentCount);
+
+            for (int j = 0; j < segmentCount; j++) {
+                long msb = buffer.getLong();
+                long lsb = buffer.getLong();
+                int referenceCount = buffer.getInt();
 
-            Set<String> references = newHashSetWithExpectedSize(nReferences);
+                Set<String> references = Sets.newHashSetWithExpectedSize(referenceCount);
 
-            for (int j = 0; j < nReferences; j++) {
-                int length = buffer.getInt();
+                for (int k = 0; k < referenceCount; k++) {
+                    int length = buffer.getInt();
 
-                byte[] data = new byte[length];
-                buffer.get(data);
+                    byte[] data = new byte[length];
+                    buffer.get(data);
+
+                    references.add(new String(data, Charsets.UTF_8));
+                }
 
-                references.add(new String(data, Charsets.UTF_8));
+                segments.put(new UUID(msb, lsb), references);
             }
 
-            binaryReferences.put(generation, references);
+            binaryReferences.put(generation, segments);
         }
 
         return binaryReferences;

Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarWriter.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarWriter.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarWriter.java Tue Jul 26 14:05:56 2016
@@ -162,7 +162,7 @@ class TarWriter implements Closeable {
     /**
      * List of binary references contained in this TAR file.
      */
-    private final Map<Integer, Set<String>> binaryReferences = newHashMap();
+    private final Map<Integer, Map<UUID, Set<String>>> binaryReferences = newHashMap();
 
     TarWriter(File file) {
         this(file, FileStoreMonitor.DEFAULT);
@@ -282,12 +282,19 @@ class TarWriter implements Closeable {
         return currentLength;
     }
 
-    void addBinaryReference(int generation, String reference) {
-        Set<String> references = binaryReferences.get(generation);
+    void addBinaryReference(int generation, UUID segmentId, String reference) {
+        Map<UUID, Set<String>> segmentToReferences = binaryReferences.get(generation);
+
+        if (segmentToReferences == null) {
+            segmentToReferences = newHashMap();
+            binaryReferences.put(generation, segmentToReferences);
+        }
+
+        Set<String> references = segmentToReferences.get(segmentId);
 
         if (references == null) {
             references = newHashSet();
-            binaryReferences.put(generation, references);
+            segmentToReferences.put(segmentId, references);
         }
 
         references.add(reference);
@@ -386,37 +393,53 @@ class TarWriter implements Closeable {
         // The following information are stored as part of the main content of
         // this entry, after the optional padding.
 
-        for (Set<String> references : binaryReferences.values()) {
+        for (Map<UUID, Set<String>> segmentToReferences : binaryReferences.values()) {
             // 4 bytes per generation to store the generation number itself.
             binaryReferenceSize += 4;
 
-            // 4 bytes per generation to store the amount of binary references
-            // associated to the generation.
+            // 4 bytes per generation to store the number of segments.
             binaryReferenceSize += 4;
 
-            for (String reference : references) {
-                // 4 bytes for each reference to store the length of the reference.
+            for (Set<String> references : segmentToReferences.values()) {
+                // 16 bytes per segment identifier.
+                binaryReferenceSize += 16;
+
+                // 4 bytes to store the number of references for this segment.
                 binaryReferenceSize += 4;
 
-                // A variable amount of bytes, depending on the reference itself.
-                binaryReferenceSize += reference.getBytes(Charsets.UTF_8).length;
+                for (String reference : references) {
+                    // 4 bytes for each reference to store the length of the reference.
+                    binaryReferenceSize += 4;
+
+                    // A variable amount of bytes, depending on the reference itself.
+                    binaryReferenceSize += reference.getBytes(Charsets.UTF_8).length;
+                }
             }
         }
 
         ByteBuffer buffer = ByteBuffer.allocate(binaryReferenceSize);
 
-        for (Entry<Integer, Set<String>> entry : binaryReferences.entrySet()) {
-            int generation = entry.getKey();
-            Set<String> references = entry.getValue();
+        for (Entry<Integer, Map<UUID, Set<String>>> be : binaryReferences.entrySet()) {
+            int generation = be.getKey();
+            Map<UUID, Set<String>> segmentToReferences = be.getValue();
 
             buffer.putInt(generation);
-            buffer.putInt(references.size());
+            buffer.putInt(segmentToReferences.size());
+
+            for (Entry<UUID, Set<String>> se : segmentToReferences.entrySet()) {
+                UUID segmentId = se.getKey();
+                Set<String> references = se.getValue();
 
-            for (String reference : references) {
-                byte[] bytes = reference.getBytes(Charsets.UTF_8);
+                buffer.putLong(segmentId.getMostSignificantBits());
+                buffer.putLong(segmentId.getLeastSignificantBits());
+                buffer.putInt(references.size());
 
-                buffer.putInt(bytes.length);
-                buffer.put(bytes);
+                for (String reference : references) {
+                    byte[] bytes = reference.getBytes(Charsets.UTF_8);
+
+                    buffer.putInt(bytes.length);
+                    buffer.put(bytes);
+                }
             }
         }
 

Modified: jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java Tue Jul 26 14:05:56 2016
@@ -97,7 +97,14 @@ public class SegmentDataStoreBlobGCIT {
     SegmentGCOptions gcOptions = defaultGCOptions();
 
     @Rule
-    public TemporaryFolder folder = new TemporaryFolder(new File("target"));
+    public TemporaryFolder folder = new TemporaryFolder(new File("target")) {
+
+        @Override
+        public void delete() {
+            // Do nothing
+        }
+
+    };
 
     @BeforeClass
     public static void assumptions() {
@@ -395,6 +402,7 @@ public class SegmentDataStoreBlobGCIT {
         try {
             is = new FileInputStream(markedFiles.get(0));
             Set<String> records = FileIOUtils.readStringsAsSet(is, true);
+            assertEquals(expected.size(), records.size());
             assertEquals(expected, records);
         } finally {
             Closeables.close(is, false);

Modified: jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/TarFileTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/TarFileTest.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/TarFileTest.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/TarFileTest.java Tue Jul 26 14:05:56 2016
@@ -19,9 +19,9 @@
 package org.apache.jackrabbit.oak.segment.file;
 
 import static com.google.common.base.Charsets.UTF_8;
-import static com.google.common.collect.Sets.newHashSet;
+import static com.google.common.collect.Maps.newHashMap;
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
+import static org.mockito.internal.util.collections.Sets.newSet;
 
 import java.io.File;
 import java.io.IOException;
@@ -84,29 +84,45 @@ public class TarFileTest {
         try (TarWriter writer = new TarWriter(file)) {
             writer.writeEntry(0x00, 0x00, new byte[] {0x01, 0x02, 0x3}, 0, 3, 0);
 
-            writer.addBinaryReference(1, "r0");
-            writer.addBinaryReference(1, "r1");
-            writer.addBinaryReference(1, "r2");
-            writer.addBinaryReference(1, "r3");
-
-            writer.addBinaryReference(2, "r4");
-            writer.addBinaryReference(2, "r5");
-            writer.addBinaryReference(2, "r6");
+            writer.addBinaryReference(1, new UUID(1, 0), "r0");
+            writer.addBinaryReference(1, new UUID(1, 1), "r1");
+            writer.addBinaryReference(1, new UUID(1, 2), "r2");
+            writer.addBinaryReference(1, new UUID(1, 3), "r3");
+
+            writer.addBinaryReference(2, new UUID(2, 0), "r4");
+            writer.addBinaryReference(2, new UUID(2, 1), "r5");
+            writer.addBinaryReference(2, new UUID(2, 2), "r6");
 
-            writer.addBinaryReference(3, "r7");
-            writer.addBinaryReference(3, "r8");
+            writer.addBinaryReference(3, new UUID(3, 0), "r7");
+            writer.addBinaryReference(3, new UUID(3, 1), "r8");
         }
 
-        try (TarReader reader = TarReader.open(file, false)) {
-            Map<Integer, Set<String>> brf = reader.getBinaryReferences();
+        Map<UUID, Set<String>> one = newHashMap();
+
+        one.put(new UUID(1, 0), newSet("r0"));
+        one.put(new UUID(1, 1), newSet("r1"));
+        one.put(new UUID(1, 2), newSet("r2"));
+        one.put(new UUID(1, 3), newSet("r3"));
+
+        Map<UUID, Set<String>> two = newHashMap();
+
+        two.put(new UUID(2, 0), newSet("r4"));
+        two.put(new UUID(2, 1), newSet("r5"));
+        two.put(new UUID(2, 2), newSet("r6"));
 
-            assertNotNull(brf);
+        Map<UUID, Set<String>> three = newHashMap();
 
-            assertEquals(newHashSet(1, 2, 3), brf.keySet());
+        three.put(new UUID(3, 0), newSet("r7"));
+        three.put(new UUID(3, 1), newSet("r8"));
 
-            assertEquals(newHashSet("r0", "r1", "r2", "r3"), brf.get(1));
-            assertEquals(newHashSet("r4", "r5", "r6"), brf.get(2));
-            assertEquals(newHashSet("r7", "r8"), brf.get(3));
+        Map<Integer, Map<UUID, Set<String>>> expected = newHashMap();
+
+        expected.put(1, one);
+        expected.put(2, two);
+        expected.put(3, three);
+
+        try (TarReader reader = TarReader.open(file, false)) {
+            assertEquals(expected, reader.getBinaryReferences());
         }
     }