You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by fr...@apache.org on 2016/07/26 14:05:56 UTC
svn commit: r1754132 - in /jackrabbit/oak/trunk/oak-segment-tar/src:
main/java/org/apache/jackrabbit/oak/segment/
main/java/org/apache/jackrabbit/oak/segment/file/
test/java/org/apache/jackrabbit/oak/segment/
test/java/org/apache/jackrabbit/oak/segment...
Author: frm
Date: Tue Jul 26 14:05:56 2016
New Revision: 1754132
URL: http://svn.apache.org/viewvc?rev=1754132&view=rev
Log:
OAK-4603 - Group binary references by owning segment in the index
Knowing which segment a binary reference belongs to is crucial during
compaction. When a new generation of a TAR file is created and some segments
are garbage collected in the process, the corresponding binary references
should be removed from the index, too.
Modified:
jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumer.java
jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumers.java
jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentWriter.java
jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java
jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarWriter.java
jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java
jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/TarFileTest.java
Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumer.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumer.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumer.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumer.java Tue Jul 26 14:05:56 2016
@@ -17,6 +17,8 @@
package org.apache.jackrabbit.oak.segment;
+import java.util.UUID;
+
/**
* A consumer for references to external binaries. An implementor of this
* interface is called every time an external binary reference is written in the
@@ -29,9 +31,10 @@ public interface BinaryReferenceConsumer
*
* @param generation The generation of the record referencing the
* binary.
+ * @param segmentId The ID of the segment this reference belongs to.
* @param binaryReference The opaque string representation of the binary
* reference.
*/
- void consume(int generation, String binaryReference);
+ void consume(int generation, UUID segmentId, String binaryReference);
}
Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumers.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumers.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumers.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/BinaryReferenceConsumers.java Tue Jul 26 14:05:56 2016
@@ -17,6 +17,8 @@
package org.apache.jackrabbit.oak.segment;
+import java.util.UUID;
+
/**
* Utility methods to work with {@link BinaryReferenceConsumer} instances.
*/
@@ -32,7 +34,7 @@ public class BinaryReferenceConsumers {
return new BinaryReferenceConsumer() {
@Override
- public void consume(int generation, String binaryReference) {
+ public void consume(int generation, UUID segmentId, String binaryReference) {
// Discard the binary reference
}
Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentWriter.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentWriter.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentWriter.java Tue Jul 26 14:05:56 2016
@@ -808,7 +808,7 @@ public class SegmentWriter {
recordId = RecordWriters.newBlobIdWriter(writeString(blobId)).write(writer);
}
- binaryReferenceConsumer.consume(writer.getGeneration(), blobId);
+ binaryReferenceConsumer.consume(writer.getGeneration(), recordId.asUUID(), blobId);
return recordId;
}
Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java Tue Jul 26 14:05:56 2016
@@ -246,10 +246,10 @@ public class FileStore implements Segmen
this.binaryReferenceConsumer = new BinaryReferenceConsumer() {
@Override
- public void consume(int generation, String binaryReference) {
+ public void consume(int generation, UUID segmentId, String binaryReference) {
fileStoreLock.writeLock().lock();
try {
- tarWriter.addBinaryReference(generation, binaryReference);
+ tarWriter.addBinaryReference(generation, segmentId, binaryReference);
} finally {
fileStoreLock.writeLock().unlock();
}
@@ -1335,6 +1335,7 @@ public class FileStore implements Segmen
directory,
String.format(FILE_NAME_FORMAT, writeNumber, "a"));
tarWriter = new TarWriter(writeFile, stats);
+ log.info("New TAR writer {}", tarWriter);
}
}
Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java Tue Jul 26 14:05:56 2016
@@ -58,6 +58,7 @@ import javax.annotation.Nonnull;
import com.google.common.base.Charsets;
import com.google.common.base.Predicate;
+import com.google.common.collect.Sets;
import org.apache.commons.io.FileUtils;
import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
import org.apache.jackrabbit.oak.segment.SegmentGraph.SegmentGraphVisitor;
@@ -740,19 +741,21 @@ class TarReader implements Closeable {
* @param minGeneration
*/
void collectBlobReferences(ReferenceCollector collector, int minGeneration) {
- Map<Integer, Set<String>> references = getBinaryReferences();
+ Map<Integer, Map<UUID, Set<String>>> generations = getBinaryReferences();
- if (references == null) {
+ if (generations == null) {
return;
}
- for (Entry<Integer, Set<String>> entry : references.entrySet()) {
+ for (Entry<Integer, Map<UUID, Set<String>>> entry : generations.entrySet()) {
if (entry.getKey() < minGeneration) {
continue;
}
- for (String reference : entry.getValue()) {
- collector.addReference(reference, null);
+ for (Set<String> references : entry.getValue().values()) {
+ for (String reference : references) {
+ collector.addReference(reference, null);
+ }
}
}
}
@@ -961,7 +964,7 @@ class TarReader implements Closeable {
return getEntrySize(buffer.getInt(buffer.limit() - 8));
}
- Map<Integer, Set<String>> getBinaryReferences() {
+ Map<Integer, Map<UUID, Set<String>>> getBinaryReferences() {
ByteBuffer buffer;
try {
@@ -1015,27 +1018,37 @@ class TarReader implements Closeable {
return buffer;
}
- private Map<Integer, Set<String>> parseBinaryReferences(ByteBuffer buffer) {
+ private Map<Integer, Map<UUID, Set<String>>> parseBinaryReferences(ByteBuffer buffer) {
int nGenerations = buffer.getInt(buffer.limit() - 12);
- Map<Integer, Set<String>> binaryReferences = newHashMapWithExpectedSize(nGenerations);
+ Map<Integer, Map<UUID, Set<String>>> binaryReferences = newHashMapWithExpectedSize(nGenerations);
for (int i = 0; i < nGenerations; i++) {
int generation = buffer.getInt();
- int nReferences = buffer.getInt();
+ int segmentCount = buffer.getInt();
+
+ Map<UUID, Set<String>> segments = newHashMapWithExpectedSize(segmentCount);
+
+ for (int j = 0; j < segmentCount; j++) {
+ long msb = buffer.getLong();
+ long lsb = buffer.getLong();
+ int referenceCount = buffer.getInt();
- Set<String> references = newHashSetWithExpectedSize(nReferences);
+ Set<String> references = Sets.newHashSetWithExpectedSize(referenceCount);
- for (int j = 0; j < nReferences; j++) {
- int length = buffer.getInt();
+ for (int k = 0; k < referenceCount; k++) {
+ int length = buffer.getInt();
- byte[] data = new byte[length];
- buffer.get(data);
+ byte[] data = new byte[length];
+ buffer.get(data);
+
+ references.add(new String(data, Charsets.UTF_8));
+ }
- references.add(new String(data, Charsets.UTF_8));
+ segments.put(new UUID(msb, lsb), references);
}
- binaryReferences.put(generation, references);
+ binaryReferences.put(generation, segments);
}
return binaryReferences;
Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarWriter.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarWriter.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/TarWriter.java Tue Jul 26 14:05:56 2016
@@ -162,7 +162,7 @@ class TarWriter implements Closeable {
/**
* List of binary references contained in this TAR file.
*/
- private final Map<Integer, Set<String>> binaryReferences = newHashMap();
+ private final Map<Integer, Map<UUID, Set<String>>> binaryReferences = newHashMap();
TarWriter(File file) {
this(file, FileStoreMonitor.DEFAULT);
@@ -282,12 +282,19 @@ class TarWriter implements Closeable {
return currentLength;
}
- void addBinaryReference(int generation, String reference) {
- Set<String> references = binaryReferences.get(generation);
+ void addBinaryReference(int generation, UUID segmentId, String reference) {
+ Map<UUID, Set<String>> segmentToReferences = binaryReferences.get(generation);
+
+ if (segmentToReferences == null) {
+ segmentToReferences = newHashMap();
+ binaryReferences.put(generation, segmentToReferences);
+ }
+
+ Set<String> references = segmentToReferences.get(segmentId);
if (references == null) {
references = newHashSet();
- binaryReferences.put(generation, references);
+ segmentToReferences.put(segmentId, references);
}
references.add(reference);
@@ -386,37 +393,53 @@ class TarWriter implements Closeable {
// The following information is stored as part of the main content of
// this entry, after the optional padding.
- for (Set<String> references : binaryReferences.values()) {
+ for (Map<UUID, Set<String>> segmentToReferences : binaryReferences.values()) {
// 4 bytes per generation to store the generation number itself.
binaryReferenceSize += 4;
- // 4 bytes per generation to store the amount of binary references
- // associated to the generation.
+ // 4 bytes per generation to store the number of segments.
binaryReferenceSize += 4;
- for (String reference : references) {
- // 4 bytes for each reference to store the length of the reference.
+ for (Set<String> references : segmentToReferences.values()) {
+ // 16 bytes per segment identifier.
+ binaryReferenceSize += 16;
+
+ // 4 bytes to store the number of references for this segment.
binaryReferenceSize += 4;
- // A variable amount of bytes, depending on the reference itself.
- binaryReferenceSize += reference.getBytes(Charsets.UTF_8).length;
+ for (String reference : references) {
+ // 4 bytes for each reference to store the length of the reference.
+ binaryReferenceSize += 4;
+
+ // A variable amount of bytes, depending on the reference itself.
+ binaryReferenceSize += reference.getBytes(Charsets.UTF_8).length;
+ }
}
}
ByteBuffer buffer = ByteBuffer.allocate(binaryReferenceSize);
- for (Entry<Integer, Set<String>> entry : binaryReferences.entrySet()) {
- int generation = entry.getKey();
- Set<String> references = entry.getValue();
+ for (Entry<Integer, Map<UUID, Set<String>>> be : binaryReferences.entrySet()) {
+ int generation = be.getKey();
+ Map<UUID, Set<String>> segmentToReferences = be.getValue();
buffer.putInt(generation);
- buffer.putInt(references.size());
+ buffer.putInt(segmentToReferences.size());
+
+ for (Entry<UUID, Set<String>> se : segmentToReferences.entrySet()) {
+ UUID segmentId = se.getKey();
+ Set<String> references = se.getValue();
- for (String reference : references) {
- byte[] bytes = reference.getBytes(Charsets.UTF_8);
+ buffer.putLong(segmentId.getMostSignificantBits());
+ buffer.putLong(segmentId.getLeastSignificantBits());
+ buffer.putInt(references.size());
- buffer.putInt(bytes.length);
- buffer.put(bytes);
+ for (String reference : references) {
+ byte[] bytes = reference.getBytes(Charsets.UTF_8);
+
+ buffer.putInt(bytes.length);
+ buffer.put(bytes);
+ }
}
}
Modified: jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java Tue Jul 26 14:05:56 2016
@@ -97,7 +97,14 @@ public class SegmentDataStoreBlobGCIT {
SegmentGCOptions gcOptions = defaultGCOptions();
@Rule
- public TemporaryFolder folder = new TemporaryFolder(new File("target"));
+ public TemporaryFolder folder = new TemporaryFolder(new File("target")) {
+
+ @Override
+ public void delete() {
+ // Do nothing
+ }
+
+ };
@BeforeClass
public static void assumptions() {
@@ -395,6 +402,7 @@ public class SegmentDataStoreBlobGCIT {
try {
is = new FileInputStream(markedFiles.get(0));
Set<String> records = FileIOUtils.readStringsAsSet(is, true);
+ assertEquals(expected.size(), records.size());
assertEquals(expected, records);
} finally {
Closeables.close(is, false);
Modified: jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/TarFileTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/TarFileTest.java?rev=1754132&r1=1754131&r2=1754132&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/TarFileTest.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/file/TarFileTest.java Tue Jul 26 14:05:56 2016
@@ -19,9 +19,9 @@
package org.apache.jackrabbit.oak.segment.file;
import static com.google.common.base.Charsets.UTF_8;
-import static com.google.common.collect.Sets.newHashSet;
+import static com.google.common.collect.Maps.newHashMap;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
+import static org.mockito.internal.util.collections.Sets.newSet;
import java.io.File;
import java.io.IOException;
@@ -84,29 +84,45 @@ public class TarFileTest {
try (TarWriter writer = new TarWriter(file)) {
writer.writeEntry(0x00, 0x00, new byte[] {0x01, 0x02, 0x3}, 0, 3, 0);
- writer.addBinaryReference(1, "r0");
- writer.addBinaryReference(1, "r1");
- writer.addBinaryReference(1, "r2");
- writer.addBinaryReference(1, "r3");
-
- writer.addBinaryReference(2, "r4");
- writer.addBinaryReference(2, "r5");
- writer.addBinaryReference(2, "r6");
+ writer.addBinaryReference(1, new UUID(1, 0), "r0");
+ writer.addBinaryReference(1, new UUID(1, 1), "r1");
+ writer.addBinaryReference(1, new UUID(1, 2), "r2");
+ writer.addBinaryReference(1, new UUID(1, 3), "r3");
+
+ writer.addBinaryReference(2, new UUID(2, 0), "r4");
+ writer.addBinaryReference(2, new UUID(2, 1), "r5");
+ writer.addBinaryReference(2, new UUID(2, 2), "r6");
- writer.addBinaryReference(3, "r7");
- writer.addBinaryReference(3, "r8");
+ writer.addBinaryReference(3, new UUID(3, 0), "r7");
+ writer.addBinaryReference(3, new UUID(3, 1), "r8");
}
- try (TarReader reader = TarReader.open(file, false)) {
- Map<Integer, Set<String>> brf = reader.getBinaryReferences();
+ Map<UUID, Set<String>> one = newHashMap();
+
+ one.put(new UUID(1, 0), newSet("r0"));
+ one.put(new UUID(1, 1), newSet("r1"));
+ one.put(new UUID(1, 2), newSet("r2"));
+ one.put(new UUID(1, 3), newSet("r3"));
+
+ Map<UUID, Set<String>> two = newHashMap();
+
+ two.put(new UUID(2, 0), newSet("r4"));
+ two.put(new UUID(2, 1), newSet("r5"));
+ two.put(new UUID(2, 2), newSet("r6"));
- assertNotNull(brf);
+ Map<UUID, Set<String>> three = newHashMap();
- assertEquals(newHashSet(1, 2, 3), brf.keySet());
+ three.put(new UUID(3, 0), newSet("r7"));
+ three.put(new UUID(3, 1), newSet("r8"));
- assertEquals(newHashSet("r0", "r1", "r2", "r3"), brf.get(1));
- assertEquals(newHashSet("r4", "r5", "r6"), brf.get(2));
- assertEquals(newHashSet("r7", "r8"), brf.get(3));
+ Map<Integer, Map<UUID, Set<String>>> expected = newHashMap();
+
+ expected.put(1, one);
+ expected.put(2, two);
+ expected.put(3, three);
+
+ try (TarReader reader = TarReader.open(file, false)) {
+ assertEquals(expected, reader.getBinaryReferences());
}
}