You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ju...@apache.org on 2014/04/29 23:00:37 UTC
svn commit: r1591101 - in /jackrabbit/oak/trunk/oak-core/src:
main/java/org/apache/jackrabbit/oak/plugins/segment/file/
test/java/org/apache/jackrabbit/oak/plugins/segment/file/
Author: jukka
Date: Tue Apr 29 21:00:36 2014
New Revision: 1591101
URL: http://svn.apache.org/r1591101
Log:
OAK-1780: Faster TarMK cleanup
Store a precompiled version of the segment graph at the end of the tar file along with the segment index.
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFileTest.java
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java?rev=1591101&r1=1591100&r2=1591101&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarReader.java Tue Apr 29 21:00:36 2014
@@ -18,12 +18,15 @@ package org.apache.jackrabbit.oak.plugin
import static com.google.common.base.Charsets.UTF_8;
import static com.google.common.collect.Lists.newArrayList;
+import static com.google.common.collect.Lists.newArrayListWithCapacity;
+import static com.google.common.collect.Maps.newHashMap;
import static com.google.common.collect.Maps.newLinkedHashMap;
import static com.google.common.collect.Maps.newTreeMap;
import static com.google.common.collect.Sets.newHashSetWithExpectedSize;
import static java.util.Collections.singletonList;
import static org.apache.jackrabbit.oak.plugins.segment.Segment.REF_COUNT_OFFSET;
import static org.apache.jackrabbit.oak.plugins.segment.SegmentId.isDataSegmentId;
+import static org.apache.jackrabbit.oak.plugins.segment.file.TarWriter.GRAPH_MAGIC;
import java.io.File;
import java.io.IOException;
@@ -416,17 +419,64 @@ class TarReader {
}
}
+ /**
+ * Loads the optional pre-compiled graph entry from the given tar file.
+ *
+ * @return graph buffer, or {@code null} if one was not found
+ * @throws IOException if the tar file could not be read
+ */
+ private static ByteBuffer loadGraph(
+ File file, FileAccess access, ByteBuffer index) throws IOException {
+ // read the graph metadata just before the tar index entry
+ int pos = access.length() - 2 * BLOCK_SIZE - getEntrySize(index.remaining());
+ ByteBuffer meta = access.read(pos - 16, 16);
+ int crc32 = meta.getInt();
+ int count = meta.getInt();
+ int bytes = meta.getInt();
+ int magic = meta.getInt();
+
+ if (magic != GRAPH_MAGIC) {
+ return null; // magic byte mismatch
+ }
+
+ if (count < 0 || bytes < count * 24 + 16 || BLOCK_SIZE + bytes > pos) {
+ log.warn("Invalid graph metadata in tar file {}", file);
+ return null; // impossible uuid and/or byte counts
+ }
+
+ // this involves seeking backwards in the file, which might not
+ // perform well, but that's OK since we only do this once per file
+ ByteBuffer graph = access.read(pos - bytes, bytes);
+
+ byte[] b = new byte[bytes - 16];
+ graph.mark();
+ graph.get(b);
+ graph.reset();
+
+ CRC32 checksum = new CRC32();
+ checksum.update(b);
+ if (crc32 != (int) checksum.getValue()) {
+ log.warn("Invalid graph checksum in tar file {}", file);
+ return null; // checksum mismatch
+ }
+
+ return graph;
+ }
+
private final File file;
private final FileAccess access;
private final ByteBuffer index;
+ private final ByteBuffer graph;
+
private TarReader(File file, FileAccess access, ByteBuffer index)
throws IOException {
this.file = file;
this.access = access;
this.index = index;
+ this.graph = loadGraph(file, access, index);
}
Set<UUID> getUUIDs() {
@@ -501,6 +551,11 @@ class TarReader {
}
synchronized TarReader cleanup(Set<UUID> referencedIds) throws IOException {
+ Map<UUID, List<UUID>> graph = null;
+ if (this.graph != null) {
+ graph = parseGraph();
+ }
+
TarEntry[] sorted = new TarEntry[index.remaining() / 24];
int position = index.position();
for (int i = 0; position < index.limit(); i++) {
@@ -527,16 +582,26 @@ class TarReader {
if (isDataSegmentId(entry.lsb())) {
// this is a referenced data segment, so follow the graph
- ByteBuffer segment = access.read(
- entry.offset(),
- Math.min(entry.size(), 16 * 256));
- int pos = segment.position();
- int refcount = segment.get(pos + REF_COUNT_OFFSET) & 0xff;
- int refend = pos + 16 * (refcount + 1);
- for (int refpos = pos + 16; refpos < refend; refpos += 16) {
- referencedIds.add(new UUID(
- segment.getLong(refpos),
- segment.getLong(refpos + 8)));
+ if (graph != null) {
+ List<UUID> refids = graph.get(
+ new UUID(entry.msb(), entry.lsb()));
+ if (refids != null) {
+ referencedIds.addAll(refids);
+ }
+ } else {
+ // a pre-compiled graph is not available, so read the
+ // references directly from this segment
+ ByteBuffer segment = access.read(
+ entry.offset(),
+ Math.min(entry.size(), 16 * 256));
+ int pos = segment.position();
+ int refcount = segment.get(pos + REF_COUNT_OFFSET) & 0xff;
+ int refend = pos + 16 * (refcount + 1);
+ for (int refpos = pos + 16; refpos < refend; refpos += 16) {
+ referencedIds.add(new UUID(
+ segment.getLong(refpos),
+ segment.getLong(refpos + 8)));
+ }
}
}
}
@@ -544,12 +609,14 @@ class TarReader {
size += getEntrySize(24 * count + 16);
size += 2 * BLOCK_SIZE;
- if (count == 0) {
- // none of the entries within this tar file are referenceable
- return null;
- } else if (size >= access.length() * 3 / 4) {
- // the space savings are not worth it at less than 25%
- return this;
+ if (graph != null) {
+ if (count == 0) {
+ // none of the entries within this tar file are referenceable
+ return null;
+ } else if (size >= access.length() * 3 / 4) {
+ // the space savings are not worth it at less than 25%
+ return this;
+ }
}
String name = file.getName();
@@ -592,6 +659,31 @@ class TarReader {
//-----------------------------------------------------------< private >--
+ private Map<UUID, List<UUID>> parseGraph() throws IOException {
+ int count = graph.getInt(graph.limit() - 12);
+
+ ByteBuffer buffer = graph.duplicate();
+ buffer.limit(graph.limit() - 16);
+
+ List<UUID> uuids = newArrayListWithCapacity(count);
+ for (int i = 0; i < count; i++) {
+ uuids.add(new UUID(buffer.getLong(), buffer.getLong()));
+ }
+
+ Map<UUID, List<UUID>> graph = newHashMap();
+ while (buffer.hasRemaining()) {
+ UUID uuid = uuids.get(buffer.getInt());
+ List<UUID> list = newArrayList();
+ int refid = buffer.getInt();
+ while (refid != -1) {
+ list.add(uuids.get(refid));
+ refid = buffer.getInt();
+ }
+ graph.put(uuid, list);
+ }
+ return graph;
+ }
+
private static String readString(ByteBuffer buffer, int fieldSize) {
byte[] b = new byte[fieldSize];
buffer.get(b);
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java?rev=1591101&r1=1591100&r2=1591101&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarWriter.java Tue Apr 29 21:00:36 2014
@@ -21,6 +21,7 @@ import static com.google.common.base.Pre
import static com.google.common.base.Preconditions.checkPositionIndexes;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.Maps.newHashMap;
+import static com.google.common.collect.Maps.newTreeMap;
import static com.google.common.collect.Sets.newHashSet;
import static org.apache.jackrabbit.oak.plugins.segment.Segment.REF_COUNT_OFFSET;
import static org.apache.jackrabbit.oak.plugins.segment.SegmentId.isDataSegmentId;
@@ -31,14 +32,19 @@ import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.SortedMap;
import java.util.UUID;
import java.util.zip.CRC32;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.google.common.collect.Lists;
+
class TarWriter {
/** Logger instance */
@@ -48,6 +54,10 @@ class TarWriter {
static final int INDEX_MAGIC =
('\n' << 24) + ('0' << 16) + ('K' << 8) + '\n';
+ /** Magic byte sequence at the end of the graph block. */
+ static final int GRAPH_MAGIC =
+ ('\n' << 24) + ('0' << 16) + ('G' << 8) + '\n';
+
/** The tar file block size. */
static final int BLOCK_SIZE = 512;
@@ -95,6 +105,8 @@ class TarWriter {
private final Set<UUID> references = newHashSet();
+ private final SortedMap<UUID, List<UUID>> graph = newTreeMap();
+
TarWriter(File file) {
this.file = file;
}
@@ -165,11 +177,20 @@ class TarWriter {
ByteBuffer segment = ByteBuffer.wrap(data, offset, size);
int pos = segment.position();
int refcount = segment.get(pos + REF_COUNT_OFFSET) & 0xff;
- int refend = pos + 16 * (refcount + 1);
- for (int refpos = pos + 16; refpos < refend; refpos += 16) {
- references.add(new UUID(
- segment.getLong(refpos),
- segment.getLong(refpos + 8)));
+ if (refcount != 0) {
+ int refend = pos + 16 * (refcount + 1);
+ List<UUID> list = Lists.newArrayListWithCapacity(refcount);
+ for (int refpos = pos + 16; refpos < refend; refpos += 16) {
+ UUID refid = new UUID(
+ segment.getLong(refpos),
+ segment.getLong(refpos + 8));
+ if (!index.containsKey(refid)) {
+ references.add(refid);
+ }
+ list.add(refid);
+ }
+ Collections.sort(list);
+ graph.put(uuid, list);
}
}
@@ -220,45 +241,99 @@ class TarWriter {
return;
}
- // Complete the tar file by adding the index and the trailing two
- // zero blocks. This code is synchronized on the file instance to
- // ensure that no concurrent thread is still flushing the file when
- // we close the file handle.
+ // Complete the tar file by adding the graph, the index and the
+ // trailing two zero blocks. This code is synchronized on the file
+ // instance to ensure that no concurrent thread is still flushing
+ // the file when we close the file handle.
synchronized (file) {
- int indexSize = index.size() * 24 + 16;
- int padding = getPaddingSize(indexSize);
+ writeGraph();
+ writeIndex();
+ access.write(ZERO_BYTES);
+ access.write(ZERO_BYTES);
+ access.close();
+ }
+ }
- String indexName = file.getName() + ".idx";
- byte[] header = newEntryHeader(indexName, indexSize);
+ private void writeGraph() throws IOException {
+ List<UUID> uuids = Lists.newArrayListWithCapacity(
+ index.size() + references.size());
+ uuids.addAll(index.keySet());
+ uuids.addAll(references);
+ Collections.sort(uuids);
+
+ int graphSize = uuids.size() * 16 + 16;
+ for (List<UUID> list : graph.values()) {
+ graphSize += 4 + list.size() * 4 + 4;
+ }
+ int padding = getPaddingSize(graphSize);
- ByteBuffer buffer = ByteBuffer.allocate(indexSize);
- TarEntry[] sorted = index.values().toArray(new TarEntry[index.size()]);
- Arrays.sort(sorted, TarEntry.IDENTIFIER_ORDER);
- for (TarEntry entry : sorted) {
- buffer.putLong(entry.msb());
- buffer.putLong(entry.lsb());
- buffer.putInt(entry.offset());
- buffer.putInt(entry.size());
- }
+ String graphName = file.getName() + ".gph";
+ byte[] header = newEntryHeader(graphName, graphSize + padding);
+
+ ByteBuffer buffer = ByteBuffer.allocate(graphSize);
- CRC32 checksum = new CRC32();
- checksum.update(buffer.array(), 0, buffer.position());
- buffer.putInt((int) checksum.getValue());
- buffer.putInt(index.size());
- buffer.putInt(padding + indexSize);
- buffer.putInt(INDEX_MAGIC);
-
- access.write(header);
- if (padding > 0) {
- // padding comes *before* the index!
- access.write(ZERO_BYTES, 0, padding);
+ Map<UUID, Integer> refmap = newHashMap();
+
+ int index = 0;
+ for (UUID uuid : uuids) {
+ buffer.putLong(uuid.getMostSignificantBits());
+ buffer.putLong(uuid.getLeastSignificantBits());
+ refmap.put(uuid, index++);
+ }
+
+ for (Map.Entry<UUID, List<UUID>> entry : graph.entrySet()) {
+ buffer.putInt(refmap.get(entry.getKey()));
+ for (UUID refid : entry.getValue()) {
+ buffer.putInt(refmap.get(refid));
}
- access.write(buffer.array());
- access.write(ZERO_BYTES);
- access.write(ZERO_BYTES);
- access.close();
- buffer = null;
+ buffer.putInt(-1);
+ }
+
+ CRC32 checksum = new CRC32();
+ checksum.update(buffer.array(), 0, buffer.position());
+ buffer.putInt((int) checksum.getValue());
+ buffer.putInt(uuids.size());
+ buffer.putInt(graphSize);
+ buffer.putInt(GRAPH_MAGIC);
+
+ access.write(header);
+ if (padding > 0) {
+ // padding comes *before* the graph!
+ access.write(ZERO_BYTES, 0, padding);
+ }
+ access.write(buffer.array());
+ }
+
+ private void writeIndex() throws IOException {
+ int indexSize = index.size() * 24 + 16;
+ int padding = getPaddingSize(indexSize);
+
+ String indexName = file.getName() + ".idx";
+ byte[] header = newEntryHeader(indexName, indexSize + padding);
+
+ ByteBuffer buffer = ByteBuffer.allocate(indexSize);
+ TarEntry[] sorted = index.values().toArray(new TarEntry[index.size()]);
+ Arrays.sort(sorted, TarEntry.IDENTIFIER_ORDER);
+ for (TarEntry entry : sorted) {
+ buffer.putLong(entry.msb());
+ buffer.putLong(entry.lsb());
+ buffer.putInt(entry.offset());
+ buffer.putInt(entry.size());
+ }
+
+ CRC32 checksum = new CRC32();
+ checksum.update(buffer.array(), 0, buffer.position());
+ buffer.putInt((int) checksum.getValue());
+ buffer.putInt(index.size());
+ buffer.putInt(padding + indexSize);
+ buffer.putInt(INDEX_MAGIC);
+
+ access.write(header);
+ if (padding > 0) {
+ // padding comes *before* the index!
+ access.write(ZERO_BYTES, 0, padding);
}
+ access.write(buffer.array());
}
private byte[] newEntryHeader(String name, int size) throws IOException {
Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFileTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFileTest.java?rev=1591101&r1=1591100&r2=1591101&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFileTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFileTest.java Tue Apr 29 21:00:36 2014
@@ -51,7 +51,7 @@ public class TarFileTest {
writer.close();
}
- assertEquals(3072, file.length());
+ assertEquals(4096, file.length());
TarReader reader = TarReader.open(file, false);
try {