You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ju...@apache.org on 2014/04/01 17:40:58 UTC

svn commit: r1583718 - in /jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file: FileAccess.java FileStore.java MappedAccess.java RandomAccess.java TarFile.java

Author: jukka
Date: Tue Apr  1 15:40:57 2014
New Revision: 1583718

URL: http://svn.apache.org/r1583718
Log:
OAK-631: SegmentMK: Implement garbage collection

Add checksums to tar entries for better sanity checks during recovery
Write new tar files during cleanup if doing so would save at least 25% space

Modified:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileAccess.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/MappedAccess.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/RandomAccess.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileAccess.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileAccess.java?rev=1583718&r1=1583717&r2=1583718&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileAccess.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileAccess.java Tue Apr  1 15:40:57 2014
@@ -23,6 +23,8 @@ interface FileAccess {
 
     int length() throws IOException;
 
+    long crc32(int position, int size) throws IOException;
+
     ByteBuffer read(int position, int length) throws IOException;
 
     void write(int position, byte[] b, int offset, int length)

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java?rev=1583718&r1=1583717&r2=1583718&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/FileStore.java Tue Apr  1 15:40:57 2014
@@ -178,7 +178,7 @@ public class FileStore implements Segmen
                     segmentId.getLeastSignificantBits())) {
                 id = last;
             } else {
-                log.warn("Unable to committed revision {}, rewinding...", last);
+                log.warn("Unable to access revision {}, rewinding...", last);
             }
         }
 

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/MappedAccess.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/MappedAccess.java?rev=1583718&r1=1583717&r2=1583718&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/MappedAccess.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/MappedAccess.java Tue Apr  1 15:40:57 2014
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.io.RandomAccessFile;
 import java.nio.ByteBuffer;
 import java.nio.MappedByteBuffer;
+import java.util.zip.CRC32;
 
 class MappedAccess implements FileAccess {
 
@@ -48,10 +49,23 @@ class MappedAccess implements FileAccess
     }
 
     @Override
+    public long crc32(int position, int length) {
+        ByteBuffer entry = buffer.asReadOnlyBuffer();
+        entry.position(entry.position() + position);
+
+        byte[] data = new byte[length];
+        entry.get(data);
+
+        CRC32 checksum = new CRC32();
+        checksum.update(data);
+        return checksum.getValue();
+    }
+
+    @Override
     public ByteBuffer read(int position, int length) {
         ByteBuffer entry = buffer.asReadOnlyBuffer();
-        entry.position(position);
-        entry.limit(position + length);
+        entry.position(entry.position() + position);
+        entry.limit(entry.position() + length);
         return entry.slice();
     }
 
@@ -60,7 +74,7 @@ class MappedAccess implements FileAccess
             int position, byte[] b, int offset, int length)
             throws IOException {
         ByteBuffer entry = buffer.duplicate();
-        entry.position(position);
+        entry.position(entry.position() + position);
         entry.put(b, offset, length);
         updated = true;
     }

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/RandomAccess.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/RandomAccess.java?rev=1583718&r1=1583717&r2=1583718&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/RandomAccess.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/RandomAccess.java Tue Apr  1 15:40:57 2014
@@ -22,6 +22,7 @@ import static com.google.common.base.Pre
 import java.io.IOException;
 import java.io.RandomAccessFile;
 import java.nio.ByteBuffer;
+import java.util.zip.CRC32;
 
 import javax.annotation.Nonnull;
 
@@ -43,6 +44,13 @@ class RandomAccess implements FileAccess
     }
 
     @Override
+    public long crc32(int position, int length) throws IOException {
+        CRC32 checksum = new CRC32();
+        checksum.update(read(position, length).array());
+        return checksum.getValue();
+    }
+
+    @Override
     public synchronized ByteBuffer read(int position, int length)
             throws IOException {
         ByteBuffer entry = ByteBuffer.allocate(length);

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java?rev=1583718&r1=1583717&r2=1583718&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java Tue Apr  1 15:40:57 2014
@@ -31,6 +31,8 @@ import java.util.Arrays;
 import java.util.Set;
 import java.util.UUID;
 import java.util.concurrent.ConcurrentMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import java.util.zip.CRC32;
 
 import org.slf4j.Logger;
@@ -45,6 +47,11 @@ class TarFile {
     private static final int INDEX_MAGIC =
             ('\n' << 24) + ('0' << 16) + ('K' << 8) + '\n';
 
+    /** Pattern of the segment entry names */
+    private static final Pattern NAME_PATTERN = Pattern.compile(
+            "([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"
+            + "(\\.([0-9a-f]{8}))?");
+
     /** The tar file block size. */
     private static final int BLOCK_SIZE = 512;
 
@@ -149,36 +156,14 @@ class TarFile {
                 + getEntrySize(indexSize + 24) // index with one extra entry
                 + 2 * BLOCK_SIZE               // two zero blocks at the end
                 > length) {
-            int bytes = length - position - 3 * BLOCK_SIZE;
-            writeEntryHeader(indexEntryName, bytes);
-
-            ByteBuffer index = ByteBuffer.allocate(indexSize);
-
-            TarEntry[] sorted =
-                    entries.values().toArray(new TarEntry[entries.size()]);
-            Arrays.sort(sorted, TarEntry.IDENTIFIER);
-            for (TarEntry entry : sorted) {
-                index.putLong(entry.msb());
-                index.putLong(entry.lsb());
-                index.putInt(entry.offset());
-                index.putInt(entry.size());
-            }
-
-            CRC32 checksum = new CRC32();
-            checksum.update(index.array(), 0, index.position());
-            index.putInt((int) checksum.getValue());
-            index.putInt(entries.size());
-            index.putInt(bytes);
-            index.putInt(INDEX_MAGIC);
-
-            access.write(
-                    length - 2 * BLOCK_SIZE - indexSize,
-                    index.array(), 0, indexSize);
-            position = length - 2 * BLOCK_SIZE;
+            writeIndex();
             return false;
         }
 
-        writeEntryHeader(uuid.toString().getBytes(UTF_8), size);
+        CRC32 checksum = new CRC32();
+        checksum.update(b, offset, size);
+        String name = String.format("%s.%08x", uuid, checksum.getValue());
+        writeEntryHeader(name.getBytes(UTF_8), size);
         access.write(position + BLOCK_SIZE, b, offset, size);
         entries.put(uuid, new TarEntry(
                 uuid.getMostSignificantBits(), uuid.getLeastSignificantBits(),
@@ -188,7 +173,40 @@ class TarFile {
         return true;
     }
 
-    protected void writeEntryHeader(byte[] name, int size) throws IOException {
+    private void writeIndex() throws IOException {
+        int length = access.length();
+        int indexSize = entries.size() * 24 + 16;
+        int bytes = length - position - 3 * BLOCK_SIZE;
+        writeEntryHeader(indexEntryName, bytes);
+
+        ByteBuffer index = ByteBuffer.allocate(indexSize);
+
+        TarEntry[] sorted =
+                entries.values().toArray(new TarEntry[entries.size()]);
+        Arrays.sort(sorted, TarEntry.IDENTIFIER);
+        for (TarEntry entry : sorted) {
+            index.putLong(entry.msb());
+            index.putLong(entry.lsb());
+            index.putInt(entry.offset());
+            index.putInt(entry.size());
+        }
+
+        CRC32 checksum = new CRC32();
+        checksum.update(index.array(), 0, index.position());
+        index.putInt((int) checksum.getValue());
+        index.putInt(entries.size());
+        index.putInt(bytes);
+        index.putInt(INDEX_MAGIC);
+
+        access.write(
+                length - 2 * BLOCK_SIZE - indexSize,
+                index.array(), 0, indexSize);
+        access.write(length - BLOCK_SIZE * 2, ZERO_BYTES, 0, BLOCK_SIZE);
+        access.write(length - BLOCK_SIZE,     ZERO_BYTES, 0, BLOCK_SIZE);
+        position = length;
+    }
+
+    private void writeEntryHeader(byte[] name, int size) throws IOException {
         byte[] header = new byte[BLOCK_SIZE];
 
         // File name
@@ -269,27 +287,47 @@ class TarFile {
             if (!referencedIds.remove(id)) {
                 // this segment is not referenced anywhere
                 sorted[i] = null;
-            } else if (isDataSegmentId(entry.lsb())) {
+            } else {
                 size += getEntrySize(entry.size());
                 count += 1;
-                // this is a referenced data segment, so follow the graph
-                ByteBuffer segment = access.read(
-                        entry.offset(),
-                        Math.min(entry.size(), 16 * 256));
-                int pos = segment.position();
-                int refcount = segment.get(pos + REF_COUNT_OFFSET) & 0xff;
-                int refend = pos + 16 * (refcount + 1);
-                for (int refpos = pos + 16; refpos < refend; refpos += 16) {
-                    referencedIds.add(new UUID(
-                            segment.getLong(refpos),
-                            segment.getLong(refpos + 8)));
+                if (isDataSegmentId(entry.lsb())) {
+                    // this is a referenced data segment, so follow the graph
+                    ByteBuffer segment = access.read(
+                            entry.offset(),
+                            Math.min(entry.size(), 16 * 256));
+                    int pos = segment.position();
+                    int refcount = segment.get(pos + REF_COUNT_OFFSET) & 0xff;
+                    int refend = pos + 16 * (refcount + 1);
+                    for (int refpos = pos + 16; refpos < refend; refpos += 16) {
+                        referencedIds.add(new UUID(
+                                segment.getLong(refpos),
+                                segment.getLong(refpos + 8)));
+                    }
                 }
             }
         }
+        size += getEntrySize(24 * count + 16);
+        size += 2 * BLOCK_SIZE;
 
         // check if we could free up at least 25% of space
         if (entries == null && size < access.length() * 3 / 4) {
-            // TODO: write new tar file
+            String name = file.getName();
+            int pos = name.length() - "a.tar".length();
+            char generation = name.charAt(pos);
+            name = name.substring(0, pos) + (char) (generation + 1) + ".tar";
+            File newFile = new File(file.getParentFile(), name);
+            TarFile newTar = new TarFile(newFile, size, false);
+            for (int i = sorted.length - 1; i >= 0; i--) {
+                TarEntry entry = sorted[i];
+                if (entry != null) {
+                    byte[] buffer = new byte[entry.size()];
+                    access.read(entry.offset(), entry.size()).get(buffer);
+                    UUID id = new UUID(entry.msb(), entry.lsb());
+                    newTar.writeEntry(id, buffer, 0, buffer.length);
+                }
+            }
+            newTar.writeIndex();
+            newTar.close();
         }
     }
 
@@ -460,8 +498,9 @@ class TarFile {
         while (position + 2 * BLOCK_SIZE <= limit) {
             // read the tar header block
             ByteBuffer header = access.read(position, BLOCK_SIZE);
+            int pos = header.position();
             String name = readString(header, 100);
-            header.position(124);
+            header.position(pos + 124);
             int size = readNumber(header, 12);
 
             // TODO: verify the checksum, magic, etc.?
@@ -471,11 +510,26 @@ class TarFile {
             } else if (Arrays.equals(name.getBytes(UTF_8), indexEntryName)) {
                 break; // index entry encountered, so stop here
             } else if (position + BLOCK_SIZE + size > limit) {
+                log.warn("Invalid tar entry: " + name);
                 break; // invalid entry, truncate the file at this point
             }
 
+            Matcher matcher = NAME_PATTERN.matcher(name);
+            if (!matcher.matches()) {
+                log.warn("Unexpected tar entry name: " + name);
+                break;
+            }
+
+            String checksum = matcher.group(3);
+            if (checksum != null
+                    && Long.parseLong(checksum, 16)
+                    !=  access.crc32(position + BLOCK_SIZE, size)) {
+                log.warn("Checksum mismatch in tar entry: " + name);
+                break;
+            }
+
             try {
-                UUID id = UUID.fromString(name);
+                UUID id = UUID.fromString(matcher.group(1));
                 entries.put(id, new TarEntry(
                         id.getMostSignificantBits(),
                         id.getLeastSignificantBits(),