You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ju...@apache.org on 2013/02/12 21:44:12 UTC

svn commit: r1445349 - in /jackrabbit/oak/trunk/oak-core/src: main/java/org/apache/jackrabbit/oak/plugins/segment/ test/java/org/apache/jackrabbit/oak/plugins/segment/

Author: jukka
Date: Tue Feb 12 20:44:12 2013
New Revision: 1445349

URL: http://svn.apache.org/r1445349
Log:
OAK-593: Segment-based MK

Improved space-efficiency of storing short values

Modified:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentReader.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentReader.java?rev=1445349&r1=1445348&r2=1445349&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentReader.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentReader.java Tue Feb 12 20:44:12 2013
@@ -21,11 +21,6 @@ import static com.google.common.base.Pre
 import static com.google.common.base.Preconditions.checkPositionIndexes;
 import static org.apache.jackrabbit.oak.plugins.segment.SegmentWriter.BLOCK_SIZE;
 
-import java.io.IOException;
-
-import com.google.common.base.Charsets;
-import com.google.common.io.ByteStreams;
-
 public class SegmentReader {
 
     private final SegmentStore store;
@@ -37,15 +32,7 @@ public class SegmentReader {
     public String readString(RecordId recordId) {
         SegmentStream stream = readStream(recordId);
         try {
-            if (stream.getLength() > Integer.MAX_VALUE) {
-                throw new IllegalStateException(
-                        "Too long value: " + stream.getLength());
-            }
-            byte[] data = new byte[(int) stream.getLength()];
-            ByteStreams.readFully(stream, data);
-            return new String(data, Charsets.UTF_8);
-        } catch (IOException e) {
-            throw new IllegalStateException("Unexpected IOException", e);
+            return stream.getString();
         } finally {
             stream.close();
         }
@@ -54,11 +41,32 @@ public class SegmentReader {
     public SegmentStream readStream(RecordId recordId) {
         Segment segment = store.readSegment(recordId.getSegmentId());
         int offset = recordId.getOffset();
-        long length = segment.readLong(offset);
-        int size = (int) ((length + BLOCK_SIZE - 1) / BLOCK_SIZE);
-        ListRecord list =
-            new ListRecord(segment.readRecordId(offset + 8), size);
-        return new SegmentStream(this, recordId, list, length);
+        int length = segment.readByte(offset++) & 0xff;
+        if ((length & 0x80) == 0) {
+            byte[] data = new byte[length];
+            segment.readBytes(offset, data, 0, length);
+            return new SegmentStream(recordId, data);
+        } else if ((length & 0x40) == 0) {
+            length = (length & 0x3f) << 8;
+            length |= segment.readByte(offset++) & 0xff;
+            length += 0x80;
+            byte[] data = new byte[length];
+            segment.readBytes(offset, data, 0, length);
+            return new SegmentStream(recordId, data);
+        } else {
+            long l = ((long) length & 0x3f) << 56
+                    | ((long) (segment.readByte(offset++) & 0xff)) << 48
+                    | ((long) (segment.readByte(offset++) & 0xff)) << 40
+                    | ((long) (segment.readByte(offset++) & 0xff)) << 32
+                    | ((long) (segment.readByte(offset++) & 0xff)) << 24
+                    | ((long) (segment.readByte(offset++) & 0xff)) << 16
+                    | ((long) (segment.readByte(offset++) & 0xff)) << 8
+                    | ((long) (segment.readByte(offset++) & 0xff));
+            int size = (int) ((l + BLOCK_SIZE - 1) / BLOCK_SIZE);
+            ListRecord list =
+                    new ListRecord(segment.readRecordId(offset), size);
+            return new SegmentStream(this, recordId, list, l);
+        }
     }
 
     public int readInt(RecordId recordId, int position) {

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java?rev=1445349&r1=1445348&r2=1445349&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java Tue Feb 12 20:44:12 2013
@@ -21,10 +21,14 @@ import static com.google.common.base.Pre
 import static com.google.common.base.Preconditions.checkPositionIndexes;
 import static org.apache.jackrabbit.oak.plugins.segment.SegmentWriter.BLOCK_SIZE;
 
+import java.io.IOException;
 import java.io.InputStream;
 
 import javax.annotation.CheckForNull;
 
+import com.google.common.base.Charsets;
+import com.google.common.io.ByteStreams;
+
 public class SegmentStream extends InputStream {
 
     @CheckForNull
@@ -43,6 +47,8 @@ public class SegmentStream extends Input
 
     private final RecordId recordId;
 
+    private final byte[] inline;
+
     private final ListRecord blocks;
 
     private final long length;
@@ -56,15 +62,44 @@ public class SegmentStream extends Input
             ListRecord blocks, long length) {
         this.reader = checkNotNull(reader);
         this.recordId = checkNotNull(recordId);
+        this.inline = null;
         this.blocks = checkNotNull(blocks);
         checkArgument(length >= 0);
         this.length = length;
     }
 
+    SegmentStream(RecordId recordId, byte[] inline) {
+        this.reader = null;
+        this.recordId = checkNotNull(recordId);
+        this.inline = checkNotNull(inline);
+        this.blocks = null;
+        this.length = inline.length;
+    }
+
     public long getLength() {
         return length;
     }
 
+    public String getString() {
+        if (inline != null) {
+            return new String(inline, Charsets.UTF_8);
+        } else if (length > Integer.MAX_VALUE) {
+            throw new IllegalStateException("Too long value: " + length);
+        } else {
+            SegmentStream stream =
+                    new SegmentStream(reader, recordId, blocks, length);
+            try {
+                byte[] data = new byte[(int) length];
+                ByteStreams.readFully(stream, data);
+                return new String(data, Charsets.UTF_8);
+            } catch (IOException e) {
+                throw new IllegalStateException("Unexpected IOException", e);
+            } finally {
+                stream.close();
+            }
+        }
+    }
+
     @Override
     public boolean markSupported() {
         return true;
@@ -103,6 +138,13 @@ public class SegmentStream extends Input
             return 0;
         } else if (position == length) {
             return -1;
+        } else if (inline != null) {
+            if (position + len > length) {
+                len = (int) (length - position);
+            }
+            System.arraycopy(inline, (int) position, b, off, len);
+            position += len;
+            return len;
         } else {
             int blockIndex = (int) (position / SegmentWriter.BLOCK_SIZE);
             int blockOffset = (int) (position % SegmentWriter.BLOCK_SIZE);

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java?rev=1445349&r1=1445348&r2=1445349&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java Tue Feb 12 20:44:12 2013
@@ -19,6 +19,7 @@ package org.apache.jackrabbit.oak.plugin
 import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.base.Preconditions.checkPositionIndexes;
 
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
@@ -254,7 +255,7 @@ public class SegmentWriter {
     private synchronized RecordId writeValueRecord(
             long length, RecordId blocks) {
         RecordId valueId = prepare(8, Collections.singleton(blocks));
-        buffer.putLong(length);
+        buffer.putLong(length | (0x3L << 62));
         writeRecordId(blocks);
         return valueId;
     }
@@ -322,24 +323,11 @@ public class SegmentWriter {
         RecordId id = strings.get(string);
         if (id == null) {
             byte[] data = string.getBytes(Charsets.UTF_8);
-            List<RecordId> blockIds = new ArrayList<RecordId>();
-
-            int headLength = Math.min(data.length, INLINE_SIZE);
-            writeInlineBlocks(blockIds, data, 0, headLength);
-            if (data.length > headLength) {
-                int offset = headLength;
-                while (offset + INLINE_SIZE <= data.length) {
-                    int bulkLength =
-                        Math.min(data.length - offset, blockSegmentSize);
-                    writeBulkSegment(blockIds, data, offset, bulkLength);
-                    offset += bulkLength;
-                }
-                if (offset < data.length) {
-                    writeInlineBlocks(blockIds, data, offset, data.length - offset);
-                }
+            try {
+                id = writeStream(new ByteArrayInputStream(data));
+            } catch (IOException e) {
+                throw new IllegalStateException("Unexpected IOException", e);
             }
-
-            id = writeValueRecord(data.length, writeList(blockIds));
             strings.put(string, id);
         }
         return id;
@@ -355,45 +343,56 @@ public class SegmentWriter {
      */
     public RecordId writeStream(InputStream stream) throws IOException {
         RecordId id = SegmentStream.getRecordIdIfAvailable(stream);
-        if (id != null) {
-            return id;
-        }
+        if (id == null) {
+            try {
+                List<RecordId> blockIds = new ArrayList<RecordId>();
 
-        try {
-            List<RecordId> blockIds = new ArrayList<RecordId>();
+                // First read the head of the stream. This covers most small
+                // values and the frequently accessed head of larger ones.
+                // The head gets inlined in the current segment.
+                byte[] head = new byte[INLINE_SIZE];
+                int headLength = ByteStreams.read(stream, head, 0, head.length);
+
+                if (headLength < 0x80) {
+                    id = prepare(1 + headLength);
+                    buffer.put((byte) headLength);
+                    buffer.put(head, 0, headLength);
+                } else if (headLength - 0x80 < 0x4000) {
+                    id = prepare(2 + headLength);
+                    buffer.putShort((short) ((headLength - 0x80) | 0x8000));
+                    buffer.put(head, 0, headLength);
+                } else {
+                    writeInlineBlocks(blockIds, head, 0, headLength);
+                    long length = headLength;
+
+                    // If the stream filled the full head buffer, it's likely
+                    // that the bulk of the data is still to come. Read it
+                    // in larger chunks and save in separate segments.
+                    if (headLength == head.length) {
+                        byte[] bulk = new byte[blockSegmentSize];
+                        int bulkLength = ByteStreams.read(
+                                stream, bulk, 0, bulk.length);
+                        while (bulkLength > INLINE_SIZE) {
+                            writeBulkSegment(blockIds, bulk, 0, bulkLength);
+                            length += bulkLength;
+                            bulkLength = ByteStreams.read(
+                                    stream, bulk, 0, bulk.length);
+                        }
+                        // The tail chunk of the stream is too small to put in
+                        // a separate segment, so we inline it as well.
+                        if (bulkLength > 0) {
+                            writeInlineBlocks(blockIds, bulk, 0, bulkLength);
+                            length += bulkLength;
+                        }
+                    }
 
-            // First read the head of the stream. This covers most small
-            // binaries and the frequently accessed head of larger ones.
-            // The head gets inlined in the current segment.
-            byte[] head = new byte[INLINE_SIZE];
-            int headLength = ByteStreams.read(stream, head, 0, head.length);
-
-            writeInlineBlocks(blockIds, head, 0, headLength);
-            long length = headLength;
-
-            // If the stream filled the full head buffer, it's likely that
-            // the bulk of the data is still to come. Read it in larger
-            // chunks and save in separate segments.
-            if (headLength == head.length) {
-                byte[] bulk = new byte[blockSegmentSize];
-                int bulkLength = ByteStreams.read(stream, bulk, 0, bulk.length);
-                while (bulkLength > INLINE_SIZE) {
-                    writeBulkSegment(blockIds, bulk, 0, bulkLength);
-                    length += bulkLength;
-                    bulkLength = ByteStreams.read(stream, bulk, 0, bulk.length);
-                }
-                // The tail chunk of the stream is too small to put in
-                // a separate segment, so we inline also it.
-                if (bulkLength > 0) {
-                    writeInlineBlocks(blockIds, bulk, 0, bulkLength);
-                    length += bulkLength;
+                    id = writeValueRecord(length, writeList(blockIds));
                 }
+            } finally {
+                stream.close();
             }
-
-            return writeValueRecord(length, writeList(blockIds));
-        } finally {
-            stream.close();
         }
+        return id;
     }
 
     private RecordId writeProperty(PropertyState state) {

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java?rev=1445349&r1=1445348&r2=1445349&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java Tue Feb 12 20:44:12 2013
@@ -40,21 +40,21 @@ public class SegmentSizeTest {
 
         builder = MemoryNodeState.EMPTY_NODE.builder();
         builder.setProperty("foo", "bar");
-        assertEquals(70, getSize(builder));
+        assertEquals(48, getSize(builder));
 
         builder = MemoryNodeState.EMPTY_NODE.builder();
         builder.setProperty("foo", "bar");
         builder.setProperty("baz", 123);
-        assertEquals(124, getSize(builder));
+        assertEquals(80, getSize(builder));
 
         builder = MemoryNodeState.EMPTY_NODE.builder();
         builder.child("foo");
-        assertEquals(59, getSize(builder));
+        assertEquals(48, getSize(builder));
 
         builder = MemoryNodeState.EMPTY_NODE.builder();
         builder.child("foo");
         builder.child("bar");
-        assertEquals(102, getSize(builder));
+        assertEquals(80, getSize(builder));
     }
 
     @Test