You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ju...@apache.org on 2013/02/12 21:44:12 UTC
svn commit: r1445349 - in /jackrabbit/oak/trunk/oak-core/src: main/java/org/apache/jackrabbit/oak/plugins/segment/ test/java/org/apache/jackrabbit/oak/plugins/segment/
Author: jukka
Date: Tue Feb 12 20:44:12 2013
New Revision: 1445349
URL: http://svn.apache.org/r1445349
Log:
OAK-593: Segment-based MK
Improved space-efficiency of storing short values
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentReader.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentReader.java?rev=1445349&r1=1445348&r2=1445349&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentReader.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentReader.java Tue Feb 12 20:44:12 2013
@@ -21,11 +21,6 @@ import static com.google.common.base.Pre
import static com.google.common.base.Preconditions.checkPositionIndexes;
import static org.apache.jackrabbit.oak.plugins.segment.SegmentWriter.BLOCK_SIZE;
-import java.io.IOException;
-
-import com.google.common.base.Charsets;
-import com.google.common.io.ByteStreams;
-
public class SegmentReader {
private final SegmentStore store;
@@ -37,15 +32,7 @@ public class SegmentReader {
public String readString(RecordId recordId) {
SegmentStream stream = readStream(recordId);
try {
- if (stream.getLength() > Integer.MAX_VALUE) {
- throw new IllegalStateException(
- "Too long value: " + stream.getLength());
- }
- byte[] data = new byte[(int) stream.getLength()];
- ByteStreams.readFully(stream, data);
- return new String(data, Charsets.UTF_8);
- } catch (IOException e) {
- throw new IllegalStateException("Unexpected IOException", e);
+ return stream.getString();
} finally {
stream.close();
}
@@ -54,11 +41,32 @@ public class SegmentReader {
public SegmentStream readStream(RecordId recordId) {
Segment segment = store.readSegment(recordId.getSegmentId());
int offset = recordId.getOffset();
- long length = segment.readLong(offset);
- int size = (int) ((length + BLOCK_SIZE - 1) / BLOCK_SIZE);
- ListRecord list =
- new ListRecord(segment.readRecordId(offset + 8), size);
- return new SegmentStream(this, recordId, list, length);
+ int length = segment.readByte(offset++) & 0xff;
+ if ((length & 0x80) == 0) {
+ byte[] data = new byte[length];
+ segment.readBytes(offset, data, 0, length);
+ return new SegmentStream(recordId, data);
+ } else if ((length & 0x40) == 0) {
+ length = (length & 0x3f) << 8;
+ length |= segment.readByte(offset++) & 0xff;
+ length += 0x80;
+ byte[] data = new byte[length];
+ segment.readBytes(offset, data, 0, length);
+ return new SegmentStream(recordId, data);
+ } else {
+ long l = ((long) length & 0x3f) << 56
+ | ((long) (segment.readByte(offset++) & 0xff)) << 48
+ | ((long) (segment.readByte(offset++) & 0xff)) << 40
+ | ((long) (segment.readByte(offset++) & 0xff)) << 32
+ | ((long) (segment.readByte(offset++) & 0xff)) << 24
+ | ((long) (segment.readByte(offset++) & 0xff)) << 16
+ | ((long) (segment.readByte(offset++) & 0xff)) << 8
+ | ((long) (segment.readByte(offset++) & 0xff));
+ int size = (int) ((l + BLOCK_SIZE - 1) / BLOCK_SIZE);
+ ListRecord list =
+ new ListRecord(segment.readRecordId(offset), size);
+ return new SegmentStream(this, recordId, list, l);
+ }
}
public int readInt(RecordId recordId, int position) {
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java?rev=1445349&r1=1445348&r2=1445349&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentStream.java Tue Feb 12 20:44:12 2013
@@ -21,10 +21,14 @@ import static com.google.common.base.Pre
import static com.google.common.base.Preconditions.checkPositionIndexes;
import static org.apache.jackrabbit.oak.plugins.segment.SegmentWriter.BLOCK_SIZE;
+import java.io.IOException;
import java.io.InputStream;
import javax.annotation.CheckForNull;
+import com.google.common.base.Charsets;
+import com.google.common.io.ByteStreams;
+
public class SegmentStream extends InputStream {
@CheckForNull
@@ -43,6 +47,8 @@ public class SegmentStream extends Input
private final RecordId recordId;
+ private final byte[] inline;
+
private final ListRecord blocks;
private final long length;
@@ -56,15 +62,44 @@ public class SegmentStream extends Input
ListRecord blocks, long length) {
this.reader = checkNotNull(reader);
this.recordId = checkNotNull(recordId);
+ this.inline = null;
this.blocks = checkNotNull(blocks);
checkArgument(length >= 0);
this.length = length;
}
+ SegmentStream(RecordId recordId, byte[] inline) {
+ this.reader = null;
+ this.recordId = checkNotNull(recordId);
+ this.inline = checkNotNull(inline);
+ this.blocks = null;
+ this.length = inline.length;
+ }
+
public long getLength() {
return length;
}
+ public String getString() {
+ if (inline != null) {
+ return new String(inline, Charsets.UTF_8);
+ } else if (length > Integer.MAX_VALUE) {
+ throw new IllegalStateException("Too long value: " + length);
+ } else {
+ SegmentStream stream =
+ new SegmentStream(reader, recordId, blocks, length);
+ try {
+ byte[] data = new byte[(int) length];
+ ByteStreams.readFully(stream, data);
+ return new String(data, Charsets.UTF_8);
+ } catch (IOException e) {
+ throw new IllegalStateException("Unexpected IOException", e);
+ } finally {
+ stream.close();
+ }
+ }
+ }
+
@Override
public boolean markSupported() {
return true;
@@ -103,6 +138,13 @@ public class SegmentStream extends Input
return 0;
} else if (position == length) {
return -1;
+ } else if (inline != null) {
+ if (position + len > length) {
+ len = (int) (length - position);
+ }
+ System.arraycopy(inline, (int) position, b, off, len);
+ position += len;
+ return len;
} else {
int blockIndex = (int) (position / SegmentWriter.BLOCK_SIZE);
int blockOffset = (int) (position % SegmentWriter.BLOCK_SIZE);
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java?rev=1445349&r1=1445348&r2=1445349&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java Tue Feb 12 20:44:12 2013
@@ -19,6 +19,7 @@ package org.apache.jackrabbit.oak.plugin
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkPositionIndexes;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
@@ -254,7 +255,7 @@ public class SegmentWriter {
private synchronized RecordId writeValueRecord(
long length, RecordId blocks) {
RecordId valueId = prepare(8, Collections.singleton(blocks));
- buffer.putLong(length);
+ buffer.putLong(length | (0x3L << 62));
writeRecordId(blocks);
return valueId;
}
@@ -322,24 +323,11 @@ public class SegmentWriter {
RecordId id = strings.get(string);
if (id == null) {
byte[] data = string.getBytes(Charsets.UTF_8);
- List<RecordId> blockIds = new ArrayList<RecordId>();
-
- int headLength = Math.min(data.length, INLINE_SIZE);
- writeInlineBlocks(blockIds, data, 0, headLength);
- if (data.length > headLength) {
- int offset = headLength;
- while (offset + INLINE_SIZE <= data.length) {
- int bulkLength =
- Math.min(data.length - offset, blockSegmentSize);
- writeBulkSegment(blockIds, data, offset, bulkLength);
- offset += bulkLength;
- }
- if (offset < data.length) {
- writeInlineBlocks(blockIds, data, offset, data.length - offset);
- }
+ try {
+ id = writeStream(new ByteArrayInputStream(data));
+ } catch (IOException e) {
+ throw new IllegalStateException("Unexpected IOException", e);
}
-
- id = writeValueRecord(data.length, writeList(blockIds));
strings.put(string, id);
}
return id;
@@ -355,45 +343,56 @@ public class SegmentWriter {
*/
public RecordId writeStream(InputStream stream) throws IOException {
RecordId id = SegmentStream.getRecordIdIfAvailable(stream);
- if (id != null) {
- return id;
- }
+ if (id == null) {
+ try {
+ List<RecordId> blockIds = new ArrayList<RecordId>();
- try {
- List<RecordId> blockIds = new ArrayList<RecordId>();
+ // First read the head of the stream. This covers most small
+ // values and the frequently accessed head of larger ones.
+ // The head gets inlined in the current segment.
+ byte[] head = new byte[INLINE_SIZE];
+ int headLength = ByteStreams.read(stream, head, 0, head.length);
+
+ if (headLength < 0x80) {
+ id = prepare(1 + headLength);
+ buffer.put((byte) headLength);
+ buffer.put(head, 0, headLength);
+ } else if (headLength - 0x80 < 0x4000) {
+ id = prepare(2 + headLength);
+ buffer.putShort((short) ((headLength - 0x80) | 0x8000));
+ buffer.put(head, 0, headLength);
+ } else {
+ writeInlineBlocks(blockIds, head, 0, headLength);
+ long length = headLength;
+
+ // If the stream filled the full head buffer, it's likely
+ // that the bulk of the data is still to come. Read it
+ // in larger chunks and save in separate segments.
+ if (headLength == head.length) {
+ byte[] bulk = new byte[blockSegmentSize];
+ int bulkLength = ByteStreams.read(
+ stream, bulk, 0, bulk.length);
+ while (bulkLength > INLINE_SIZE) {
+ writeBulkSegment(blockIds, bulk, 0, bulkLength);
+ length += bulkLength;
+ bulkLength = ByteStreams.read(
+ stream, bulk, 0, bulk.length);
+ }
+ // The tail chunk of the stream is too small to put in
+ // a separate segment, so we inline also it.
+ if (bulkLength > 0) {
+ writeInlineBlocks(blockIds, bulk, 0, bulkLength);
+ length += bulkLength;
+ }
+ }
- // First read the head of the stream. This covers most small
- // binaries and the frequently accessed head of larger ones.
- // The head gets inlined in the current segment.
- byte[] head = new byte[INLINE_SIZE];
- int headLength = ByteStreams.read(stream, head, 0, head.length);
-
- writeInlineBlocks(blockIds, head, 0, headLength);
- long length = headLength;
-
- // If the stream filled the full head buffer, it's likely that
- // the bulk of the data is still to come. Read it in larger
- // chunks and save in separate segments.
- if (headLength == head.length) {
- byte[] bulk = new byte[blockSegmentSize];
- int bulkLength = ByteStreams.read(stream, bulk, 0, bulk.length);
- while (bulkLength > INLINE_SIZE) {
- writeBulkSegment(blockIds, bulk, 0, bulkLength);
- length += bulkLength;
- bulkLength = ByteStreams.read(stream, bulk, 0, bulk.length);
- }
- // The tail chunk of the stream is too small to put in
- // a separate segment, so we inline also it.
- if (bulkLength > 0) {
- writeInlineBlocks(blockIds, bulk, 0, bulkLength);
- length += bulkLength;
+ id = writeValueRecord(length, writeList(blockIds));
}
+ } finally {
+ stream.close();
}
-
- return writeValueRecord(length, writeList(blockIds));
- } finally {
- stream.close();
}
+ return id;
}
private RecordId writeProperty(PropertyState state) {
Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java?rev=1445349&r1=1445348&r2=1445349&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentSizeTest.java Tue Feb 12 20:44:12 2013
@@ -40,21 +40,21 @@ public class SegmentSizeTest {
builder = MemoryNodeState.EMPTY_NODE.builder();
builder.setProperty("foo", "bar");
- assertEquals(70, getSize(builder));
+ assertEquals(48, getSize(builder));
builder = MemoryNodeState.EMPTY_NODE.builder();
builder.setProperty("foo", "bar");
builder.setProperty("baz", 123);
- assertEquals(124, getSize(builder));
+ assertEquals(80, getSize(builder));
builder = MemoryNodeState.EMPTY_NODE.builder();
builder.child("foo");
- assertEquals(59, getSize(builder));
+ assertEquals(48, getSize(builder));
builder = MemoryNodeState.EMPTY_NODE.builder();
builder.child("foo");
builder.child("bar");
- assertEquals(102, getSize(builder));
+ assertEquals(80, getSize(builder));
}
@Test