You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by ju...@apache.org on 2010/10/14 14:28:33 UTC
svn commit: r1022487 - in
/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util:
BundleReader.java BundleWriter.java
Author: jukka
Date: Thu Oct 14 12:28:31 2010
New Revision: 1022487
URL: http://svn.apache.org/viewvc?rev=1022487&view=rev
Log:
JCR-2762: Optimize bundle serialization
Use a variable-length encoding for internal counts and lengths that are never negative and almost always small.
Optimize the property state header from 12 to 2 bytes in most cases.
Drop serialization of definition identifiers as they are no longer used in Jackrabbit.
Modified:
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java?rev=1022487&r1=1022486&r2=1022487&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java Thu Oct 14 12:28:31 2010
@@ -35,7 +35,8 @@ import java.math.BigDecimal;
import javax.jcr.PropertyType;
/**
- * Bundle deserializater.
+ * Bundle deserializer. See the {@link BundleWriter} class for details of
+ * the serialization format.
*
* @see BundleWriter
*/
@@ -100,8 +101,10 @@ class BundleReader {
// parentUUID
bundle.setParentId(readNodeId());
- // definitionId
- in.readUTF();
+ if (version < BundleBinding.VERSION_3) {
+ // definitionId
+ in.readUTF();
+ }
// mixin types
Set<Name> mixinTypeNames = new HashSet<Name>();
@@ -137,7 +140,9 @@ class BundleReader {
}
// read modcount, since version 1.0
- if (version >= BundleBinding.VERSION_1) {
+ if (version >= BundleBinding.VERSION_3) {
+ bundle.setModCount((short) readInt());
+ } else if (version >= BundleBinding.VERSION_1) {
bundle.setModCount(in.readShort());
}
@@ -166,23 +171,47 @@ class BundleReader {
private NodePropBundle.PropertyEntry readPropertyEntry(PropertyId id)
throws IOException {
NodePropBundle.PropertyEntry entry = new NodePropBundle.PropertyEntry(id);
- // type and modcount
- int type = in.readInt();
- entry.setModCount((short) ((type >> 16) & 0x0ffff));
- type &= 0x0ffff;
- entry.setType(type);
-
- // multiValued
- entry.setMultiValued(in.readBoolean());
- // definitionId
- in.readUTF();
+
+ int count = 1;
+ if (version >= BundleBinding.VERSION_3) {
+ int b = in.readUnsignedByte();
+
+ entry.setType(b & 0x0f);
+
+ int len = b >>> 4;
+ if (len != 0) {
+ entry.setMultiValued(true);
+ if (len == 0x0f) {
+ count = readInt() + 0x0f - 1;
+ } else {
+ count = len - 1;
+ }
+ }
+
+ entry.setModCount((short) readInt());
+ } else {
+ // type and modcount
+ int type = in.readInt();
+ entry.setModCount((short) ((type >> 16) & 0x0ffff));
+ type &= 0x0ffff;
+ entry.setType(type);
+
+ // multiValued
+ entry.setMultiValued(in.readBoolean());
+
+ // definitionId
+ in.readUTF();
+
+ // count
+ count = in.readInt();
+ }
+
// values
- int count = in.readInt(); // count
InternalValue[] values = new InternalValue[count];
String[] blobIds = new String[count];
for (int i = 0; i < count; i++) {
InternalValue val;
- switch (type) {
+ switch (entry.getType()) {
case PropertyType.BINARY:
int size = in.readInt();
if (size == BundleBinding.BINARY_IN_DATA_STORE) {
@@ -237,10 +266,16 @@ class BundleReader {
default:
// because writeUTF(String) has a size limit of 64k,
// Strings are serialized as <length><byte[]>
- int len = in.readInt();
+ int len;
+ if (version >= BundleBinding.VERSION_3) {
+ len = readInt();
+ } else {
+ len = in.readInt();
+ }
byte[] bytes = new byte[len];
in.readFully(bytes);
- val = InternalValue.valueOf(new String(bytes, "UTF-8"), type);
+ val = InternalValue.valueOf(
+ new String(bytes, "UTF-8"), entry.getType());
}
values[i] = val;
}
@@ -320,8 +355,6 @@ class BundleReader {
/**
* Deserializes a name written using bundle serialization version 3.
- * See the {@link BundleWriter} class for details of the serialization
- * format.
*
* @return deserialized name
* @throws IOException if an I/O error occurs
@@ -356,4 +389,20 @@ class BundleReader {
}
}
+ /**
+ * Deserializes a variable-length integer written using bundle
+ * serialization version 3.
+ *
+ * @return deserialized integer
+ * @throws IOException if an I/O error occurs
+ */
+ private int readInt() throws IOException {
+ int b = in.readUnsignedByte();
+ if ((b & 0x80) == 0) {
+ return b;
+ } else {
+ return readInt() << 7 | b & 0x7f;
+ }
+ }
+
}
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java?rev=1022487&r1=1022486&r2=1022487&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java Thu Oct 14 12:28:31 2010
@@ -89,9 +89,6 @@ class BundleWriter {
// parentUUID
writeNodeId(bundle.getParentId());
- // definitionId
- out.writeUTF("");
-
// mixin types
for (Name name : bundle.getMixinTypeNames()) {
writeName(name);
@@ -127,7 +124,7 @@ class BundleWriter {
writeNodeId(null);
// write mod count
- out.writeShort(bundle.getModCount());
+ writeInt(bundle.getModCount());
// write shared set
for (NodeId nodeId: bundle.getSharedSet()) {
@@ -140,22 +137,55 @@ class BundleWriter {
}
/**
- * Serializes a <code>PropertyState</code> to the data output stream
+ * Serializes a property entry. The serialization begins with a single
+ * byte that encodes the type and multi-valuedness of the property:
+ * <pre>
+ * +-------------------------------+
+ * | mv count | type |
+ * +-------------------------------+
+ * </pre>
+ * <p>
+ * The lower four bits encode the property type (0-12 in JCR 2.0) and
+ * higher bits indicate whether this is a multi-valued property and how
+ * many property values there are. A value of 0 is reserved for
+ * single-valued properties (that are guaranteed to always have just a
+ * single value), and all non-zero values indicate a multi-valued property.
+ * <p>
+ * In multi-valued properties the exact value of the "mv count" field is
+ * the number of property values plus one and truncated at 15 (the highest
+ * four-bit value). If there are 14 or more (14 + 1 == 15) property values,
+ * then the number of additional values is serialized as a variable-length
+ * integer (see {@link #writeInt(int)}) right after this byte.
+ * <p>
+ * The modification count of the property state is written next as a
+ * variable-length integer, followed by the serializations of all the
+ * values of this property.
*
* @param state the property entry to store
* @throws IOException if an I/O error occurs.
*/
private void writeState(NodePropBundle.PropertyEntry state)
throws IOException {
- // type & mod count
- out.writeInt(state.getType() | (state.getModCount() << 16));
- // multiValued
- out.writeBoolean(state.isMultiValued());
- // definitionId
- out.writeUTF("");
- // values
InternalValue[] values = state.getValues();
- out.writeInt(values.length); // count
+
+ int type = state.getType();
+ assert 0 <= type && type <= 0x0f;
+ if (state.isMultiValued()) {
+ int len = values.length + 1;
+ if (len < 0x0f) {
+ out.writeByte(len << 4 | type);
+ } else {
+ out.writeByte(0xf0 | type);
+ writeInt(len - 0x0f);
+ }
+ } else {
+ assert values.length == 1;
+ out.writeByte(type);
+ }
+
+ writeInt(state.getModCount());
+
+ // values
for (int i = 0; i < values.length; i++) {
InternalValue val = values[i];
switch (state.getType()) {
@@ -282,7 +312,7 @@ class BundleWriter {
// because writeUTF(String) has a size limit of 64k,
// we're using write(byte[]) instead
byte[] bytes = val.toString().getBytes("UTF-8");
- out.writeInt(bytes.length); // length of byte[]
+ writeInt(bytes.length); // length of byte[]
out.write(bytes); // byte[]
}
}
@@ -428,4 +458,38 @@ class BundleWriter {
}
}
+ /**
+ * Serializes an integer using a variable-length encoding that favors
+ * small positive numbers. The serialization consists of one to five
+ * bytes of the following format:
+ * <pre>
+ * +-------------------------------+
+ * | c | 7 least significant bits |
+ * +-------------------------------+
+ * </pre>
+ * <p>
+ * If the given integer fits in seven bits (i.e. the value is between
+ * 0 and 127, inclusive), then it is written as-is in a single byte.
+ * Otherwise the continuation flag <code>c</code> is set and the least
+ * significant seven bits are written together with the flag as a single
+ * byte. The integer is then shifted right seven bits and the process
+ * continues from the beginning.
+ * <p>
+ * This format uses a single byte for values 0-127, two bytes for
+ * 128-16383, three for 16384-2097151, four for 2097152-268435455
+ * and five bytes for all other 32-bit numbers (including negative ones).
+ *
+ * @param value integer value
+ * @throws IOException if an I/O error occurs
+ */
+ private void writeInt(int value) throws IOException {
+ int b = value & 0x7f;
+ if (b == value) {
+ out.writeByte(b);
+ } else {
+ out.writeByte(b | 0x80);
+ writeInt(value >>> 7);
+ }
+ }
+
}