You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by ju...@apache.org on 2010/10/14 14:28:33 UTC

svn commit: r1022487 - in /jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util: BundleReader.java BundleWriter.java

Author: jukka
Date: Thu Oct 14 12:28:31 2010
New Revision: 1022487

URL: http://svn.apache.org/viewvc?rev=1022487&view=rev
Log:
JCR-2762: Optimize bundle serialization

Use a variable-length encoding for internal counts and lengths that are never negative and almost always small.

Optimize the property state header from 12 to 2 bytes in most cases.

Drop serialization of definition identifiers as they are no longer used in Jackrabbit.

Modified:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java?rev=1022487&r1=1022486&r2=1022487&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java Thu Oct 14 12:28:31 2010
@@ -35,7 +35,8 @@ import java.math.BigDecimal;
 import javax.jcr.PropertyType;
 
 /**
- * Bundle deserializater.
+ * Bundle deserializer. See the {@link BundleWriter} class for details of
+ * the serialization format.
  *
  * @see BundleWriter
  */
@@ -100,8 +101,10 @@ class BundleReader {
         // parentUUID
         bundle.setParentId(readNodeId());
 
-        // definitionId
-        in.readUTF();
+        if (version < BundleBinding.VERSION_3) {
+            // definitionId
+            in.readUTF();
+        }
 
         // mixin types
         Set<Name> mixinTypeNames = new HashSet<Name>();
@@ -137,7 +140,9 @@ class BundleReader {
         }
 
         // read modcount, since version 1.0
-        if (version >= BundleBinding.VERSION_1) {
+        if (version >= BundleBinding.VERSION_3) {
+            bundle.setModCount((short) readInt());
+        } else if (version >= BundleBinding.VERSION_1) {
             bundle.setModCount(in.readShort());
         }
 
@@ -166,23 +171,47 @@ class BundleReader {
     private NodePropBundle.PropertyEntry readPropertyEntry(PropertyId id)
             throws IOException {
         NodePropBundle.PropertyEntry entry = new NodePropBundle.PropertyEntry(id);
-        // type and modcount
-        int type = in.readInt();
-        entry.setModCount((short) ((type >> 16) & 0x0ffff));
-        type &= 0x0ffff;
-        entry.setType(type);
-
-        // multiValued
-        entry.setMultiValued(in.readBoolean());
-        // definitionId
-        in.readUTF();
+
+        int count = 1;
+        if (version >= BundleBinding.VERSION_3) {
+            int b = in.readUnsignedByte();
+
+            entry.setType(b & 0x0f);
+
+            int len = b >>> 4;
+            if (len != 0) {
+                entry.setMultiValued(true);
+                if (len == 0x0f) {
+                    count = readInt() + 0x0f - 1;
+                } else {
+                    count = len - 1;
+                }
+            }
+
+            entry.setModCount((short) readInt());
+        } else {
+            // type and modcount
+            int type = in.readInt();
+            entry.setModCount((short) ((type >> 16) & 0x0ffff));
+            type &= 0x0ffff;
+            entry.setType(type);
+
+            // multiValued
+            entry.setMultiValued(in.readBoolean());
+
+            // definitionId
+            in.readUTF();
+
+            // count
+            count = in.readInt();
+        }
+
         // values
-        int count = in.readInt();   // count
         InternalValue[] values = new InternalValue[count];
         String[] blobIds = new String[count];
         for (int i = 0; i < count; i++) {
             InternalValue val;
-            switch (type) {
+            switch (entry.getType()) {
                 case PropertyType.BINARY:
                     int size = in.readInt();
                     if (size == BundleBinding.BINARY_IN_DATA_STORE) {
@@ -237,10 +266,16 @@ class BundleReader {
                 default:
                     // because writeUTF(String) has a size limit of 64k,
                     // Strings are serialized as <length><byte[]>
-                    int len = in.readInt();
+                    int len;
+                    if (version >= BundleBinding.VERSION_3) {
+                        len = readInt();
+                    } else {
+                        len = in.readInt();
+                    }
                     byte[] bytes = new byte[len];
                     in.readFully(bytes);
-                    val = InternalValue.valueOf(new String(bytes, "UTF-8"), type);
+                    val = InternalValue.valueOf(
+                            new String(bytes, "UTF-8"), entry.getType());
             }
             values[i] = val;
         }
@@ -320,8 +355,6 @@ class BundleReader {
 
     /**
      * Deserializes a name written using bundle serialization version 3.
-     * See the {@link BundleWriter} class for details of the serialization
-     * format.
      *
      * @return deserialized name
      * @throws IOException if an I/O error occurs
@@ -356,4 +389,20 @@ class BundleReader {
         }
     }
 
+    /**
+     * Deserializes a variable-length integer written using bundle
+     * serialization version 3.
+     *
+     * @return deserialized integer
+     * @throws IOException if an I/O error occurs
+     */
+    private int readInt() throws IOException {
+        int b = in.readUnsignedByte();
+        if ((b & 0x80) == 0) {
+            return b;
+        } else {
+            return readInt() << 7 | b & 0x7f;
+        }
+    }
+
 }

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java?rev=1022487&r1=1022486&r2=1022487&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java Thu Oct 14 12:28:31 2010
@@ -89,9 +89,6 @@ class BundleWriter {
         // parentUUID
         writeNodeId(bundle.getParentId());
 
-        // definitionId
-        out.writeUTF("");
-
         // mixin types
         for (Name name : bundle.getMixinTypeNames()) {
             writeName(name);
@@ -127,7 +124,7 @@ class BundleWriter {
         writeNodeId(null);
 
         // write mod count
-        out.writeShort(bundle.getModCount());
+        writeInt(bundle.getModCount());
 
         // write shared set
         for (NodeId nodeId: bundle.getSharedSet()) {
@@ -140,22 +137,55 @@ class BundleWriter {
     }
 
     /**
-     * Serializes a <code>PropertyState</code> to the data output stream
+     * Serializes a property entry. The serialization begins with a single
+     * byte that encodes the type and multi-valuedness of the property:
+     * <pre>
+     * +-------------------------------+
+     * |   mv count    |     type      |
+     * +-------------------------------+
+     * </pre>
+     * <p>
+     * The lower four bits encode the property type (0-12 in JCR 2.0) and
+     * higher bits indicate whether this is a multi-valued property and how
+     * many property values there are. A value of 0 is reserved for
+     * single-valued properties (that are guaranteed to always have just a
+     * single value), and all non-zero values indicate a multi-valued property.
+     * <p>
+     * In multi-valued properties the exact value of the "mv count" field is
+     * the number of property values plus one and truncated at 15 (the highest
+     * four-bit value). If there are 14 or more (14 + 1 == 15) property values,
+     * then the number of additional values is serialized as a variable-length
+     * integer (see {@link #writeInt(int)}) right after this byte.
+     * <p>
+     * The modification count of the property state is written next as a
+     * variable-length integer, followed by the serializations of all the
+     * values of this property.
      *
      * @param state the property entry to store
      * @throws IOException if an I/O error occurs.
      */
     private void writeState(NodePropBundle.PropertyEntry state)
             throws IOException {
-        // type & mod count
-        out.writeInt(state.getType() | (state.getModCount() << 16));
-        // multiValued
-        out.writeBoolean(state.isMultiValued());
-        // definitionId
-        out.writeUTF("");
-        // values
         InternalValue[] values = state.getValues();
-        out.writeInt(values.length); // count
+
+        int type = state.getType();
+        assert 0 <= type && type <= 0x0f;
+        if (state.isMultiValued()) {
+            int len = values.length + 1;
+            if (len < 0x0f) {
+                out.writeByte(len << 4 | type);
+            } else {
+                out.writeByte(0xf0 | type);
+                writeInt(len - 0x0f);
+            }
+        } else {
+            assert values.length == 1;
+            out.writeByte(type);
+        }
+
+        writeInt(state.getModCount());
+
+        // values
         for (int i = 0; i < values.length; i++) {
             InternalValue val = values[i];
             switch (state.getType()) {
@@ -282,7 +312,7 @@ class BundleWriter {
                     // because writeUTF(String) has a size limit of 64k,
                     // we're using write(byte[]) instead
                     byte[] bytes = val.toString().getBytes("UTF-8");
-                    out.writeInt(bytes.length); // length of byte[]
+                    writeInt(bytes.length); // length of byte[]
                     out.write(bytes);   // byte[]
             }
         }
@@ -428,4 +458,38 @@ class BundleWriter {
         }
     }
 
+    /**
+     * Serializes an integer using a variable-length encoding that favors
+     * small positive numbers. The serialization consists of one to five
+     * bytes of the following format:
+     * <pre>
+     * +-------------------------------+
+     * | c | 7 least significant bits  |
+     * +-------------------------------+
+     * </pre>
+     * <p>
+     * If the given integer fits in seven bits (i.e. the value is between
+     * 0 and 127, inclusive), then it is written as-is in a single byte.
+     * Otherwise the continuation flag <code>c</code> is set and the least
+     * significant seven bits are written together with the flag as a single
+     * byte. The integer is then shifted right seven bits and the process
+     * continues from the beginning.
+     * <p>
+     * This format uses a single byte for values 0-127, two bytes for
+     * 128-16383, three for 16384-2097151, four for 2097152-268435455
+     * and five bytes for all other 32-bit numbers (including negative ones).
+     *
+     * @param value integer value
+     * @throws IOException if an I/O error occurs
+     */
+    private void writeInt(int value) throws IOException {
+        int b = value & 0x7f;
+        if (b == value) {
+            out.writeByte(b);
+        } else {
+            out.writeByte(b | 0x80);
+            writeInt(value >>> 7);
+        }
+    }
+
 }