You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by ju...@apache.org on 2010/10/14 14:27:59 UTC

svn commit: r1022485 - in /jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util: BundleBinding.java BundleNames.java BundleReader.java BundleWriter.java

Author: jukka
Date: Thu Oct 14 12:27:59 2010
New Revision: 1022485

URL: http://svn.apache.org/viewvc?rev=1022485&view=rev
Log:
JCR-2762: Optimize bundle serialization

Introduce bundle serialization version 3 that uses a fixed list of known names and namespaces to more efficiently store commonly used names. See BundleWriter.writeName() for a detailed description of the name serialization format.

Added:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleNames.java   (with props)
Modified:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleBinding.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleBinding.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleBinding.java?rev=1022485&r1=1022484&r2=1022485&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleBinding.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleBinding.java Thu Oct 14 12:27:59 2010
@@ -44,9 +44,14 @@ public class BundleBinding {
     static final int VERSION_2 = 2;
 
     /**
+     * serialization version 3
+     */
+    static final int VERSION_3 = 3;
+
+    /**
      * current version
      */
-    static final int VERSION_CURRENT = VERSION_2;
+    static final int VERSION_CURRENT = VERSION_3;
 
     /**
      * the namespace index

Added: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleNames.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleNames.java?rev=1022485&view=auto
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleNames.java (added)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleNames.java Thu Oct 14 12:27:59 2010
@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.core.persistence.util;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.io.IOExceptionWithCause;
+import org.apache.jackrabbit.spi.Name;
+import org.apache.jackrabbit.spi.commons.name.NameConstants;
+
+// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING //
+//                                                                         //
+// The contents and behaviour of this class are tightly coupled with the   //
+// bundle serialization format, so make sure that you know what you're     //
+// doing before modifying this class!                                      //
+//                                                                         //
+// WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING //
+
+
+/**
+ * Fixed tables of well-known JCR names and namespace URIs. The
+ * {@link BundleWriter} and {@link BundleReader} classes use the table
+ * indexes to serialize such names compactly in bundles.
+ */
+class BundleNames {
+
+    /**
+     * Static list of standard names; index 0 is the <code>null</code> name.
+     */
+    private static final Name[] NAME_ARRAY = {
+        // WARNING: Only edit if you really know what you're doing!
+        null,
+        NameConstants.JCR_ACTIVITIES,
+        NameConstants.JCR_ACTIVITY,
+        NameConstants.JCR_ACTIVITY_TITLE,
+        NameConstants.JCR_AUTOCREATED,
+        NameConstants.JCR_BASEVERSION,
+        NameConstants.JCR_CHILDNODEDEFINITION,
+        NameConstants.JCR_CHILDVERSIONHISTORY,
+        NameConstants.JCR_CONFIGURATION,
+        NameConstants.JCR_CONFIGURATIONS,
+        NameConstants.JCR_CONTENT,
+        NameConstants.JCR_COPIEDFROM,
+        NameConstants.JCR_CREATED,
+        NameConstants.JCR_CREATEDBY,
+        NameConstants.JCR_CURRENT_LIFECYCLE_STATE,
+        NameConstants.JCR_DATA,
+        NameConstants.JCR_DEFAULTPRIMARYTYPE,
+        NameConstants.JCR_DEFAULTVALUES,
+        NameConstants.JCR_ENCODING,
+        NameConstants.JCR_ETAG,
+        NameConstants.JCR_FROZENMIXINTYPES,
+        NameConstants.JCR_FROZENNODE,
+        NameConstants.JCR_FROZENPRIMARYTYPE,
+        NameConstants.JCR_FROZENUUID,
+        NameConstants.JCR_HASORDERABLECHILDNODES,
+        NameConstants.JCR_HOST,
+        NameConstants.JCR_ID,
+        NameConstants.JCR_ISCHECKEDOUT,
+        NameConstants.JCR_ISMIXIN,
+        NameConstants.JCR_LANGUAGE,
+        NameConstants.JCR_LASTMODIFIED,
+        NameConstants.JCR_LASTMODIFIEDBY,
+        NameConstants.JCR_LIFECYCLE_POLICY,
+        NameConstants.JCR_LOCKISDEEP,
+        NameConstants.JCR_LOCKOWNER,
+        NameConstants.JCR_MANDATORY,
+        NameConstants.JCR_MERGEFAILED,
+        NameConstants.JCR_MIMETYPE,
+        NameConstants.JCR_MIXINTYPES,
+        NameConstants.JCR_MULTIPLE,
+        NameConstants.JCR_NAME,
+        NameConstants.JCR_NODETYPENAME,
+        NameConstants.JCR_NODETYPES,
+        NameConstants.JCR_ONPARENTVERSION,
+        NameConstants.JCR_PATH,
+        NameConstants.JCR_PORT,
+        NameConstants.JCR_PREDECESSORS,
+        NameConstants.JCR_PRIMARYITEMNAME,
+        NameConstants.JCR_PRIMARYTYPE,
+        NameConstants.JCR_PROPERTYDEFINITION,
+        NameConstants.JCR_PROTECTED,
+        NameConstants.JCR_PROTOCOL,
+        NameConstants.JCR_REPOSITORY,
+        NameConstants.JCR_REQUIREDPRIMARYTYPES,
+        NameConstants.JCR_REQUIREDTYPE,
+        NameConstants.JCR_ROOT,
+        NameConstants.JCR_ROOTVERSION,
+        NameConstants.JCR_SAMENAMESIBLINGS,
+        NameConstants.JCR_SCORE,
+        NameConstants.JCR_STATEMENT,
+        NameConstants.JCR_SUCCESSORS,
+        NameConstants.JCR_SUPERTYPES,
+        NameConstants.JCR_SYSTEM,
+        NameConstants.JCR_UUID,
+        NameConstants.JCR_VALUECONSTRAINTS,
+        NameConstants.JCR_VERSIONHISTORY,
+        NameConstants.JCR_VERSIONLABELS,
+        NameConstants.JCR_VERSIONSTORAGE,
+        NameConstants.JCR_VERSIONABLEUUID,
+        NameConstants.JCR_WORKSPACE,
+        NameConstants.JCR_XMLCHARACTERS,
+        NameConstants.JCR_XMLTEXT,
+        NameConstants.MIX_CREATED,
+        NameConstants.MIX_ETAG,
+        NameConstants.MIX_LANGUAGE,
+        NameConstants.MIX_LASTMODIFIED,
+        NameConstants.MIX_LIFECYCLE,
+        NameConstants.MIX_LOCKABLE,
+        NameConstants.MIX_MIMETYPE,
+        NameConstants.MIX_REFERENCEABLE,
+        NameConstants.MIX_SHAREABLE,
+        NameConstants.MIX_SIMPLE_VERSIONABLE,
+        NameConstants.MIX_TITLE,
+        NameConstants.MIX_VERSIONABLE,
+        NameConstants.NT_ACTIVITY,
+        NameConstants.NT_ADDRESS,
+        NameConstants.NT_BASE,
+        NameConstants.NT_CHILDNODEDEFINITION,
+        NameConstants.NT_CONFIGURATION,
+        NameConstants.NT_FILE,
+        NameConstants.NT_FOLDER,
+        NameConstants.NT_FROZENNODE,
+        NameConstants.NT_HIERARCHYNODE,
+        NameConstants.NT_NODETYPE,
+        NameConstants.NT_PROPERTYDEFINITION,
+        NameConstants.NT_QUERY,
+        NameConstants.NT_RESOURCE,
+        NameConstants.NT_SHARE,
+        NameConstants.NT_UNSTRUCTURED,
+        NameConstants.NT_VERSION,
+        NameConstants.NT_VERSIONEDCHILD,
+        NameConstants.NT_VERSIONHISTORY,
+        NameConstants.NT_VERSIONLABELS,
+        NameConstants.REP_ACTIVITIES,
+        NameConstants.REP_CONFIGURATIONS,
+        NameConstants.REP_VERSION_REFERENCE,
+        NameConstants.REP_BASEVERSIONS,
+        NameConstants.REP_NODETYPES,
+        NameConstants.REP_ROOT,
+        NameConstants.REP_SYSTEM,
+        NameConstants.REP_VERSIONS,
+        NameConstants.REP_VERSIONSTORAGE
+    };  // WARNING: Only edit if you really know what you're doing!
+
+    private static final Map<Name, Integer> NAME_MAP =
+        new HashMap<Name, Integer>();
+
+    /**
+     * Static list of standard namespaces.
+     */
+    private static final String[] NAMESPACE_ARRAY = {
+        // WARNING: Only edit if you really know what you're doing!
+        Name.NS_DEFAULT_URI,
+        Name.NS_JCR_URI,
+        Name.NS_MIX_URI,
+        Name.NS_NT_URI,
+        Name.NS_XMLNS_URI,
+        Name.NS_XML_URI,
+        Name.NS_REP_URI
+    };  // WARNING: Only edit if you really know what you're doing!
+
+    private static final Map<String, Integer> NAMESPACE_MAP =
+        new HashMap<String, Integer>();
+
+    static {
+        assert NAME_ARRAY.length < 0x80;
+        int position = 0;
+        for (Name name : NAME_ARRAY) {
+            NAME_MAP.put(name, position++);
+        }
+        assert NAMESPACE_ARRAY.length < 0x08;
+        position = 0;
+        for (String uri : NAMESPACE_ARRAY) {
+            NAMESPACE_MAP.put(uri, position++);
+        }
+    }
+
+    /**
+     * Looks up the seven-bit index of a common JCR name.
+     * @param name JCR name
+     * @return seven-bit index of the name, or -1 if the name is not known
+     */
+    public static int nameToIndex(Name name) {
+        Integer index = NAME_MAP.get(name);
+        return (index != null) ? index : -1;
+    }
+
+    /**
+     * Looks up the common JCR name stored at the given index.
+     * @param index index of a common JCR name
+     * @return the name at that index (index 0 holds <code>null</code>)
+     * @throws IOException if the index is outside the name table
+     */
+    public static Name indexToName(int index) throws IOException {
+        try {
+            return NAME_ARRAY[index];
+        } catch (ArrayIndexOutOfBoundsException e) {
+            String message = "Invalid common JCR name index: " + index;
+            throw new IOExceptionWithCause(message, e);
+        }
+    }
+
+    /**
+     * Looks up the three-bit index (0-6) of a common JCR namespace.
+     * @param uri namespace URI
+     * @return three-bit index of the namespace, or -1 if it is not known
+     */
+    public static int namespaceToIndex(String uri) {
+        Integer index = NAMESPACE_MAP.get(uri);
+        return (index != null) ? index : -1;
+    }
+
+    /**
+     * Looks up the common JCR namespace URI stored at the given index.
+     * @param index index of a common JCR namespace
+     * @return the namespace URI at that index
+     * @throws IOException if the index is outside the namespace table
+     */
+    public static String indexToNamespace(int index) throws IOException {
+        try {
+            return NAMESPACE_ARRAY[index];
+        } catch (ArrayIndexOutOfBoundsException e) {
+            String message = "Invalid common JCR namespace index: " + index;
+            throw new IOExceptionWithCause(message, e);
+        }
+    }
+
+}

Propchange: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleNames.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java?rev=1022485&r1=1022484&r2=1022485&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java Thu Oct 14 12:27:59 2010
@@ -75,14 +75,17 @@ class BundleReader {
         NodePropBundle bundle = new NodePropBundle(id);
 
         // read primary type...special handling
-        int a = in.readUnsignedByte();
-        int b = in.readUnsignedByte();
-        int c = in.readUnsignedByte();
-        String uri = binding.nsIndex.indexToString(a << 16 | b << 8 | c);
-        String local = binding.nameIndex.indexToString(in.readInt());
-        Name nodeTypeName = NameFactoryImpl.getInstance().create(uri, local);
-
-        // primaryType
+        Name nodeTypeName;
+        if (version >= BundleBinding.VERSION_3) {
+            nodeTypeName = readName();
+        } else {
+            int a = in.readUnsignedByte();
+            int b = in.readUnsignedByte();
+            int c = in.readUnsignedByte();
+            String uri = binding.nsIndex.indexToString(a << 16 | b << 8 | c);
+            String local = binding.nameIndex.indexToString(in.readInt());
+            nodeTypeName = NameFactoryImpl.getInstance().create(uri, local);
+        }
         bundle.setNodeTypeName(nodeTypeName);
 
         // parentUUID
@@ -103,17 +106,14 @@ class BundleReader {
         // properties
         name = readIndexedQName();
         while (name != null) {
-            PropertyId pId = new PropertyId(bundle.getId(), name);
+            PropertyId pId = new PropertyId(id, name);
+            NodePropBundle.PropertyEntry pState = readPropertyEntry(pId);
             // skip redundant primaryType, mixinTypes and uuid properties
-            if (name.equals(NameConstants.JCR_PRIMARYTYPE)
-                || name.equals(NameConstants.JCR_MIXINTYPES)
-                || name.equals(NameConstants.JCR_UUID)) {
-                readPropertyEntry(pId);
-                name = readIndexedQName();
-                continue;
+            if (!name.equals(NameConstants.JCR_PRIMARYTYPE)
+                && !name.equals(NameConstants.JCR_MIXINTYPES)
+                && !name.equals(NameConstants.JCR_UUID)) {
+                bundle.addProperty(pState);
             }
-            NodePropBundle.PropertyEntry pState = readPropertyEntry(pId);
-            bundle.addProperty(pState);
             name = readIndexedQName();
         }
 
@@ -279,6 +279,10 @@ class BundleReader {
      * @throws IOException in an I/O error occurs.
      */
     private Name readQName() throws IOException {
+        if (version >= BundleBinding.VERSION_3) {
+            return readName();
+        }
+
         String uri = binding.nsIndex.indexToString(in.readInt());
         String local = in.readUTF();
         return NameFactoryImpl.getInstance().create(uri, local);
@@ -291,6 +295,10 @@ class BundleReader {
      * @throws IOException in an I/O error occurs.
      */
     private Name readIndexedQName() throws IOException {
+        if (version >= BundleBinding.VERSION_3) {
+            return readName();
+        }
+
         int index = in.readInt();
         if (index < 0) {
             return null;
@@ -301,4 +309,39 @@ class BundleReader {
         }
     }
 
+    /**
+     * Deserializes a name written using bundle serialization version 3.
+     * See {@link BundleWriter} for details of the serialization format.
+     * The single-byte form may yield <code>null</code>, since index 0 of
+     * the common name table in {@link BundleNames} is the null name.
+     *
+     * @return deserialized name
+     * @throws IOException if an I/O error occurs
+     */
+    private Name readName() throws IOException {
+        int b = in.readUnsignedByte();
+        if ((b & 0x80) == 0) {
+            // high bit clear: seven-bit index into the common name table
+            return BundleNames.indexToName(b);
+        }
+        // high bit set: three-bit namespace index + four-bit name length
+        String uri;
+        int ns = (b >> 4) & 0x07;
+        if (ns != 0x07) {
+            uri = BundleNames.indexToNamespace(ns);
+        } else {
+            uri = in.readUTF(); // 7 = explicit namespace URI follows
+        }
+        String local;
+        int len = b & 0x0f;
+        if (len != 0x0f) { // test the length field, not the whole byte
+            byte[] buffer = new byte[len + 1]; // stored length is size - 1
+            in.readFully(buffer);
+            local = new String(buffer, "UTF-8");
+        } else {
+            local = in.readUTF(); // 0x0f = long name stored with writeUTF
+        }
+        return NameFactoryImpl.getInstance().create(uri, local);
+    }
+
 }

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java?rev=1022485&r1=1022484&r2=1022485&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java Thu Oct 14 12:27:59 2010
@@ -73,13 +73,8 @@ class BundleWriter {
             throws IOException {
         long size = out.size();
 
-        // primaryType and version
-        Name type = bundle.getNodeTypeName();
-        int index = binding.nsIndex.stringToIndex(type.getNamespaceURI());
-        out.writeByte(index >>> 16);
-        out.writeByte(index >>> 8);
-        out.writeByte(index);
-        out.writeInt(binding.nameIndex.stringToIndex(type.getLocalName()));
+        // primaryType
+        writeName(bundle.getNodeTypeName());
 
         // parentUUID
         writeNodeId(bundle.getParentId());
@@ -89,9 +84,9 @@ class BundleWriter {
 
         // mixin types
         for (Name name : bundle.getMixinTypeNames()) {
-            writeIndexedQName(name);
+            writeName(name);
         }
-        writeIndexedQName(null);
+        writeName(null);
 
         // properties
         for (Name pName : bundle.getPropertyNames()) {
@@ -105,11 +100,11 @@ class BundleWriter {
             if (pState == null) {
                 log.error("PropertyState missing in bundle: " + pName);
             } else {
-                writeIndexedQName(pName);
+                writeName(pName);
                 writeState(pState);
             }
         }
-        writeIndexedQName(null);
+        writeName(null);
 
         // write uuid flag
         out.writeBoolean(bundle.isReferenceable());
@@ -117,7 +112,7 @@ class BundleWriter {
         // child nodes (list of uuid/name pairs)
         for (NodePropBundle.ChildNodeEntry entry : bundle.getChildNodeEntries()) {
             writeNodeId(entry.getId());  // uuid
-            writeQName(entry.getName());   // name
+            writeName(entry.getName());   // name
         }
         writeNodeId(null);
 
@@ -263,7 +258,7 @@ class BundleWriter {
                     break;
                 case PropertyType.NAME:
                     try {
-                        writeQName(val.getName());
+                        writeName(val.getName());
                     } catch (RepositoryException e) {
                         // should never occur
                         throw new IOException("Unexpected error while writing NAME value.");
@@ -349,28 +344,65 @@ class BundleWriter {
     }
 
     /**
-     * Serializes a Name
+     * Serializes a name. The name encoding works as follows:
+     * <p>
+     * First; if the name is known by the {@link BundleNames} class (this
+     * includes the <code>null</code> name), then the name is serialized
+     * as a single byte using the following format.
+     * <pre>
+     * +-------------------------------+
+     * | 0 |    common name index      |
+     * +-------------------------------+
+     * </pre>
+     * <p>
+     * Second; if the name is not known, it gets serialized as a
+     * variable-length field whose first byte looks like this:
+     * <pre>
+     * +-------------------------------+
+     * | 1 | ns index  |  name length  |
+     * +-------------------------------+
+     * </pre>
+     * <p>
+     * The three-bit namespace index identifies either a known namespace
+     * in the {@link BundleNames} class (values 0 - 6) or an explicit
+     * namespace URI string that is written using
+     * {@link DataOutputStream#writeUTF(String)} right after this byte
+     * (value 7).
+     * <p>
+     * The four-bit name length field indicates the length (in UTF-8 bytes)
+     * of the local part of the name. Since zero-length local names are not
+     * allowed, the length is first decremented by one before storing in this
+     * field. The UTF-8 byte sequence is written out after this byte and the
+     * possible namespace URI string. If the length of the local name is
+     * larger than 15 (i.e. would be stored as 0x0f or more), then the value
+     * 0x0f is stored as the name length and the name string is written
+     * using {@link DataOutputStream#writeUTF(String)}.
      *
      * @param name the name
      * @throws IOException in an I/O error occurs.
      */
-    private void writeQName(Name name) throws IOException {
-        out.writeInt(binding.nsIndex.stringToIndex(name.getNamespaceURI()));
-        out.writeUTF(name.getLocalName());
-    }
-
-    /**
-     * Serializes a indexed Name
-     *
-     * @param name the name
-     * @throws IOException in an I/O error occurs.
-     */
-    private void writeIndexedQName(Name name) throws IOException {
-        if (name == null) {
-            out.writeInt(-1);
+    private void writeName(Name name) throws IOException {
+        int index = BundleNames.nameToIndex(name);
+        if (index != -1) {
+            assert 0 <= index && index < 0x80;
+            out.writeByte(index);  // known name: one byte, high bit clear
         } else {
-            out.writeInt(binding.nsIndex.stringToIndex(name.getNamespaceURI()));
-            out.writeInt(binding.nameIndex.stringToIndex(name.getLocalName()));
+            String uri = name.getNamespaceURI();
+            int ns = BundleNames.namespaceToIndex(uri) & 0x07;  // unknown (-1) becomes 7
+
+            String local = name.getLocalName();
+            byte[] bytes = local.getBytes("UTF-8");
+            int len = Math.min(bytes.length - 1, 0x0f);  // length - 1 capped at 15; assumes non-empty local name
+
+            out.writeByte(0x80 | ns << 4 | len);  // 1 | ns (3 bits) | len (4 bits)
+            if (ns == 0x07) {  // namespace not in BundleNames: write the URI itself
+                out.writeUTF(uri);
+            }
+            if (len != 0x0f) {  // short name: raw UTF-8 bytes only
+                out.write(bytes);
+            } else {  // 16 or more UTF-8 bytes: fall back to writeUTF
+                out.writeUTF(local);
+            }
         }
     }