You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by ju...@apache.org on 2010/10/15 11:16:21 UTC
svn commit: r1022864 - in /jackrabbit/trunk/jackrabbit-core/src:
main/java/org/apache/jackrabbit/core/persistence/util/
test/java/org/apache/jackrabbit/core/persistence/util/
Author: jukka
Date: Fri Oct 15 09:16:15 2010
New Revision: 1022864
URL: http://svn.apache.org/viewvc?rev=1022864&view=rev
Log:
JCR-2762: Optimize bundle serialization
Use variable-length serialization for LONG values, and leverage that also for DATE serialization together with an encoding that maps dates (millisecond accuracy, with timezone offsets) to 64 bits.
Modified:
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java
jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/persistence/util/BundleBindingTest.java
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java?rev=1022864&r1=1022863&r2=1022864&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleReader.java Fri Oct 15 09:16:15 2010
@@ -28,8 +28,11 @@ import org.apache.jackrabbit.spi.commons
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Calendar;
+import java.util.GregorianCalendar;
import java.util.HashSet;
import java.util.Set;
+import java.util.TimeZone;
import java.math.BigDecimal;
import javax.jcr.PropertyType;
@@ -45,6 +48,44 @@ class BundleReader {
/** Logger instance */
private static Logger log = LoggerFactory.getLogger(BundleReader.class);
+ /**
+ * Pre-calculated {@link TimeZone} objects for common timezone offsets.
+ * <p>
+ * The array index is the 5-bit two's complement representation of the
+ * whole-hour offset (shown in binary next to each entry): indexes 0-15
+ * hold GMT+00:00 to GMT+15:00 and indexes 16-31 hold GMT-16:00 to
+ * GMT-01:00. {@link #readDate()} uses the 5-bit hour field of an encoded
+ * date directly as an index into this array.
+ */
+ private static final TimeZone[] COMMON_TIMEZONES = {
+ TimeZone.getTimeZone("GMT+00:00"), // 0b00000
+ TimeZone.getTimeZone("GMT+01:00"), // 0b00001
+ TimeZone.getTimeZone("GMT+02:00"), // 0b00010
+ TimeZone.getTimeZone("GMT+03:00"), // 0b00011
+ TimeZone.getTimeZone("GMT+04:00"), // 0b00100
+ TimeZone.getTimeZone("GMT+05:00"), // 0b00101
+ TimeZone.getTimeZone("GMT+06:00"), // 0b00110
+ TimeZone.getTimeZone("GMT+07:00"), // 0b00111
+ TimeZone.getTimeZone("GMT+08:00"), // 0b01000
+ TimeZone.getTimeZone("GMT+09:00"), // 0b01001
+ TimeZone.getTimeZone("GMT+10:00"), // 0b01010
+ TimeZone.getTimeZone("GMT+11:00"), // 0b01011
+ TimeZone.getTimeZone("GMT+12:00"), // 0b01100
+ TimeZone.getTimeZone("GMT+13:00"), // 0b01101
+ TimeZone.getTimeZone("GMT+14:00"), // 0b01110
+ TimeZone.getTimeZone("GMT+15:00"), // 0b01111
+ TimeZone.getTimeZone("GMT-16:00"), // 0b10000
+ TimeZone.getTimeZone("GMT-15:00"), // 0b10001
+ TimeZone.getTimeZone("GMT-14:00"), // 0b10010
+ TimeZone.getTimeZone("GMT-13:00"), // 0b10011
+ TimeZone.getTimeZone("GMT-12:00"), // 0b10100
+ TimeZone.getTimeZone("GMT-11:00"), // 0b10101
+ TimeZone.getTimeZone("GMT-10:00"), // 0b10110
+ TimeZone.getTimeZone("GMT-09:00"), // 0b10111
+ TimeZone.getTimeZone("GMT-08:00"), // 0b11000
+ TimeZone.getTimeZone("GMT-07:00"), // 0b11001
+ TimeZone.getTimeZone("GMT-06:00"), // 0b11010
+ TimeZone.getTimeZone("GMT-05:00"), // 0b11011
+ TimeZone.getTimeZone("GMT-04:00"), // 0b11100
+ TimeZone.getTimeZone("GMT-03:00"), // 0b11101
+ TimeZone.getTimeZone("GMT-02:00"), // 0b11110
+ TimeZone.getTimeZone("GMT-01:00"), // 0b11111
+ };
+
private final BundleBinding binding;
private final DataInputStream in;
@@ -249,7 +290,11 @@ class BundleReader {
val = InternalValue.create(readDecimal());
break;
case PropertyType.LONG:
- val = InternalValue.create(in.readLong());
+ if (version >= BundleBinding.VERSION_3) {
+ val = InternalValue.create(readVarLong());
+ } else {
+ val = InternalValue.create(in.readLong());
+ }
break;
case PropertyType.BOOLEAN:
val = InternalValue.create(in.readBoolean());
@@ -263,6 +308,11 @@ class BundleReader {
case PropertyType.REFERENCE:
val = InternalValue.create(readNodeId(), false);
break;
+ case PropertyType.DATE:
+ if (version >= BundleBinding.VERSION_3) {
+ val = InternalValue.create(readDate());
+ break;
+ } // else fall through
default:
if (version >= BundleBinding.VERSION_3) {
val = InternalValue.valueOf(
@@ -385,7 +435,7 @@ class BundleReader {
* Deserializes a variable-length integer written using bundle
* serialization version 3.
*
- * @return deserialized name
+ * @return deserialized integer
* @throws IOException if an I/O error occurs
*/
private int readVarInt() throws IOException {
@@ -397,6 +447,110 @@ class BundleReader {
}
}
+ /**
+ * Deserializes a variable-length long written using bundle
+ * serialization version 3.
+ * <p>
+ * The value is stored in groups of seven bits, least significant group
+ * first, with the high bit of each byte set when more bytes follow. A
+ * value that needs all 64 bits takes ten bytes, of which the last one
+ * carries only a single payload bit (bit 63). Once reassembled, the
+ * lowest bit of the unsigned value is the sign flag: when it is set the
+ * remaining bits are inverted to restore the negative number.
+ *
+ * @return deserialized long
+ * @throws IOException if an I/O error occurs
+ */
+ private long readVarLong() throws IOException {
+ long value = 0;
+ int bits = 0;
+ long b;
+ do {
+ b = in.readUnsignedByte();
+ if (bits < 57) {
+ // Regular group: push seven payload bits in from the top
+ value = (b & 0x7f) << 57 | value >>> 7;
+ bits += 7;
+ } else {
+ // Tenth byte: only bit 63 is left to fill. Shifting by a full
+ // seven bits here would drop six bits of the first group and
+ // make the final (64 - bits) shift count negative.
+ value = (b & 0x01) << 63 | value >>> 1;
+ bits = 64;
+ }
+ } while ((b & 0x80) != 0);
+ // Move the collected bits down from the top of the word
+ value = value >>> (64 - bits);
+ if ((value & 1) != 0) {
+ return ~(value >>> 1);
+ } else {
+ return value >>> 1;
+ }
+ }
+
+ /**
+ * Deserializes a specially encoded date written using bundle
+ * serialization version 3.
+ * <p>
+ * The encoded long is consumed from the least significant bits upwards:
+ * first the variable-length timezone field, then the two-bit accuracy
+ * type with its matching time-of-day field, then the 9-bit day of year,
+ * and finally the year stored as an offset from 2010.
+ *
+ * @return deserialized date
+ * @throws IOException if an I/O error occurs
+ */
+ private Calendar readDate() throws IOException {
+ long ts = readVarLong();
+
+ TimeZone tz;
+ if ((ts & 1) == 0) {
+ // UTC, one-bit timezone field
+ tz = COMMON_TIMEZONES[0];
+ ts >>= 1;
+ } else if ((ts & 2) == 0) {
+ // Whole-hour offset, used directly as an index
+ tz = COMMON_TIMEZONES[((int) ts >> 2) & 0x1f]; // 5 bits
+ ts >>= 7;
+ } else {
+ int m = ((int) ts << 19) >> 21; // 11 bits, sign-extended
+ int a = Math.abs(m);
+ int hh = a / 60;
+ int mm = a % 60;
+ // Build the custom timezone id digit by digit instead of using
+ // String.format(): its %d output follows the default locale and
+ // may contain digits that TimeZone.getTimeZone() cannot parse,
+ // in which case the offset would silently fall back to GMT.
+ StringBuilder id = new StringBuilder("GMT");
+ id.append(m < 0 ? '-' : '+');
+ id.append(hh / 10).append(hh % 10);
+ id.append(':');
+ id.append(mm / 10).append(mm % 10);
+ tz = TimeZone.getTimeZone(id.toString());
+ ts >>= 13;
+ }
+
+ int u = 0;
+ int s = 0;
+ int m = 0;
+ int h = 0;
+ int type = (int) ts & 3;
+ ts >>= 2;
+ switch (type) {
+ case 3:
+ // Millisecond of day, split into h:m:s.u
+ u = (int) ts & 0x3fffffff; // 30 bits
+ s = u / 1000;
+ m = s / 60;
+ h = m / 60;
+ m -= h * 60;
+ s -= (h * 60 + m) * 60;
+ u -= ((h * 60 + m) * 60 + s) * 1000;
+ ts >>= 30;
+ break;
+ case 2:
+ // Minute of day
+ m = (int) ts & 0x07ff; // 11 bits
+ h = m / 60;
+ m -= h * 60;
+ ts >>= 11;
+ break;
+ case 1:
+ // Hour of day
+ h = (int) ts & 0x1f; // 5 bits
+ ts >>= 5;
+ break;
+ }
+
+ int d = (int) ts & 0x01ff; // 9 bits
+ ts >>= 9;
+ int y = (int) (ts + 2010);
+
+ // The year and era fields below follow Gregorian semantics, so create
+ // a GregorianCalendar explicitly; Calendar.getInstance() may return a
+ // different calendar system depending on the default locale.
+ Calendar value = new GregorianCalendar(tz);
+ if (y <= 0) {
+ // Astronomical year 0 and below map to 1 BC and earlier
+ value.set(Calendar.YEAR, 1 - y);
+ value.set(Calendar.ERA, GregorianCalendar.BC);
+ } else {
+ value.set(Calendar.YEAR, y);
+ value.set(Calendar.ERA, GregorianCalendar.AD);
+ }
+ value.set(Calendar.DAY_OF_YEAR, d);
+ value.set(Calendar.HOUR_OF_DAY, h);
+ value.set(Calendar.MINUTE, m);
+ value.set(Calendar.SECOND, s);
+ value.set(Calendar.MILLISECOND, u);
+
+ return value;
+ }
+
private String readString() throws IOException {
if (version >= BundleBinding.VERSION_3) {
return new String(readBytes(0, 0), "UTF-8");
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java?rev=1022864&r1=1022863&r2=1022864&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/persistence/util/BundleWriter.java Fri Oct 15 09:16:15 2010
@@ -22,6 +22,8 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.math.BigDecimal;
+import java.util.Calendar;
+import java.util.GregorianCalendar;
import javax.jcr.PropertyType;
import javax.jcr.RepositoryException;
@@ -36,7 +38,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * Bundle serializater.
+ * Bundle serializer.
*
* @see BundleReader
*/
@@ -282,7 +284,7 @@ class BundleWriter {
break;
case PropertyType.LONG:
try {
- out.writeLong(val.getLong());
+ writeVarLong(val.getLong());
} catch (RepositoryException e) {
// should never occur
throw new IOException("Unexpected error while writing LONG value.");
@@ -308,6 +310,14 @@ class BundleWriter {
case PropertyType.REFERENCE:
writeNodeId(val.getNodeId());
break;
+ case PropertyType.DATE:
+ try {
+ writeDate(val.getCalendar());
+ } catch (RepositoryException e) {
+ // should never occur
+ throw new IOException("Unexpected error while writing DATE value.");
+ }
+ break;
default:
writeString(val.toString());
break;
@@ -494,6 +504,179 @@ class BundleWriter {
}
/**
+ * Serializes a long value using a variable length encoding like the
+ * one used by {@link #writeVarInt(int)} for integer values. Before
+ * writing out, the value is first normalized to an unsigned value
+ * by moving the sign bit to the least significant position and
+ * inverting the other bits of a negative value. This normalization
+ * step maximizes the number of zero high order bits for typical small
+ * values (positive or negative), and thus keeps the serialization short.
+ * <p>
+ * The normalized value is written seven bits at a time, lowest bits
+ * first; the high bit of each byte is set when more bytes follow.
+ *
+ * @param value long value
+ * @throws IOException if an I/O error occurs
+ */
+ private void writeVarLong(long value) throws IOException {
+ // Normalize to an unsigned value with the sign as the lowest bit
+ if (value < 0) {
+ value = ~value << 1 | 1;
+ } else {
+ value <<= 1;
+ }
+ while (true) {
+ long b = value & 0x7f;
+ if (b != value) {
+ // More bits remain above the low seven: set the continuation flag
+ out.writeByte((int) b | 0x80);
+ value >>>= 7; // unsigned shift
+ } else {
+ // Everything left fits in seven bits: this is the last byte
+ out.writeByte((int) b);
+ return;
+ }
+ }
+ }
+
+ /**
+ * Serializes a JCR date value using the {@link #writeVarLong(long)}
+ * serialization on a special 64-bit date encoding. This encoding maps
+ * the <code>sYYYY-MM-DDThh:mm:ss.sssTZD</code> date format used by
+ * JCR to as small a 64-bit integer (positive or negative) as possible,
+ * while preserving full accuracy (including time zone offsets) and
+ * favouring common levels of accuracy (per minute, hour and day) over
+ * full millisecond level detail.
+ * <p>
+ * Each date value is mapped to separate timestamp and timezone fields,
+ * both of whose lengths are variable:
+ * <pre>
+ * +----- ... ------- ... --+
+ * | timestamp | timezone |
+ * +----- ... ------- ... --+
+ * </pre>
+ * <p>
+ * The type and length of the timezone field can be determined by looking
+ * at the two least significant bits of the value:
+ * <dl>
+ * <dt><code>?0</code></dt>
+ * <dd>
+ * UTC time. The length of the timezone field is just one bit,
+ * i.e. the second bit is already a part of the timestamp field.
+ * </dd>
+ * <dt><code>01</code></dt>
+ * <dd>
+ * The offset is counted as hours from UTC, and stored as the number
+ * of hours (positive or negative) in the next 5 bits (range from
+ * -16 to +15 hours), making the timezone field 7 bits long in total.
+ * </dd>
+ * <dt><code>11</code></dt>
+ * <dd>
+ * The offset is counted as hours and minutes from UTC, and stored
+ * as the total minute offset (positive or negative) in the next
+ * 11 bits (range from -17 to +17 hours), making the timezone field
+ * 13 bits long in total.
+ * </dd>
+ * </dl>
+ * <p>
+ * The remaining 51-63 bits of the encoded value make up the timestamp
+ * field that also uses the two least significant bits to indicate the
+ * type and length of the field:
+ * <dl>
+ * <dt><code>00</code></dt>
+ * <dd>
+ * <code>sYYYY-MM-DDT00:00:00.000</code>, i.e. midnight of the
+ * specified date. The next 9 bits encode the day within the year
+ * (starting from 1, maximum value 366) and the remaining bits are
+ * used for the year, stored as an offset from year 2010.
+ * </dd>
+ * <dt><code>01</code></dt>
+ * <dd>
+ * <code>sYYYY-MM-DDThh:00:00.000</code>, i.e. at the hour. The
+ * next 5 bits encode the hour within the day (starting from 0,
+ * maximum value 23) and the remaining bits are used as described
+ * above for the date.
+ * </dd>
+ * <dt><code>10</code></dt>
+ * <dd>
+ * <code>sYYYY-MM-DDThh:mm:00.000</code>, i.e. at the minute. The
+ * next 11 bits encode the minute within the day (starting from 0,
+ * maximum value 1439) and the remaining bits are used as described
+ * above for the date.
+ * </dd>
+ * <dt><code>11</code></dt>
+ * <dd>
+ * <code>sYYYY-MM-DDThh:mm:ss.sss</code>, i.e. full millisecond
+ * accuracy. The next 30 bits encode the millisecond within the
+ * day (starting from 0, maximum value 86399999) and the remaining
+ * bits are used as described above for the date.
+ * </dd>
+ * </dl>
+ * <p>
+ * With full timezone and millisecond accuracies, this encoding leaves
+ * 10 bits (64 - 9 - 30 - 2 - 11 - 2) for the date offset, which allows
+ * for representation of all timestamps between years 1498 and 2521.
+ * Timestamps outside this range and with a minute-level timezone offset
+ * are automatically truncated to minute-level accuracy to support the
+ * full range of years -9999 to 9999 specified in JCR.
+ * <p>
+ * Note that the year, day of year, and time of day values are stored
+ * as separate bit sequences to avoid problems with changing leap second
+ * or leap year definitions. Bit fields are used for better encoding and
+ * decoding performance than what would be possible with the slightly more
+ * space efficient mechanism of using multiplication and modulo divisions
+ * to separate the different timestamp fields.
+ *
+ * @param value date value
+ * @throws IOException if an I/O error occurs
+ */
+ private void writeDate(Calendar value) throws IOException {
+ int y = value.get(Calendar.YEAR);
+ if (value.isSet(Calendar.ERA)
+ && value.get(Calendar.ERA) == GregorianCalendar.BC) {
+ y = 1 - y; // convert to an astronomical year
+ }
+ y -= 2010; // use a recent offset NOTE: do not change this!
+
+ int d = value.get(Calendar.DAY_OF_YEAR);
+ int h = value.get(Calendar.HOUR_OF_DAY);
+ int m = value.get(Calendar.MINUTE);
+ int s = value.get(Calendar.SECOND);
+ int u = value.get(Calendar.MILLISECOND);
+ // Timezone offset in minutes at the given instant, split into whole
+ // hours (zh) and the remaining minutes (zm). Integer division truncates
+ // towards zero, so a non-zero zh or zm has the same sign as z.
+ int z = value.getTimeZone().getOffset(value.getTimeInMillis()) / (60 * 1000);
+ int zh = z / 60;
+ int zm = z - zh * 60;
+
+ // Date part: year offset in the high bits, 9-bit day of year below it
+ long ts = y << 9 | d & 0x01ff;
+
+ // Append the time of day in the most compact form that fits. Full
+ // millisecond accuracy is only used when the year offset fits in
+ // 10 bits or no minute-level timezone field is needed; otherwise the
+ // seconds and milliseconds are dropped and a coarser form is used.
+ if ((u != 0 || s != 0) && ((-512 <= y && y < 512) || zm == 0)) {
+ ts <<= 30;
+ ts |= (((h * 60 + m) * 60 + s) * 1000 + u) & 0x3fffffff; // 30 bits
+ ts <<= 2;
+ ts |= 3;
+ } else if (m != 0) {
+ ts <<= 11;
+ ts |= (h * 60 + m) & 0x07ff; // 11 bits
+ ts <<= 2;
+ ts |= 2;
+ } else if (h != 0) {
+ ts <<= 5;
+ ts |= h & 0x1f; // 5 bits
+ ts <<= 2;
+ ts |= 1;
+ } else {
+ ts <<= 2;
+ }
+
+ // Append the timezone field in the lowest bits: total minute offset
+ // (type bits 11), whole-hour offset (type bits 01) or UTC (single 0 bit)
+ if (zm != 0) {
+ ts <<= 11;
+ ts |= z & 0x07ff; // 11 bits
+ writeVarLong(ts << 2 | 3);
+ } else if (zh != 0) {
+ ts <<= 5;
+ ts |= zh & 0x1f; // 5 bits
+ writeVarLong(ts << 2 | 1);
+ } else {
+ writeVarLong(ts << 1);
+ }
+ }
+
+ /**
* Serializes a string in UTF-8. The length of the UTF-8 byte sequence
* is first written as a variable-length integer (see
* {@link #writeVarInt(int)}), and then the sequence itself is written.
Modified: jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/persistence/util/BundleBindingTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/persistence/util/BundleBindingTest.java?rev=1022864&r1=1022863&r2=1022864&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/persistence/util/BundleBindingTest.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/persistence/util/BundleBindingTest.java Fri Oct 15 09:16:15 2010
@@ -44,6 +44,8 @@ import junit.framework.TestCase;
public class BundleBindingTest extends TestCase {
+ private static final NameFactory factory = NameFactoryImpl.getInstance();
+
private BundleBinding binding;
protected void setUp() throws Exception {
@@ -113,7 +115,6 @@ public class BundleBindingTest extends T
bundle.setSharedSet(new HashSet<NodeId>(Arrays.asList(
new NodeId(5, 6), new NodeId(7, 8), new NodeId(9, 10))));
- NameFactory factory = NameFactoryImpl.getInstance();
PropertyEntry property;
property = new PropertyEntry(
@@ -291,7 +292,6 @@ public class BundleBindingTest extends T
bundle.setMixinTypeNames(Collections.<Name>emptySet());
bundle.setSharedSet(Collections.<NodeId>emptySet());
- NameFactory factory = NameFactoryImpl.getInstance();
bundle.addChildNodeEntry(factory.create("ns1", "test"), new NodeId());
bundle.addChildNodeEntry(factory.create("ns2", "test"), new NodeId());
bundle.addChildNodeEntry(factory.create("ns3", "test"), new NodeId());
@@ -311,6 +311,63 @@ public class BundleBindingTest extends T
assertBundleRoundtrip(bundle);
}
+ /**
+ * Tests serialization of date values. Each date string is turned into
+ * a DATE value and pushed through a bundle write/read roundtrip.
+ * <p>
+ * NOTE(review): none of the cases below appears to need all 64 bits of
+ * the date encoding (millisecond accuracy combined with a minute-level
+ * timezone offset and a year several centuries away from 2010); consider
+ * adding such a case so that the longest variable-length form is covered.
+ */
+ public void testDateSerialization() throws Exception {
+ assertDateSerialization("2010-10-10T10:10:10.100Z");
+
+ // Different kinds of timezone offsets
+ assertDateSerialization("2010-10-10T10:10:10.100+11:00");
+ assertDateSerialization("2010-10-10T10:10:10.100-14:00");
+ assertDateSerialization("2010-10-10T10:10:10.100+00:12");
+ assertDateSerialization("2010-10-10T10:10:10.100-08:14");
+
+ // Different timestamp accuracies
+ assertDateSerialization("2010-10-10T10:10:00.000Z");
+ assertDateSerialization("2010-10-10T10:00:00.000Z");
+ assertDateSerialization("2010-10-10T00:00:00.000Z");
+
+ // Dates far from today
+ assertDateSerialization("1970-01-01T00:00:00.000Z");
+ assertDateSerialization("1970-01-01T12:34:56.789-13:45");
+ assertDateSerialization("2030-10-10T10:10:10.100+10:10");
+ assertDateSerialization("2345-10-10T10:10:10.100Z");
+ assertDateSerialization("+9876-10-10T10:10:10.100Z");
+ assertDateSerialization("-9876-10-10T10:10:10.100Z");
+ }
+
+ /**
+ * Creates a DATE value from the given string and asserts that it
+ * survives a bundle serialization roundtrip unchanged.
+ *
+ * @param date date string to turn into a DATE value
+ */
+ private void assertDateSerialization(String date) throws Exception {
+ assertValueSerialization(
+ InternalValue.valueOf(date, PropertyType.DATE));
+ }
+
+ /**
+ * Stores the given value as the only value of a single-valued property
+ * named "test" in an otherwise empty bundle, writes the bundle with the
+ * binding under test, reads it back and asserts that the value read is
+ * equal to the one written.
+ *
+ * @param value value to send through the roundtrip
+ */
+ private void assertValueSerialization(InternalValue value)
+ throws Exception {
+ NodePropBundle bundle = new NodePropBundle(new NodeId());
+ bundle.setParentId(new NodeId());
+ bundle.setNodeTypeName(NameConstants.NT_UNSTRUCTURED);
+ bundle.setMixinTypeNames(Collections.<Name>emptySet());
+ bundle.setSharedSet(Collections.<NodeId>emptySet());
+
+ Name name = factory.create("", "test");
+
+ PropertyEntry property =
+ new PropertyEntry(new PropertyId(bundle.getId(), name));
+ property.setType(value.getType());
+ property.setMultiValued(false);
+ property.setValues(new InternalValue[] { value });
+ bundle.addProperty(property);
+
+ // Serialize to memory and read back under the same bundle id
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+ binding.writeBundle(buffer, bundle);
+ byte[] bytes = buffer.toByteArray();
+ NodePropBundle result =
+ binding.readBundle(new ByteArrayInputStream(bytes), bundle.getId());
+
+ assertEquals(value, result.getPropertyEntry(name).getValues()[0]);
+ }
+
private void assertBundleRoundtrip(NodePropBundle bundle)
throws Exception {
ByteArrayOutputStream buffer = new ByteArrayOutputStream();