You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@giraph.apache.org by ik...@apache.org on 2016/03/23 18:34:47 UTC
git commit: updated refs/heads/trunk to cc2fa8a
Repository: giraph
Updated Branches:
refs/heads/trunk a927450aa -> cc2fa8a4b
faster varint
Summary:
Varint is improved in two ways:
- faster readLong and readInt
- making sure that negative numbers can be encoded
JIRA: https://issues.apache.org/jira/browse/GIRAPH-1049
Test Plan: TestVarint.java
Reviewers: dionysis.logothetis, maja.kabiljo, sergey.edunov, ikabiljo
Reviewed By: ikabiljo
Differential Revision: https://reviews.facebook.net/D55755
Project: http://git-wip-us.apache.org/repos/asf/giraph/repo
Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/cc2fa8a4
Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/cc2fa8a4
Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/cc2fa8a4
Branch: refs/heads/trunk
Commit: cc2fa8a4b8f4508dcc05044f97acac50dd49259b
Parents: a927450
Author: spupyrev <sp...@fb.com>
Authored: Wed Mar 23 10:34:05 2016 -0700
Committer: Igor Kabiljo <ik...@fb.com>
Committed: Wed Mar 23 10:34:05 2016 -0700
----------------------------------------------------------------------
.../java/org/apache/giraph/utils/Varint.java | 312 ++++++++++++++-----
.../org/apache/giraph/utils/TestVarint.java | 201 ++++++++++++
2 files changed, 433 insertions(+), 80 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/giraph/blob/cc2fa8a4/giraph-core/src/main/java/org/apache/giraph/utils/Varint.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/main/java/org/apache/giraph/utils/Varint.java b/giraph-core/src/main/java/org/apache/giraph/utils/Varint.java
index 89d4e90..174d1f5 100644
--- a/giraph-core/src/main/java/org/apache/giraph/utils/Varint.java
+++ b/giraph-core/src/main/java/org/apache/giraph/utils/Varint.java
@@ -18,10 +18,7 @@
package org.apache.giraph.utils;
/**
- * This Code is Copied from main/java/org/apache/mahout/math/Varint.java
- *
- * Only modification is throwing exceptions for passing negative values to
- * unsigned functions, instead of serializing them.
+ * This Code is adapted from main/java/org/apache/mahout/math/Varint.java
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -43,6 +40,8 @@ import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import com.google.common.base.Preconditions;
+
/**
* <p>
* Encodes signed and unsigned values using a common variable-length scheme,
@@ -68,131 +67,284 @@ public final class Varint {
/**
* Encodes a value using the variable-length encoding from <a
* href="http://code.google.com/apis/protocolbuffers/docs/encoding.html">
- * Google Protocol Buffers</a>. Zig-zag is not used, so input must not be
- * negative. If values can be negative, use
- * {@link #writeSignedVarLong(long, DataOutput)} instead. This method treats
- * negative input as like a large unsigned value.
+ * Google Protocol Buffers</a>.
*
- * @param value
- * value to encode
- * @param out
- * to write bytes to
+ * @param value to encode
+ * @param out to write bytes to
* @throws IOException
* if {@link DataOutput} throws {@link IOException}
*/
- public static void writeUnsignedVarLong(
- long value, DataOutput out) throws IOException {
- if (value < 0) {
- throw new IllegalArgumentException(
- "Negative value passed into writeUnsignedVarLong - " + value);
- }
- while ((value & 0xFFFFFFFFFFFFFF80L) != 0L) {
- out.writeByte(((int) value & 0x7F) | 0x80);
+ private static void writeVarLong(
+ long value,
+ DataOutput out
+ ) throws IOException {
+ while (true) {
+ int bits = ((int) value) & 0x7f;
value >>>= 7;
+ if (value == 0) {
+ out.writeByte((byte) bits);
+ return;
+ }
+ out.writeByte((byte) (bits | 0x80));
}
- out.writeByte((int) value & 0x7F);
}
/**
- * @see #writeUnsignedVarLong(long, DataOutput)
- * @param value
- * value to encode
- * @param out
- * to write bytes to
+ * Encodes a value using the variable-length encoding from <a
+ * href="http://code.google.com/apis/protocolbuffers/docs/encoding.html">
+ * Google Protocol Buffers</a>.
+ *
+ * @param value to encode
+ * @param out to write bytes to
+ * @throws IOException
+ * if {@link DataOutput} throws {@link IOException}
*/
- public static void writeUnsignedVarInt(
- int value, DataOutput out) throws IOException {
- if (value < 0) {
- throw new IllegalArgumentException(
- "Negative value passed into writeUnsignedVarInt - " + value);
- }
- while ((value & 0xFFFFFF80) != 0L) {
- out.writeByte((value & 0x7F) | 0x80);
+ public static void writeUnsignedVarLong(
+ long value,
+ DataOutput out
+ ) throws IOException {
+ Preconditions.checkState(
+ value >= 0,
+ "Negative value passed into writeUnsignedVarLong - " + value
+ );
+ writeVarLong(value, out);
+ }
+
+ /**
+ * Zig-zag encoding for signed longs
+ *
+ * @param value to encode
+ * @param out to write bytes to
+ * @throws IOException
+ * if {@link DataOutput} throws {@link IOException}
+ */
+ public static void writeSignedVarLong(
+ long value,
+ DataOutput out
+ ) throws IOException {
+ writeVarLong((value << 1) ^ (value >> 63), out);
+ }
+
+ /**
+ * @see #writeVarLong(long, DataOutput)
+ * @param value to encode
+ * @param out to write bytes to
+ * @throws IOException
+ */
+ private static void writeVarInt(
+ int value,
+ DataOutput out
+ ) throws IOException {
+ while (true) {
+ int bits = value & 0x7f;
value >>>= 7;
+ if (value == 0) {
+ out.writeByte((byte) bits);
+ return;
+ }
+ out.writeByte((byte) (bits | 0x80));
}
- out.writeByte(value & 0x7F);
}
/**
- * @param in
- * to read bytes from
+ * @see #writeVarLong(long, DataOutput)
+ * @param value to encode
+ * @param out to write bytes to
+ * @throws IOException
+ */
+ public static void writeUnsignedVarInt(
+ int value,
+ DataOutput out
+ ) throws IOException {
+ Preconditions.checkState(
+ value >= 0,
+ "Negative value passed into writeUnsignedVarInt - " + value
+ );
+ writeVarInt(value, out);
+ }
+
+ /**
+ * Zig-zag encoding for signed ints
+ *
+ * @see #writeUnsignedVarInt(int, DataOutput)
+ * @param value to encode
+ * @param out to write bytes to
+ * @throws IOException
+ */
+ public static void writeSignedVarInt(
+ int value,
+ DataOutput out
+ ) throws IOException {
+ writeVarInt((value << 1) ^ (value >> 31), out);
+ }
+
+ /**
+ * @param in to read bytes from
* @return decode value
* @throws IOException
* if {@link DataInput} throws {@link IOException}
- * @throws IllegalArgumentException
- * if variable-length value does not terminate after 9 bytes have
- * been read
- * @see #writeUnsignedVarLong(long, DataOutput)
*/
public static long readUnsignedVarLong(DataInput in) throws IOException {
- long value = 0L;
- int i = 0;
- long b = in.readByte();
- while ((b & 0x80L) != 0) {
- value |= (b & 0x7F) << i;
- i += 7;
- if (i > 63) {
- throw new IllegalArgumentException(
- "Variable length quantity is too long");
+ long tmp;
+ // CHECKSTYLE: stop InnerAssignment
+ if ((tmp = in.readByte()) >= 0) {
+ return tmp;
+ }
+ long result = tmp & 0x7f;
+ if ((tmp = in.readByte()) >= 0) {
+ result |= tmp << 7;
+ } else {
+ result |= (tmp & 0x7f) << 7;
+ if ((tmp = in.readByte()) >= 0) {
+ result |= tmp << 14;
+ } else {
+ result |= (tmp & 0x7f) << 14;
+ if ((tmp = in.readByte()) >= 0) {
+ result |= tmp << 21;
+ } else {
+ result |= (tmp & 0x7f) << 21;
+ if ((tmp = in.readByte()) >= 0) {
+ result |= tmp << 28;
+ } else {
+ result |= (tmp & 0x7f) << 28;
+ if ((tmp = in.readByte()) >= 0) {
+ result |= tmp << 35;
+ } else {
+ result |= (tmp & 0x7f) << 35;
+ if ((tmp = in.readByte()) >= 0) {
+ result |= tmp << 42;
+ } else {
+ result |= (tmp & 0x7f) << 42;
+ if ((tmp = in.readByte()) >= 0) {
+ result |= tmp << 49;
+ } else {
+ result |= (tmp & 0x7f) << 49;
+ if ((tmp = in.readByte()) >= 0) {
+ result |= tmp << 56;
+ } else {
+ result |= (tmp & 0x7f) << 56;
+ result |= ((long) in.readByte()) << 63;
+ }
+ }
+ }
+ }
+ }
+ }
}
- b = in.readByte();
}
- return value | (b << i);
+ // CHECKSTYLE: resume InnerAssignment
+ return result;
+ }
+
+ /**
+ * @param in to read bytes from
+ * @return decode value
+ * @throws IOException
+ * if {@link DataInput} throws {@link IOException}
+ */
+ public static long readSignedVarLong(DataInput in) throws IOException {
+ long raw = readUnsignedVarLong(in);
+ long temp = (((raw << 63) >> 63) ^ raw) >> 1;
+ return temp ^ (raw & (1L << 63));
}
/**
- * @throws IllegalArgumentException
- * if variable-length value does not terminate after
- * 5 bytes have been read
* @throws IOException
* if {@link DataInput} throws {@link IOException}
- * @param in to read bytes from.
- * @return decode value.
+ * @param in to read bytes from
+ * @return decode value
*/
public static int readUnsignedVarInt(DataInput in) throws IOException {
- int value = 0;
- int i = 0;
- int b = in.readByte();
- while ((b & 0x80) != 0) {
- value |= (b & 0x7F) << i;
- i += 7;
- if (i > 35) {
- throw new IllegalArgumentException(
- "Variable length quantity is too long");
+ int tmp;
+ // CHECKSTYLE: stop InnerAssignment
+ if ((tmp = in.readByte()) >= 0) {
+ return tmp;
+ }
+ int result = tmp & 0x7f;
+ if ((tmp = in.readByte()) >= 0) {
+ result |= tmp << 7;
+ } else {
+ result |= (tmp & 0x7f) << 7;
+ if ((tmp = in.readByte()) >= 0) {
+ result |= tmp << 14;
+ } else {
+ result |= (tmp & 0x7f) << 14;
+ if ((tmp = in.readByte()) >= 0) {
+ result |= tmp << 21;
+ } else {
+ result |= (tmp & 0x7f) << 21;
+ result |= (in.readByte()) << 28;
+ }
}
- b = in.readByte();
}
- return value | (b << i);
+ // CHECKSTYLE: resume InnerAssignment
+ return result;
}
+
+ /**
+ * @throws IOException
+ * if {@link DataInput} throws {@link IOException}
+ * @param in to read bytes from
+ * @return decode value
+ */
+ public static int readSignedVarInt(DataInput in) throws IOException {
+ int raw = readUnsignedVarInt(in);
+ int temp = (((raw << 31) >> 31) ^ raw) >> 1;
+ return temp ^ (raw & (1 << 31));
+ }
+
/**
* Simulation for what will happen when writing an unsigned long value
* as varlong.
- * @param value the value
+ * @param value to consider
* @return the number of bytes needed to write value.
* @throws IOException
*/
public static long sizeOfUnsignedVarLong(long value) throws IOException {
- long cnt = 0;
- while ((value & 0xFFFFFFFFFFFFFF80L) != 0L) {
- cnt++;
+ int result = 0;
+ do {
+ result++;
value >>>= 7;
- }
- return ++cnt;
+ } while (value != 0);
+ return result;
+ }
+
+ /**
+ * Simulation for what will happen when writing a signed long value
+ * as varlong.
+ * @param value to consider
+ * @return the number of bytes needed to write value.
+ * @throws IOException
+ */
+ public static long sizeOfSignedVarLong(long value) throws IOException {
+ return sizeOfUnsignedVarLong((value << 1) ^ (value >> 63));
}
/**
* Simulation for what will happen when writing an unsigned int value
* as varint.
- * @param value the value
+ * @param value to consider
* @return the number of bytes needed to write value.
* @throws IOException
*/
- public static long sizeOfUnsignedVarInt(int value) throws IOException {
- long cnt = 0;
- while ((value & 0xFFFFFF80) != 0L) {
+ public static int sizeOfUnsignedVarInt(int value) throws IOException {
+ int cnt = 0;
+ do {
cnt++;
value >>>= 7;
- }
- return ++cnt;
+ } while (value != 0);
+ return cnt;
}
+
+ /**
+ * Simulation for what will happen when writing a signed int value
+ * as varint.
+ * @param value to consider
+ * @return the number of bytes needed to write value.
+ * @throws IOException
+ */
+ public static int sizeOfSignedVarInt(int value) throws IOException {
+ return sizeOfUnsignedVarInt((value << 1) ^ (value >> 31));
+ }
+
}
http://git-wip-us.apache.org/repos/asf/giraph/blob/cc2fa8a4/giraph-core/src/test/java/org/apache/giraph/utils/TestVarint.java
----------------------------------------------------------------------
diff --git a/giraph-core/src/test/java/org/apache/giraph/utils/TestVarint.java b/giraph-core/src/test/java/org/apache/giraph/utils/TestVarint.java
new file mode 100644
index 0000000..70bebd8
--- /dev/null
+++ b/giraph-core/src/test/java/org/apache/giraph/utils/TestVarint.java
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.utils;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.concurrent.ThreadLocalRandom;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestVarint {
+ private long[] genLongs(int n) {
+ long[] res = new long[n];
+ for (int i = 0; i < n; i++) {
+ res[i] = ThreadLocalRandom.current().nextLong();
+ }
+ return res;
+ }
+
+ private int[] genInts(int n) {
+ int[] res = new int[n];
+ for (int i = 0; i < n; i++) {
+ res[i] = ThreadLocalRandom.current().nextInt();
+ }
+ return res;
+ }
+
+ private void writeLongs(DataOutput out, long[] array) throws IOException {
+ for (int i = 0; i < array.length; i++) {
+ Varint.writeSignedVarLong(array[i], out);
+ }
+ }
+
+ private void writeInts(DataOutput out, int[] array) throws IOException {
+ for (int i = 0; i < array.length; i++) {
+ Varint.writeSignedVarInt(array[i], out);
+ }
+ }
+
+ private void readLongs(DataInput in, long[] array) throws IOException {
+ for (int i = 0; i < array.length; i++) {
+ array[i] = Varint.readSignedVarLong(in);
+ }
+ }
+
+ private void readInts(DataInput in, int[] array) throws IOException {
+ for (int i = 0; i < array.length; i++) {
+ array[i] = Varint.readSignedVarInt(in);
+ }
+ }
+
+ private void testVarLong(long value) throws IOException {
+ UnsafeByteArrayOutputStream os = new UnsafeByteArrayOutputStream();
+ Varint.writeSignedVarLong(value, os);
+
+ UnsafeByteArrayInputStream is = new UnsafeByteArrayInputStream(os.getByteArray());
+ long newValue = Varint.readSignedVarLong(is);
+
+ Assert.assertEquals(Varint.sizeOfSignedVarLong(value), os.getPos());
+ Assert.assertEquals(value, newValue);
+
+ if (value >= 0) {
+ os = new UnsafeByteArrayOutputStream();
+ Varint.writeUnsignedVarLong(value, os);
+ is = new UnsafeByteArrayInputStream(os.getByteArray());
+ newValue = Varint.readUnsignedVarLong(is);
+ Assert.assertEquals(Varint.sizeOfUnsignedVarLong(value), os.getPos());
+ Assert.assertEquals(value, newValue);
+ }
+ }
+
+ private void testVarInt(int value) throws IOException {
+ UnsafeByteArrayOutputStream os = new UnsafeByteArrayOutputStream();
+ Varint.writeSignedVarInt(value, os);
+
+ UnsafeByteArrayInputStream is = new UnsafeByteArrayInputStream(os.getByteArray());
+ int newValue = Varint.readSignedVarInt(is);
+
+ Assert.assertEquals(Varint.sizeOfSignedVarLong(value), os.getPos());
+ Assert.assertEquals(value, newValue);
+
+ if (value >= 0) {
+ os = new UnsafeByteArrayOutputStream();
+ Varint.writeUnsignedVarInt(value, os);
+ is = new UnsafeByteArrayInputStream(os.getByteArray());
+ newValue = Varint.readUnsignedVarInt(is);
+ Assert.assertEquals(Varint.sizeOfUnsignedVarInt(value), os.getPos());
+ Assert.assertEquals(value, newValue);
+ }
+ }
+
+ @Test
+ public void testVars() throws IOException {
+ testVarLong(0);
+ testVarLong(Long.MIN_VALUE);
+ testVarLong(Long.MAX_VALUE);
+ testVarLong(-123456789999l);
+ testVarLong(12342356789999l);
+ testVarInt(0);
+ testVarInt(4);
+ testVarInt(-1);
+ testVarInt(1);
+ testVarInt(Integer.MIN_VALUE);
+ testVarInt(Integer.MAX_VALUE);
+ testVarInt(Integer.MAX_VALUE - 1);
+ }
+
+ @Test
+ public void testVarLongSmall() throws IOException {
+ long[] array = new long[] {1, 2, 3, -5, 0, 12345678987l, Long.MIN_VALUE};
+ UnsafeByteArrayOutputStream os = new UnsafeByteArrayOutputStream();
+ writeLongs(os, array);
+
+ long[] resArray = new long[array.length];
+ UnsafeByteArrayInputStream is = new UnsafeByteArrayInputStream(os.getByteArray());
+ readLongs(is, resArray);
+
+ Assert.assertArrayEquals(array, resArray);
+ }
+
+ @Test
+ public void testVarIntSmall() throws IOException {
+ int[] array = new int[] {13, -2, 3, 0, 123456789, Integer.MIN_VALUE, Integer.MAX_VALUE};
+ UnsafeByteArrayOutputStream os = new UnsafeByteArrayOutputStream();
+ writeInts(os, array);
+
+ int[] resArray = new int[array.length];
+ UnsafeByteArrayInputStream is = new UnsafeByteArrayInputStream(os.getByteArray());
+ readInts(is, resArray);
+
+ Assert.assertArrayEquals(array, resArray);
+ }
+
+ @Test
+ public void testVarLongLarge() throws IOException {
+ int n = 1000000;
+ long[] array = genLongs(n);
+ UnsafeByteArrayOutputStream os = new UnsafeByteArrayOutputStream();
+
+ long startTime = System.currentTimeMillis();
+ writeLongs(os, array);
+ long endTime = System.currentTimeMillis();
+ System.out.println("Write time: " + (endTime - startTime) / 1000.0);
+
+ long[] resArray = new long[array.length];
+ UnsafeByteArrayInputStream is = new UnsafeByteArrayInputStream(os.getByteArray());
+ startTime = System.currentTimeMillis();
+ readLongs(is, resArray);
+ endTime = System.currentTimeMillis();
+ System.out.println("Read time: " + (endTime - startTime) / 1000.0);
+
+ Assert.assertArrayEquals(array, resArray);
+ }
+
+ @Test
+ public void testVarIntLarge() throws IOException {
+ int n = 1000000;
+ int[] array = genInts(n);
+ UnsafeByteArrayOutputStream os = new UnsafeByteArrayOutputStream();
+
+ long startTime = System.currentTimeMillis();
+ writeInts(os, array);
+ long endTime = System.currentTimeMillis();
+ System.out.println("Write time: " + (endTime - startTime) / 1000.0);
+
+ int[] resArray = new int[array.length];
+ UnsafeByteArrayInputStream is = new UnsafeByteArrayInputStream(os.getByteArray());
+ startTime = System.currentTimeMillis();
+ readInts(is, resArray);
+ endTime = System.currentTimeMillis();
+ System.out.println("Read time: " + (endTime - startTime) / 1000.0);
+
+ Assert.assertArrayEquals(array, resArray);
+ }
+
+ @Test
+ public void testSmall() throws IOException {
+ for (int i = -100000; i <= 100000; i++) {
+ testVarInt(i);
+ testVarLong(i);
+ }
+ }
+
+}