You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2015/01/09 07:34:37 UTC
svn commit: r1650447 [11/25] - in /nutch/branches/2.x: ./
src/java/org/apache/nutch/api/ src/java/org/apache/nutch/api/impl/
src/java/org/apache/nutch/api/impl/db/
src/java/org/apache/nutch/api/model/response/
src/java/org/apache/nutch/api/resources/ s...
Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/Bytes.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/Bytes.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/Bytes.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/Bytes.java Fri Jan 9 06:34:33 2015
@@ -42,1425 +42,1412 @@ import org.apache.hadoop.io.WritableUtil
*/
public class Bytes {
- private static final Logger LOG = LoggerFactory.getLogger(Bytes.class);
+ private static final Logger LOG = LoggerFactory.getLogger(Bytes.class);
- /** When we encode strings, we always specify UTF8 encoding */
- public static final String UTF8_ENCODING = "UTF-8";
-
- /**
- * An empty instance.
- */
- public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
-
- /**
- * Size of boolean in bytes
- */
- public static final int SIZEOF_BOOLEAN = Byte.SIZE / Byte.SIZE;
-
- /**
- * Size of byte in bytes
- */
- public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN;
-
- /**
- * Size of char in bytes
- */
- public static final int SIZEOF_CHAR = Character.SIZE / Byte.SIZE;
-
- /**
- * Size of double in bytes
- */
- public static final int SIZEOF_DOUBLE = Double.SIZE / Byte.SIZE;
-
- /**
- * Size of float in bytes
- */
- public static final int SIZEOF_FLOAT = Float.SIZE / Byte.SIZE;
-
- /**
- * Size of int in bytes
- */
- public static final int SIZEOF_INT = Integer.SIZE / Byte.SIZE;
-
- /**
- * Size of long in bytes
- */
- public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE;
-
- /**
- * Size of short in bytes
- */
- public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE;
-
- /**
- * Estimate of size cost to pay beyond payload in jvm for instance of byte
- * []. Estimate based on study of jhat and jprofiler numbers.
- */
- // JHat says BU is 56 bytes.
- // SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?)
- public static final int ESTIMATED_HEAP_TAX = 16;
-
- /**
- * Byte array comparator class.
- */
- public static class ByteArrayComparator implements RawComparator<byte[]> {
- /**
- * Constructor
- */
- public ByteArrayComparator() {
- super();
- }
-
- public int compare(byte[] left, byte[] right) {
- return compareTo(left, right);
- }
-
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- return compareTo(b1, s1, l1, b2, s2, l2);
- }
- }
-
- /**
- * Pass this to TreeMaps where byte [] are keys.
- */
- public static Comparator<byte[]> BYTES_COMPARATOR = new ByteArrayComparator();
-
- /**
- * Use for comparing byte arrays, byte-by-byte
- */
- public static RawComparator<byte[]> BYTES_RAWCOMPARATOR = new ByteArrayComparator();
-
- /**
- * Read byte-array written with a WritableUtils vint prefix.
- *
- * @param in
- * Input to read from.
- * @return byte array read off <code>in</code>
- * @throws IOException
- * e
- */
- public static byte[] readByteArray(final DataInput in) throws IOException {
- int len = WritableUtils.readVInt(in);
- if (len < 0) {
- throw new NegativeArraySizeException(Integer.toString(len));
- }
- byte[] result = new byte[len];
- in.readFully(result, 0, len);
- return result;
- }
-
- /**
- * Read byte-array written with a WritableUtils vint prefix. Any exception
- * thrown while reading is rethrown wrapped in a RuntimeException.
- *
- * @param in
- * Input to read from.
- * @return byte array read off <code>in</code>
- */
- public static byte[] readByteArrayThrowsRuntime(final DataInput in) {
- try {
- return readByteArray(in);
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- /**
- * Write byte-array with a WritableUtils vint prefix.
- *
- * @param out
- * output stream to be written to
- * @param b
- * array to write
- * @throws IOException
- * e
- */
- public static void writeByteArray(final DataOutput out, final byte[] b)
- throws IOException {
- if (b == null) {
- WritableUtils.writeVInt(out, 0);
- } else {
- writeByteArray(out, b, 0, b.length);
- }
- }
-
- /**
- * Write byte-array to out with a vint length prefix.
- *
- * @param out
- * output stream
- * @param b
- * array
- * @param offset
- * offset into array
- * @param length
- * length past offset
- * @throws IOException
- * e
- */
- public static void writeByteArray(final DataOutput out, final byte[] b,
- final int offset, final int length) throws IOException {
- WritableUtils.writeVInt(out, length);
- out.write(b, offset, length);
- }
-
- /**
- * Write byte-array from src to tgt with a vint length prefix.
- *
- * @param tgt
- * target array
- * @param tgtOffset
- * offset into target array
- * @param src
- * source array
- * @param srcOffset
- * source offset
- * @param srcLength
- * source length
- * @return New offset in tgt array, just past the bytes written.
- */
- public static int writeByteArray(final byte[] tgt, final int tgtOffset,
- final byte[] src, final int srcOffset, final int srcLength) {
- byte[] vint = vintToBytes(srcLength);
- System.arraycopy(vint, 0, tgt, tgtOffset, vint.length);
- int offset = tgtOffset + vint.length;
- System.arraycopy(src, srcOffset, tgt, offset, srcLength);
- return offset + srcLength;
- }
-
- /**
- * Put bytes at the specified byte array position.
- *
- * @param tgtBytes
- * the byte array
- * @param tgtOffset
- * position in the array
- * @param srcBytes
- * array to write out
- * @param srcOffset
- * source offset
- * @param srcLength
- * source length
- * @return incremented offset
- */
- public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes,
- int srcOffset, int srcLength) {
- System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength);
- return tgtOffset + srcLength;
- }
-
- /**
- * Write a single byte out to the specified byte array position.
- *
- * @param bytes
- * the byte array
- * @param offset
- * position in the array
- * @param b
- * byte to write out
- * @return incremented offset
- */
- public static int putByte(byte[] bytes, int offset, byte b) {
- bytes[offset] = b;
- return offset + 1;
- }
-
- /**
- * Returns a new byte array, copied from the passed ByteBuffer.
- *
- * @param bb
- * A ByteBuffer
- * @return the byte array
- */
- public static byte[] toBytes(ByteBuffer bb) {
- int length = bb.remaining();
- byte[] result = new byte[length];
- System.arraycopy(bb.array(), bb.arrayOffset() + bb.position(), result, 0, length);
- return result;
- }
+ /** When we encode strings, we always specify UTF8 encoding */
+ public static final String UTF8_ENCODING = "UTF-8";
+ /**
+ * An empty instance.
+ */
+ public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
+
+ /**
+ * Size of boolean in bytes
+ */
+ public static final int SIZEOF_BOOLEAN = Byte.SIZE / Byte.SIZE;
+
+ /**
+ * Size of byte in bytes
+ */
+ public static final int SIZEOF_BYTE = SIZEOF_BOOLEAN;
+
+ /**
+ * Size of char in bytes
+ */
+ public static final int SIZEOF_CHAR = Character.SIZE / Byte.SIZE;
+
+ /**
+ * Size of double in bytes
+ */
+ public static final int SIZEOF_DOUBLE = Double.SIZE / Byte.SIZE;
+
+ /**
+ * Size of float in bytes
+ */
+ public static final int SIZEOF_FLOAT = Float.SIZE / Byte.SIZE;
+
+ /**
+ * Size of int in bytes
+ */
+ public static final int SIZEOF_INT = Integer.SIZE / Byte.SIZE;
+
+ /**
+ * Size of long in bytes
+ */
+ public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE;
+
+ /**
+ * Size of short in bytes
+ */
+ public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE;
+
+ /**
+ * Estimate of size cost to pay beyond payload in jvm for instance of byte [].
+ * Estimate based on study of jhat and jprofiler numbers.
+ */
+ // JHat says BU is 56 bytes.
+ // SizeOf which uses java.lang.instrument says 24 bytes. (3 longs?)
+ public static final int ESTIMATED_HEAP_TAX = 16;
+
+ /**
+ * Byte array comparator class.
+ */
+ public static class ByteArrayComparator implements RawComparator<byte[]> {
/**
- * This method will convert utf8 encoded bytes into a string. If an
- * UnsupportedEncodingException occurs, this method will eat it and return
- * null instead.
- *
- * @param bb
- * Presumed UTF-8 encoded ByteBuffer.
- * @return String made from <code>b</code> or null
+ * Constructor
*/
- public static String toString(ByteBuffer bb) {
- return bb == null
- ? null
- : toString(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining());
- }
-
- /**
- * @param b
- * Presumed UTF-8 encoded byte array.
- * @return String made from <code>b</code>
- */
- public static String toString(final byte[] b) {
- if (b == null) {
- return null;
- }
- return toString(b, 0, b.length);
- }
-
- /**
- * Joins two byte arrays together using a separator.
- *
- * @param b1
- * The first byte array.
- * @param sep
- * The separator to use.
- * @param b2
- * The second byte array.
- */
- public static String toString(final byte[] b1, String sep, final byte[] b2) {
- return toString(b1, 0, b1.length) + sep + toString(b2, 0, b2.length);
- }
-
- /**
- * This method will convert utf8 encoded bytes into a string. If an
- * UnsupportedEncodingException occurs, this method will eat it and return
- * null instead.
- *
- * @param b
- * Presumed UTF-8 encoded byte array.
- * @param off
- * offset into array
- * @param len
- * length of utf-8 sequence
- * @return String made from <code>b</code> or null
- */
- public static String toString(final byte[] b, int off, int len) {
- if (b == null) {
- return null;
- }
- if (len == 0) {
- return "";
- }
- try {
- return new String(b, off, len, UTF8_ENCODING);
- } catch (UnsupportedEncodingException e) {
- LOG.error("UTF-8 not supported?", e);
- return null;
- }
- }
+ public ByteArrayComparator() {
+ super();
+ }
- /**
- * Write a printable representation of a ByteBuffer. Non-printable
- * characters are hex escaped in the format \\x%02X, eg: \x00 \x05 etc
- *
- * @param bb
- * ByteBuffer to write out
- * @return string output
- */
- public static String toStringBinary(ByteBuffer bb) {
- return bb == null
- ? null
- : toStringBinary(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining());
- }
-
- /**
- * Write a printable representation of a byte array.
- *
- * @param b
- * byte array
- * @return string
- * @see #toStringBinary(byte[], int, int)
- */
- public static String toStringBinary(final byte[] b) {
- return toStringBinary(b, 0, b.length);
- }
-
- /**
- * Write a printable representation of a byte array. Non-printable
- * characters are hex escaped in the format \\x%02X, eg: \x00 \x05 etc
- *
- * @param b
- * array to write out
- * @param off
- * offset to start at
- * @param len
- * length to write
- * @return string output
- */
- public static String toStringBinary(final byte[] b, int off, int len) {
- StringBuilder result = new StringBuilder();
- try {
- String first = new String(b, off, len, "ISO-8859-1");
- for (int i = 0; i < first.length(); ++i) {
- int ch = first.charAt(i) & 0xFF;
- if ((ch >= '0' && ch <= '9')
- || (ch >= 'A' && ch <= 'Z')
- || (ch >= 'a' && ch <= 'z')
- || " `~!@#$%^&*()-_=+[]{}\\|;:'\",.<>/?".indexOf(ch) >= 0) {
- result.append(first.charAt(i));
- } else {
- result.append(String.format("\\x%02X", ch));
- }
- }
- } catch (UnsupportedEncodingException e) {
- LOG.error("ISO-8859-1 not supported?", e);
- }
- return result.toString();
- }
-
- private static boolean isHexDigit(char c) {
- return (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9');
- }
-
- /**
- * Takes an ASCII digit in the range A-F0-9 and returns the corresponding
- * integer/ordinal value.
- *
- * @param ch
- * The hex digit.
- * @return The converted hex value as a byte.
- */
- public static byte toBinaryFromHex(byte ch) {
- if (ch >= 'A' && ch <= 'F')
- return (byte) ((byte) 10 + (byte) (ch - 'A'));
- // else
- return (byte) (ch - '0');
- }
-
- public static byte[] toBytesBinary(String in) {
- // this may be bigger than we need, but lets be safe.
- byte[] b = new byte[in.length()];
- int size = 0;
- for (int i = 0; i < in.length(); ++i) {
- char ch = in.charAt(i);
- if (ch == '\\') {
- // begin hex escape:
- char next = in.charAt(i + 1);
- if (next != 'x') {
- // invalid escape sequence, ignore this one.
- b[size++] = (byte) ch;
- continue;
- }
- // ok, take next 2 hex digits.
- char hd1 = in.charAt(i + 2);
- char hd2 = in.charAt(i + 3);
-
- // they need to be A-F0-9:
- if (!isHexDigit(hd1) || !isHexDigit(hd2)) {
- // bogus escape code, ignore:
- continue;
- }
- // turn hex ASCII digit -> number
- byte d = (byte) ((toBinaryFromHex((byte) hd1) << 4) + toBinaryFromHex((byte) hd2));
-
- b[size++] = d;
- i += 3; // skip 3
- } else {
- b[size++] = (byte) ch;
- }
- }
- // resize:
- byte[] b2 = new byte[size];
- System.arraycopy(b, 0, b2, 0, size);
- return b2;
- }
-
- /**
- * Converts a string to a UTF-8 byte array.
- *
- * @param s
- * string
- * @return the byte array
- */
- public static byte[] toBytes(String s) {
- try {
- return s.getBytes(UTF8_ENCODING);
- } catch (UnsupportedEncodingException e) {
- LOG.error("UTF-8 not supported?", e);
- return null;
- }
- }
-
- /**
- * Convert a boolean to a byte array. True becomes -1 and false becomes 0.
- *
- * @param b
- * value
- * @return <code>b</code> encoded in a byte array.
- */
- public static byte[] toBytes(final boolean b) {
- return new byte[] { b ? (byte) -1 : (byte) 0 };
- }
-
- /**
- * Reverses {@link #toBytes(boolean)}
- *
- * @param b
- * array
- * @return True or false.
- */
- public static boolean toBoolean(final byte[] b) {
- if (b.length != 1) {
- throw new IllegalArgumentException("Array has wrong size: "
- + b.length);
- }
- return b[0] != (byte) 0;
- }
-
- /**
- * Convert a long value to a byte array using big-endian.
- *
- * @param val
- * value to convert
- * @return the byte array
- */
- public static byte[] toBytes(long val) {
- byte[] b = new byte[8];
- for (int i = 7; i > 0; i--) {
- b[i] = (byte) val;
- val >>>= 8;
- }
- b[0] = (byte) val;
- return b;
- }
-
- /**
- * Converts a byte array to a long value. Reverses {@link #toBytes(long)}
- *
- * @param bytes
- * array
- * @return the long value
- */
- public static long toLong(byte[] bytes) {
- return toLong(bytes, 0, SIZEOF_LONG);
- }
-
- /**
- * Converts a byte array to a long value. Assumes there will be
- * {@link #SIZEOF_LONG} bytes available.
- *
- * @param bytes
- * bytes
- * @param offset
- * offset
- * @return the long value
- */
- public static long toLong(byte[] bytes, int offset) {
- return toLong(bytes, offset, SIZEOF_LONG);
- }
-
- /**
- * Converts a byte array to a long value.
- *
- * @param bytes
- * array of bytes
- * @param offset
- * offset into array
- * @param length
- * length of data (must be {@link #SIZEOF_LONG})
- * @return the long value
- * @throws IllegalArgumentException
- * if length is not {@link #SIZEOF_LONG} or if there's not
- * enough room in the array at the offset indicated.
- */
- public static long toLong(byte[] bytes, int offset, final int length) {
- if (length != SIZEOF_LONG || offset + length > bytes.length) {
- throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_LONG);
- }
- long l = 0;
- for (int i = offset; i < offset + length; i++) {
- l <<= 8;
- l ^= bytes[i] & 0xFF;
- }
- return l;
- }
-
- private static IllegalArgumentException explainWrongLengthOrOffset(
- final byte[] bytes, final int offset, final int length,
- final int expectedLength) {
- String reason;
- if (length != expectedLength) {
- reason = "Wrong length: " + length + ", expected " + expectedLength;
- } else {
- reason = "offset (" + offset + ") + length (" + length
- + ") exceed the" + " capacity of the array: "
- + bytes.length;
- }
- return new IllegalArgumentException(reason);
- }
-
- /**
- * Put a long value out to the specified byte array position.
- *
- * @param bytes
- * the byte array
- * @param offset
- * position in the array
- * @param val
- * long to write out
- * @return incremented offset
- * @throws IllegalArgumentException
- * if the byte array given doesn't have enough room at the
- * offset specified.
- */
- public static int putLong(byte[] bytes, int offset, long val) {
- if (bytes.length - offset < SIZEOF_LONG) {
- throw new IllegalArgumentException(
- "Not enough room to put a long at" + " offset " + offset
- + " in a " + bytes.length + " byte array");
- }
- for (int i = offset + 7; i > offset; i--) {
- bytes[i] = (byte) val;
- val >>>= 8;
- }
- bytes[offset] = (byte) val;
- return offset + SIZEOF_LONG;
- }
-
- /**
- * Presumes float encoded as IEEE 754 floating-point "single format"
- *
- * @param bytes
- * byte array
- * @return Float made from passed byte array.
- */
- public static float toFloat(byte[] bytes) {
- return toFloat(bytes, 0);
- }
-
- /**
- * Presumes float encoded as IEEE 754 floating-point "single format"
- *
- * @param bytes
- * array to convert
- * @param offset
- * offset into array
- * @return Float made from passed byte array.
- */
- public static float toFloat(byte[] bytes, int offset) {
- return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT));
- }
-
- /**
- * @param bytes
- * byte array
- * @param offset
- * offset to write to
- * @param f
- * float value
- * @return New offset in <code>bytes</code>
- */
- public static int putFloat(byte[] bytes, int offset, float f) {
- return putInt(bytes, offset, Float.floatToRawIntBits(f));
- }
-
- /**
- * @param f
- * float value
- * @return the float represented as byte []
- */
- public static byte[] toBytes(final float f) {
- // Encode it as int
- return Bytes.toBytes(Float.floatToRawIntBits(f));
- }
-
- /**
- * @param bytes
- * byte array
- * @return Return double made from passed bytes.
- */
- public static double toDouble(final byte[] bytes) {
- return toDouble(bytes, 0);
- }
-
- /**
- * @param bytes
- * byte array
- * @param offset
- * offset where double is
- * @return Return double made from passed bytes.
- */
- public static double toDouble(final byte[] bytes, final int offset) {
- return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG));
- }
-
- /**
- * @param bytes
- * byte array
- * @param offset
- * offset to write to
- * @param d
- * value
- * @return New offset into array <code>bytes</code>
- */
- public static int putDouble(byte[] bytes, int offset, double d) {
- return putLong(bytes, offset, Double.doubleToLongBits(d));
- }
-
- /**
- * Serialize a double as the IEEE 754 double format output. The resultant
- * array will be 8 bytes long.
- *
- * @param d
- * value
- * @return the double represented as byte []
- */
- public static byte[] toBytes(final double d) {
- // Encode it as a long
- return Bytes.toBytes(Double.doubleToRawLongBits(d));
- }
-
- /**
- * Convert an int value to a byte array
- *
- * @param val
- * value
- * @return the byte array
- */
- public static byte[] toBytes(int val) {
- byte[] b = new byte[4];
- for (int i = 3; i > 0; i--) {
- b[i] = (byte) val;
- val >>>= 8;
- }
- b[0] = (byte) val;
- return b;
- }
-
- /**
- * Converts a byte array to an int value
- *
- * @param bytes
- * byte array
- * @return the int value
- */
- public static int toInt(byte[] bytes) {
- return toInt(bytes, 0, SIZEOF_INT);
- }
-
- /**
- * Converts a byte array to an int value
- *
- * @param bytes
- * byte array
- * @param offset
- * offset into array
- * @return the int value
- */
- public static int toInt(byte[] bytes, int offset) {
- return toInt(bytes, offset, SIZEOF_INT);
- }
-
- /**
- * Converts a byte array to an int value
- *
- * @param bytes
- * byte array
- * @param offset
- * offset into array
- * @param length
- * length of int (has to be {@link #SIZEOF_INT})
- * @return the int value
- * @throws IllegalArgumentException
- * if length is not {@link #SIZEOF_INT} or if there's not enough
- * room in the array at the offset indicated.
- */
- public static int toInt(byte[] bytes, int offset, final int length) {
- if (length != SIZEOF_INT || offset + length > bytes.length) {
- throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_INT);
- }
- int n = 0;
- for (int i = offset; i < (offset + length); i++) {
- n <<= 8;
- n ^= bytes[i] & 0xFF;
- }
- return n;
- }
-
- /**
- * Put an int value out to the specified byte array position.
- *
- * @param bytes
- * the byte array
- * @param offset
- * position in the array
- * @param val
- * int to write out
- * @return incremented offset
- * @throws IllegalArgumentException
- * if the byte array given doesn't have enough room at the
- * offset specified.
- */
- public static int putInt(byte[] bytes, int offset, int val) {
- if (bytes.length - offset < SIZEOF_INT) {
- throw new IllegalArgumentException(
- "Not enough room to put an int at" + " offset " + offset
- + " in a " + bytes.length + " byte array");
- }
- for (int i = offset + 3; i > offset; i--) {
- bytes[i] = (byte) val;
- val >>>= 8;
- }
- bytes[offset] = (byte) val;
- return offset + SIZEOF_INT;
- }
-
- /**
- * Convert a short value to a byte array of {@link #SIZEOF_SHORT} bytes
- * long.
- *
- * @param val
- * value
- * @return the byte array
- */
- public static byte[] toBytes(short val) {
- byte[] b = new byte[SIZEOF_SHORT];
- b[1] = (byte) val;
- val >>= 8;
- b[0] = (byte) val;
- return b;
- }
-
- /**
- * Converts a byte array to a short value
- *
- * @param bytes
- * byte array
- * @return the short value
- */
- public static short toShort(byte[] bytes) {
- return toShort(bytes, 0, SIZEOF_SHORT);
- }
-
- /**
- * Converts a byte array to a short value
- *
- * @param bytes
- * byte array
- * @param offset
- * offset into array
- * @return the short value
- */
- public static short toShort(byte[] bytes, int offset) {
- return toShort(bytes, offset, SIZEOF_SHORT);
- }
-
- /**
- * Converts a byte array to a short value
- *
- * @param bytes
- * byte array
- * @param offset
- * offset into array
- * @param length
- * length, has to be {@link #SIZEOF_SHORT}
- * @return the short value
- * @throws IllegalArgumentException
- * if length is not {@link #SIZEOF_SHORT} or if there's not
- * enough room in the array at the offset indicated.
- */
- public static short toShort(byte[] bytes, int offset, final int length) {
- if (length != SIZEOF_SHORT || offset + length > bytes.length) {
- throw explainWrongLengthOrOffset(bytes, offset, length,
- SIZEOF_SHORT);
- }
- short n = 0;
- n ^= bytes[offset] & 0xFF;
- n <<= 8;
- n ^= bytes[offset + 1] & 0xFF;
- return n;
- }
-
- /**
- * Put a short value out to the specified byte array position.
- *
- * @param bytes
- * the byte array
- * @param offset
- * position in the array
- * @param val
- * short to write out
- * @return incremented offset
- * @throws IllegalArgumentException
- * if the byte array given doesn't have enough room at the
- * offset specified.
- */
- public static int putShort(byte[] bytes, int offset, short val) {
- if (bytes.length - offset < SIZEOF_SHORT) {
- throw new IllegalArgumentException(
- "Not enough room to put a short at" + " offset " + offset
- + " in a " + bytes.length + " byte array");
- }
- bytes[offset + 1] = (byte) val;
- val >>= 8;
- bytes[offset] = (byte) val;
- return offset + SIZEOF_SHORT;
- }
-
- /**
- * @param vint
- * Integer to make a vint of.
- * @return Vint as bytes array.
- */
- public static byte[] vintToBytes(final long vint) {
- long i = vint;
- int size = WritableUtils.getVIntSize(i);
- byte[] result = new byte[size];
- int offset = 0;
- if (i >= -112 && i <= 127) {
- result[offset] = (byte) i;
- return result;
- }
-
- int len = -112;
- if (i < 0) {
- i ^= -1L; // take one's complement'
- len = -120;
- }
-
- long tmp = i;
- while (tmp != 0) {
- tmp = tmp >> 8;
- len--;
- }
-
- result[offset++] = (byte) len;
-
- len = (len < -120) ? -(len + 120) : -(len + 112);
-
- for (int idx = len; idx != 0; idx--) {
- int shiftbits = (idx - 1) * 8;
- long mask = 0xFFL << shiftbits;
- result[offset++] = (byte) ((i & mask) >> shiftbits);
- }
- return result;
- }
-
- /**
- * @param buffer
- * buffer to convert
- * @return vint bytes as an integer.
- */
- public static long bytesToVint(final byte[] buffer) {
- int offset = 0;
- byte firstByte = buffer[offset++];
- int len = WritableUtils.decodeVIntSize(firstByte);
- if (len == 1) {
- return firstByte;
- }
- long i = 0;
- for (int idx = 0; idx < len - 1; idx++) {
- byte b = buffer[offset++];
- i = i << 8;
- i = i | (b & 0xFF);
- }
- return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
- }
-
- /**
- * Reads a zero-compressed encoded long from a byte array and returns it.
- *
- * @param buffer
- * Binary array
- * @param offset
- * Offset into array at which vint begins.
- * @throws java.io.IOException
- * e
- * @return deserialized long from the array.
- */
- public static long readVLong(final byte[] buffer, final int offset)
- throws IOException {
- byte firstByte = buffer[offset];
- int len = WritableUtils.decodeVIntSize(firstByte);
- if (len == 1) {
- return firstByte;
- }
- long i = 0;
- for (int idx = 0; idx < len - 1; idx++) {
- byte b = buffer[offset + 1 + idx];
- i = i << 8;
- i = i | (b & 0xFF);
- }
- return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
- }
-
- /**
- * @param left
- * left operand
- * @param right
- * right operand
- * @return 0 if equal, < 0 if left is less than right, etc.
- */
- public static int compareTo(final byte[] left, final byte[] right) {
- return compareTo(left, 0, left.length, right, 0, right.length);
- }
-
- /**
- * Lexicographically compare two arrays.
- *
- * @param buffer1
- * left operand
- * @param buffer2
- * right operand
- * @param offset1
- * Where to start comparing in the left buffer
- * @param offset2
- * Where to start comparing in the right buffer
- * @param length1
- * How much to compare from the left buffer
- * @param length2
- * How much to compare from the right buffer
- * @return 0 if equal, < 0 if left is less than right, etc.
- */
- public static int compareTo(byte[] buffer1, int offset1, int length1,
- byte[] buffer2, int offset2, int length2) {
- // Bring WritableComparator code local
- int end1 = offset1 + length1;
- int end2 = offset2 + length2;
- for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
- int a = (buffer1[i] & 0xff);
- int b = (buffer2[j] & 0xff);
- if (a != b) {
- return a - b;
- }
- }
- return length1 - length2;
- }
-
- /**
- * @param left
- * left operand
- * @param right
- * right operand
- * @return True if equal
- */
- public static boolean equals(final byte[] left, final byte[] right) {
- // Could use Arrays.equals?
- // noinspection SimplifiableConditionalExpression
- if (left == null && right == null) {
- return true;
- }
- return (left == null || right == null || (left.length != right.length) ? false
- : compareTo(left, right) == 0);
- }
-
- /**
- * Return true if the byte array on the right is a prefix of the byte array
- * on the left.
- */
- public static boolean startsWith(byte[] bytes, byte[] prefix) {
- return bytes != null
- && prefix != null
- && bytes.length >= prefix.length
- && compareTo(bytes, 0, prefix.length, prefix, 0, prefix.length) == 0;
- }
-
- /**
- * @param b
- * bytes to hash
- * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
- * passed in array. This method is what
- * {@link org.apache.hadoop.io.Text} and
- * {@link ImmutableBytesWritable} use calculating hash code.
- */
- public static int hashCode(final byte[] b) {
- return hashCode(b, b.length);
- }
-
- /**
- * @param b
- * value
- * @param length
- * length of the value
- * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
- * passed in array. This method is what
- * {@link org.apache.hadoop.io.Text} and
- * {@link ImmutableBytesWritable} use calculating hash code.
- */
- public static int hashCode(final byte[] b, final int length) {
- return WritableComparator.hashBytes(b, length);
- }
-
- /**
- * @param b
- * bytes to hash
- * @return A hash of <code>b</code> as an Integer that can be used as key in
- * Maps.
- */
- public static Integer mapKey(final byte[] b) {
- return hashCode(b);
- }
-
- /**
- * @param b
- * bytes to hash
- * @param length
- * length to hash
- * @return A hash of <code>b</code> as an Integer that can be used as key in
- * Maps.
- */
- public static Integer mapKey(final byte[] b, final int length) {
- return hashCode(b, length);
- }
-
- /**
- * @param a
- * lower half
- * @param b
- * upper half
- * @return New array that has a in lower half and b in upper half.
- */
- public static byte[] add(final byte[] a, final byte[] b) {
- return add(a, b, EMPTY_BYTE_ARRAY);
- }
-
- /**
- * @param a
- * first third
- * @param b
- * second third
- * @param c
- * third third
- * @return New array made from a, b and c
- */
- public static byte[] add(final byte[] a, final byte[] b, final byte[] c) {
- byte[] result = new byte[a.length + b.length + c.length];
- System.arraycopy(a, 0, result, 0, a.length);
- System.arraycopy(b, 0, result, a.length, b.length);
- System.arraycopy(c, 0, result, a.length + b.length, c.length);
- return result;
- }
-
- /**
- * @param a
- * array
- * @param length
- * amount of bytes to grab
- * @return First <code>length</code> bytes from <code>a</code>
- */
- public static byte[] head(final byte[] a, final int length) {
- if (a.length < length) {
- return null;
- }
- byte[] result = new byte[length];
- System.arraycopy(a, 0, result, 0, length);
- return result;
- }
-
- /**
- * @param a
- * array
- * @param length
- * amount of bytes to snarf
- * @return Last <code>length</code> bytes from <code>a</code>
- */
- public static byte[] tail(final byte[] a, final int length) {
- if (a.length < length) {
- return null;
- }
- byte[] result = new byte[length];
- System.arraycopy(a, a.length - length, result, 0, length);
- return result;
- }
-
- /**
- * @param a
- * array
- * @param length
- * number of zero bytes to prepend
- * @return Value in <code>a</code> plus <code>length</code> prepended 0
- * bytes
- */
- public static byte[] padHead(final byte[] a, final int length) {
- byte[] padding = new byte[length];
- for (int i = 0; i < length; i++) {
- padding[i] = 0;
- }
- return add(padding, a);
- }
-
- /**
- * @param a
- * array
- * @param length
- * number of zero bytes to append
- * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes
- */
- public static byte[] padTail(final byte[] a, final int length) {
- byte[] padding = new byte[length];
- for (int i = 0; i < length; i++) {
- padding[i] = 0;
- }
- return add(a, padding);
- }
-
- /**
- * Split passed range. Expensive operation relatively. Uses BigInteger math.
- * Useful splitting ranges for MapReduce jobs.
- *
- * @param a
- * Beginning of range
- * @param b
- * End of range
- * @param num
- * Number of times to split range. Pass 1 if you want to split
- * the range in two; i.e. one split.
- * @return Array of dividing values
- */
- public static byte[][] split(final byte[] a, final byte[] b, final int num) {
- byte[][] ret = new byte[num + 2][];
- int i = 0;
- Iterable<byte[]> iter = iterateOnSplits(a, b, num);
- if (iter == null)
- return null;
- for (byte[] elem : iter) {
- ret[i++] = elem;
- }
- return ret;
- }
-
- /**
- * Iterate over keys within the passed inclusive range.
- */
- public static Iterable<byte[]> iterateOnSplits(final byte[] a,
- final byte[] b, final int num) {
- byte[] aPadded;
- byte[] bPadded;
- if (a.length < b.length) {
- aPadded = padTail(a, b.length - a.length);
- bPadded = b;
- } else if (b.length < a.length) {
- aPadded = a;
- bPadded = padTail(b, a.length - b.length);
- } else {
- aPadded = a;
- bPadded = b;
- }
- if (compareTo(aPadded, bPadded) >= 0) {
- throw new IllegalArgumentException("b <= a");
- }
- if (num <= 0) {
- throw new IllegalArgumentException("num cannot be < 0");
- }
- byte[] prependHeader = { 1, 0 };
- final BigInteger startBI = new BigInteger(add(prependHeader, aPadded));
- final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded));
- final BigInteger diffBI = stopBI.subtract(startBI);
- final BigInteger splitsBI = BigInteger.valueOf(num + 1);
- if (diffBI.compareTo(splitsBI) < 0) {
- return null;
- }
- final BigInteger intervalBI;
- try {
- intervalBI = diffBI.divide(splitsBI);
- } catch (Exception e) {
- LOG.error("Exception caught during division", e);
- return null;
- }
-
- final Iterator<byte[]> iterator = new Iterator<byte[]>() {
- private int i = -1;
-
- @Override
- public boolean hasNext() {
- return i < num + 1;
- }
-
- @Override
- public byte[] next() {
- i++;
- if (i == 0)
- return a;
- if (i == num + 1)
- return b;
-
- BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger
- .valueOf(i)));
- byte[] padded = curBI.toByteArray();
- if (padded[1] == 0)
- padded = tail(padded, padded.length - 2);
- else
- padded = tail(padded, padded.length - 1);
- return padded;
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
-
- };
-
- return new Iterable<byte[]>() {
- @Override
- public Iterator<byte[]> iterator() {
- return iterator;
- }
- };
- }
-
- /**
- * @param t
- * operands
- * @return Array of byte arrays made from passed array of Text
- */
- public static byte[][] toByteArrays(final String[] t) {
- byte[][] result = new byte[t.length][];
- for (int i = 0; i < t.length; i++) {
- result[i] = Bytes.toBytes(t[i]);
- }
- return result;
- }
-
- /**
- * @param column
- * operand
- * @return A byte array of a byte array where first and only entry is
- * <code>column</code>
- */
- public static byte[][] toByteArrays(final String column) {
- return toByteArrays(toBytes(column));
- }
-
- /**
- * @param column
- * operand
- * @return A byte array of a byte array where first and only entry is
- * <code>column</code>
- */
- public static byte[][] toByteArrays(final byte[] column) {
- byte[][] result = new byte[1][];
- result[0] = column;
- return result;
- }
-
- /**
- * Binary search for keys in indexes.
- *
- * @param arr
- * array of byte arrays to search for
- * @param key
- * the key you want to find
- * @param offset
- * the offset in the key you want to find
- * @param length
- * the length of the key
- * @param comparator
- * a comparator to compare.
- * @return index of key
- */
- public static int binarySearch(byte[][] arr, byte[] key, int offset,
- int length, RawComparator<byte[]> comparator) {
- int low = 0;
- int high = arr.length - 1;
-
- while (low <= high) {
- int mid = (low + high) >>> 1;
- // we have to compare in this order, because the comparator order
- // has special logic when the 'left side' is a special key.
- int cmp = comparator.compare(key, offset, length, arr[mid], 0,
- arr[mid].length);
- // key lives above the midpoint
- if (cmp > 0)
- low = mid + 1;
- // key lives below the midpoint
- else if (cmp < 0)
- high = mid - 1;
- // BAM. how often does this really happen?
- else
- return mid;
- }
- return -(low + 1);
- }
-
- /**
- * Bytewise binary increment/deincrement of long contained in byte array on
- * given amount.
- *
- * @param value
- * - array of bytes containing long (length <= SIZEOF_LONG)
- * @param amount
- * value will be incremented on (deincremented if negative)
- * @return array of bytes containing incremented long (length ==
- * SIZEOF_LONG)
- * @throws IOException
- * - if value.length > SIZEOF_LONG
- */
- public static byte[] incrementBytes(byte[] value, long amount)
- throws IOException {
- byte[] val = value;
- if (val.length < SIZEOF_LONG) {
- // Hopefully this doesn't happen too often.
- byte[] newvalue;
- if (val[0] < 0) {
- newvalue = new byte[] { -1, -1, -1, -1, -1, -1, -1, -1 };
- } else {
- newvalue = new byte[SIZEOF_LONG];
- }
- System.arraycopy(val, 0, newvalue, newvalue.length - val.length,
- val.length);
- val = newvalue;
- } else if (val.length > SIZEOF_LONG) {
- throw new IllegalArgumentException(
- "Increment Bytes - value too big: " + val.length);
- }
- if (amount == 0)
- return val;
- if (val[0] < 0) {
- return binaryIncrementNeg(val, amount);
- }
- return binaryIncrementPos(val, amount);
- }
-
- /* increment/deincrement for positive value */
- private static byte[] binaryIncrementPos(byte[] value, long amount) {
- long amo = amount;
- int sign = 1;
- if (amount < 0) {
- amo = -amount;
- sign = -1;
- }
- for (int i = 0; i < value.length; i++) {
- int cur = ((int) amo % 256) * sign;
- amo = (amo >> 8);
- int val = value[value.length - i - 1] & 0x0ff;
- int total = val + cur;
- if (total > 255) {
- amo += sign;
- total %= 256;
- } else if (total < 0) {
- amo -= sign;
- }
- value[value.length - i - 1] = (byte) total;
- if (amo == 0)
- return value;
- }
- return value;
- }
-
- /* increment/deincrement for negative value */
- private static byte[] binaryIncrementNeg(byte[] value, long amount) {
- long amo = amount;
- int sign = 1;
- if (amount < 0) {
- amo = -amount;
- sign = -1;
- }
- for (int i = 0; i < value.length; i++) {
- int cur = ((int) amo % 256) * sign;
- amo = (amo >> 8);
- int val = ((~value[value.length - i - 1]) & 0x0ff) + 1;
- int total = cur - val;
- if (total >= 0) {
- amo += sign;
- } else if (total < -256) {
- amo -= sign;
- total %= 256;
- }
- value[value.length - i - 1] = (byte) total;
- if (amo == 0)
- return value;
- }
- return value;
- }
+ public int compare(byte[] left, byte[] right) {
+ return compareTo(left, right);
+ }
+
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ return compareTo(b1, s1, l1, b2, s2, l2);
+ }
+ }
+
+ /**
+ * Pass this to TreeMaps where byte [] are keys.
+ */
+ public static Comparator<byte[]> BYTES_COMPARATOR = new ByteArrayComparator();
+
+ /**
+ * Use for comparing byte arrays, byte-by-byte
+ */
+ public static RawComparator<byte[]> BYTES_RAWCOMPARATOR = new ByteArrayComparator();
+
+ /**
+ * Read byte-array written with a WritableUtils vint prefix.
+ *
+ * @param in
+ * Input to read from.
+ * @return byte array read off <code>in</code>
+ * @throws IOException
+ * e
+ */
+ public static byte[] readByteArray(final DataInput in) throws IOException {
+ int len = WritableUtils.readVInt(in);
+ if (len < 0) {
+ throw new NegativeArraySizeException(Integer.toString(len));
+ }
+ byte[] result = new byte[len];
+ in.readFully(result, 0, len);
+ return result;
+ }
+
+ /**
+ * Read byte-array written with a WritableUtils vint prefix. Any exception,
+ * including IOException, is rethrown wrapped in a RuntimeException.
+ *
+ * @param in
+ * Input to read from.
+ * @return byte array read off <code>in</code>
+ */
+ public static byte[] readByteArrayThrowsRuntime(final DataInput in) {
+ try {
+ return readByteArray(in);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Write byte-array with a WritableUtils vint prefix.
+ *
+ * @param out
+ * output stream to be written to
+ * @param b
+ * array to write
+ * @throws IOException
+ * e
+ */
+ public static void writeByteArray(final DataOutput out, final byte[] b)
+ throws IOException {
+ if (b == null) {
+ WritableUtils.writeVInt(out, 0);
+ } else {
+ writeByteArray(out, b, 0, b.length);
+ }
+ }
+
+ /**
+ * Write byte-array to out with a vint length prefix.
+ *
+ * @param out
+ * output stream
+ * @param b
+ * array
+ * @param offset
+ * offset into array
+ * @param length
+ * length past offset
+ * @throws IOException
+ * e
+ */
+ public static void writeByteArray(final DataOutput out, final byte[] b,
+ final int offset, final int length) throws IOException {
+ WritableUtils.writeVInt(out, length);
+ out.write(b, offset, length);
+ }
+
+ /**
+ * Write byte-array from src to tgt with a vint length prefix.
+ *
+ * @param tgt
+ * target array
+ * @param tgtOffset
+ * offset into target array
+ * @param src
+ * source array
+ * @param srcOffset
+ * source offset
+ * @param srcLength
+ * source length
+ * @return New offset in tgt array.
+ */
+ public static int writeByteArray(final byte[] tgt, final int tgtOffset,
+ final byte[] src, final int srcOffset, final int srcLength) {
+ byte[] vint = vintToBytes(srcLength);
+ System.arraycopy(vint, 0, tgt, tgtOffset, vint.length);
+ int offset = tgtOffset + vint.length;
+ System.arraycopy(src, srcOffset, tgt, offset, srcLength);
+ return offset + srcLength;
+ }
+
+ /**
+ * Put bytes at the specified byte array position.
+ *
+ * @param tgtBytes
+ * the byte array
+ * @param tgtOffset
+ * position in the array
+ * @param srcBytes
+ * array to write out
+ * @param srcOffset
+ * source offset
+ * @param srcLength
+ * source length
+ * @return incremented offset
+ */
+ public static int putBytes(byte[] tgtBytes, int tgtOffset, byte[] srcBytes,
+ int srcOffset, int srcLength) {
+ System.arraycopy(srcBytes, srcOffset, tgtBytes, tgtOffset, srcLength);
+ return tgtOffset + srcLength;
+ }
+
+ /**
+ * Write a single byte out to the specified byte array position.
+ *
+ * @param bytes
+ * the byte array
+ * @param offset
+ * position in the array
+ * @param b
+ * byte to write out
+ * @return incremented offset
+ */
+ public static int putByte(byte[] bytes, int offset, byte b) {
+ bytes[offset] = b;
+ return offset + 1;
+ }
+
+ /**
+ * Returns a new byte array, copied from the passed ByteBuffer.
+ *
+ * @param bb
+ * A ByteBuffer
+ * @return the byte array
+ */
+ public static byte[] toBytes(ByteBuffer bb) {
+ int length = bb.remaining();
+ byte[] result = new byte[length];
+ System.arraycopy(bb.array(), bb.arrayOffset() + bb.position(), result, 0,
+ length);
+ return result;
+ }
+
+ /**
+ * This method will convert utf8 encoded bytes into a string. If an
+ * UnsupportedEncodingException occurs, this method will eat it and return
+ * null instead.
+ *
+ * @param bb
+ * Presumed UTF-8 encoded ByteBuffer.
+ * @return String made from <code>bb</code> or null
+ */
+ public static String toString(ByteBuffer bb) {
+ return bb == null ? null : toString(bb.array(),
+ bb.arrayOffset() + bb.position(), bb.remaining());
+ }
+
+ /**
+ * @param b
+ * Presumed UTF-8 encoded byte array.
+ * @return String made from <code>b</code>
+ */
+ public static String toString(final byte[] b) {
+ if (b == null) {
+ return null;
+ }
+ return toString(b, 0, b.length);
+ }
+
+ /**
+ * Joins two byte arrays together using a separator.
+ *
+ * @param b1
+ * The first byte array.
+ * @param sep
+ * The separator to use.
+ * @param b2
+ * The second byte array.
+ */
+ public static String toString(final byte[] b1, String sep, final byte[] b2) {
+ return toString(b1, 0, b1.length) + sep + toString(b2, 0, b2.length);
+ }
+
+ /**
+ * This method will convert utf8 encoded bytes into a string. If an
+ * UnsupportedEncodingException occurs, this method will eat it and return
+ * null instead.
+ *
+ * @param b
+ * Presumed UTF-8 encoded byte array.
+ * @param off
+ * offset into array
+ * @param len
+ * length of utf-8 sequence
+ * @return String made from <code>b</code> or null
+ */
+ public static String toString(final byte[] b, int off, int len) {
+ if (b == null) {
+ return null;
+ }
+ if (len == 0) {
+ return "";
+ }
+ try {
+ return new String(b, off, len, UTF8_ENCODING);
+ } catch (UnsupportedEncodingException e) {
+ LOG.error("UTF-8 not supported?", e);
+ return null;
+ }
+ }
+
+ /**
+ * Write a printable representation of a ByteBuffer. Non-printable characters
+ * are hex escaped in the format \\x%02X, eg: \x00 \x05 etc
+ *
+ * @param bb
+ * ByteBuffer to write out
+ * @return string output
+ */
+ public static String toStringBinary(ByteBuffer bb) {
+ return bb == null ? null : toStringBinary(bb.array(),
+ bb.arrayOffset() + bb.position(), bb.remaining());
+ }
+
+ /**
+ * Write a printable representation of a byte array.
+ *
+ * @param b
+ * byte array
+ * @return string
+ * @see #toStringBinary(byte[], int, int)
+ */
+ public static String toStringBinary(final byte[] b) {
+ return toStringBinary(b, 0, b.length);
+ }
+
+ /**
+ * Write a printable representation of a byte array. Non-printable characters
+ * are hex escaped in the format \\x%02X, eg: \x00 \x05 etc
+ *
+ * @param b
+ * array to write out
+ * @param off
+ * offset to start at
+ * @param len
+ * length to write
+ * @return string output
+ */
+ public static String toStringBinary(final byte[] b, int off, int len) {
+ StringBuilder result = new StringBuilder();
+ try {
+ String first = new String(b, off, len, "ISO-8859-1");
+ for (int i = 0; i < first.length(); ++i) {
+ int ch = first.charAt(i) & 0xFF;
+ if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z')
+ || (ch >= 'a' && ch <= 'z')
+ || " `~!@#$%^&*()-_=+[]{}\\|;:'\",.<>/?".indexOf(ch) >= 0) {
+ result.append(first.charAt(i));
+ } else {
+ result.append(String.format("\\x%02X", ch));
+ }
+ }
+ } catch (UnsupportedEncodingException e) {
+ LOG.error("ISO-8859-1 not supported?", e);
+ }
+ return result.toString();
+ }
+
+ private static boolean isHexDigit(char c) {
+ return (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9');
+ }
+
+ /**
+ * Takes an ASCII digit in the range A-F0-9 and returns the corresponding
+ * integer/ordinal value.
+ *
+ * @param ch
+ * The hex digit.
+ * @return The converted hex value as a byte.
+ */
+ public static byte toBinaryFromHex(byte ch) {
+ if (ch >= 'A' && ch <= 'F')
+ return (byte) ((byte) 10 + (byte) (ch - 'A'));
+ // else
+ return (byte) (ch - '0');
+ }
+
+ public static byte[] toBytesBinary(String in) {
+ // this may be bigger than we need, but lets be safe.
+ byte[] b = new byte[in.length()];
+ int size = 0;
+ for (int i = 0; i < in.length(); ++i) {
+ char ch = in.charAt(i);
+ if (ch == '\\') {
+ // begin hex escape:
+ char next = in.charAt(i + 1);
+ if (next != 'x') {
+ // invalid escape sequence, ignore this one.
+ b[size++] = (byte) ch;
+ continue;
+ }
+ // ok, take next 2 hex digits.
+ char hd1 = in.charAt(i + 2);
+ char hd2 = in.charAt(i + 3);
+
+ // they need to be A-F0-9:
+ if (!isHexDigit(hd1) || !isHexDigit(hd2)) {
+ // bogus escape code, ignore:
+ continue;
+ }
+ // turn hex ASCII digit -> number
+ byte d = (byte) ((toBinaryFromHex((byte) hd1) << 4) + toBinaryFromHex((byte) hd2));
+
+ b[size++] = d;
+ i += 3; // skip 3
+ } else {
+ b[size++] = (byte) ch;
+ }
+ }
+ // resize:
+ byte[] b2 = new byte[size];
+ System.arraycopy(b, 0, b2, 0, size);
+ return b2;
+ }
+
+ /**
+ * Converts a string to a UTF-8 byte array.
+ *
+ * @param s
+ * string
+ * @return the byte array
+ */
+ public static byte[] toBytes(String s) {
+ try {
+ return s.getBytes(UTF8_ENCODING);
+ } catch (UnsupportedEncodingException e) {
+ LOG.error("UTF-8 not supported?", e);
+ return null;
+ }
+ }
+
+ /**
+ * Convert a boolean to a byte array. True becomes -1 and false becomes 0.
+ *
+ * @param b
+ * value
+ * @return <code>b</code> encoded in a byte array.
+ */
+ public static byte[] toBytes(final boolean b) {
+ return new byte[] { b ? (byte) -1 : (byte) 0 };
+ }
+
+ /**
+ * Reverses {@link #toBytes(boolean)}
+ *
+ * @param b
+ * array
+ * @return True or false.
+ */
+ public static boolean toBoolean(final byte[] b) {
+ if (b.length != 1) {
+ throw new IllegalArgumentException("Array has wrong size: " + b.length);
+ }
+ return b[0] != (byte) 0;
+ }
+
+ /**
+ * Convert a long value to a byte array using big-endian.
+ *
+ * @param val
+ * value to convert
+ * @return the byte array
+ */
+ public static byte[] toBytes(long val) {
+ byte[] b = new byte[8];
+ for (int i = 7; i > 0; i--) {
+ b[i] = (byte) val;
+ val >>>= 8;
+ }
+ b[0] = (byte) val;
+ return b;
+ }
+
+ /**
+ * Converts a byte array to a long value. Reverses {@link #toBytes(long)}
+ *
+ * @param bytes
+ * array
+ * @return the long value
+ */
+ public static long toLong(byte[] bytes) {
+ return toLong(bytes, 0, SIZEOF_LONG);
+ }
+
+ /**
+ * Converts a byte array to a long value. Assumes there will be
+ * {@link #SIZEOF_LONG} bytes available.
+ *
+ * @param bytes
+ * bytes
+ * @param offset
+ * offset
+ * @return the long value
+ */
+ public static long toLong(byte[] bytes, int offset) {
+ return toLong(bytes, offset, SIZEOF_LONG);
+ }
+
+ /**
+ * Converts a byte array to a long value.
+ *
+ * @param bytes
+ * array of bytes
+ * @param offset
+ * offset into array
+ * @param length
+ * length of data (must be {@link #SIZEOF_LONG})
+ * @return the long value
+ * @throws IllegalArgumentException
+ * if length is not {@link #SIZEOF_LONG} or if there's not enough
+ * room in the array at the offset indicated.
+ */
+ public static long toLong(byte[] bytes, int offset, final int length) {
+ if (length != SIZEOF_LONG || offset + length > bytes.length) {
+ throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_LONG);
+ }
+ long l = 0;
+ for (int i = offset; i < offset + length; i++) {
+ l <<= 8;
+ l ^= bytes[i] & 0xFF;
+ }
+ return l;
+ }
+
+ private static IllegalArgumentException explainWrongLengthOrOffset(
+ final byte[] bytes, final int offset, final int length,
+ final int expectedLength) {
+ String reason;
+ if (length != expectedLength) {
+ reason = "Wrong length: " + length + ", expected " + expectedLength;
+ } else {
+ reason = "offset (" + offset + ") + length (" + length + ") exceed the"
+ + " capacity of the array: " + bytes.length;
+ }
+ return new IllegalArgumentException(reason);
+ }
+
+ /**
+ * Put a long value out to the specified byte array position.
+ *
+ * @param bytes
+ * the byte array
+ * @param offset
+ * position in the array
+ * @param val
+ * long to write out
+ * @return incremented offset
+ * @throws IllegalArgumentException
+ * if the byte array given doesn't have enough room at the offset
+ * specified.
+ */
+ public static int putLong(byte[] bytes, int offset, long val) {
+ if (bytes.length - offset < SIZEOF_LONG) {
+ throw new IllegalArgumentException("Not enough room to put a long at"
+ + " offset " + offset + " in a " + bytes.length + " byte array");
+ }
+ for (int i = offset + 7; i > offset; i--) {
+ bytes[i] = (byte) val;
+ val >>>= 8;
+ }
+ bytes[offset] = (byte) val;
+ return offset + SIZEOF_LONG;
+ }
+
+ /**
+ * Presumes float encoded as IEEE 754 floating-point "single format"
+ *
+ * @param bytes
+ * byte array
+ * @return Float made from passed byte array.
+ */
+ public static float toFloat(byte[] bytes) {
+ return toFloat(bytes, 0);
+ }
+
+ /**
+ * Presumes float encoded as IEEE 754 floating-point "single format"
+ *
+ * @param bytes
+ * array to convert
+ * @param offset
+ * offset into array
+ * @return Float made from passed byte array.
+ */
+ public static float toFloat(byte[] bytes, int offset) {
+ return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT));
+ }
+
+ /**
+ * @param bytes
+ * byte array
+ * @param offset
+ * offset to write to
+ * @param f
+ * float value
+ * @return New offset in <code>bytes</code>
+ */
+ public static int putFloat(byte[] bytes, int offset, float f) {
+ return putInt(bytes, offset, Float.floatToRawIntBits(f));
+ }
+
+ /**
+ * @param f
+ * float value
+ * @return the float represented as byte []
+ */
+ public static byte[] toBytes(final float f) {
+ // Encode it as int
+ return Bytes.toBytes(Float.floatToRawIntBits(f));
+ }
+
+ /**
+ * @param bytes
+ * byte array
+ * @return Return double made from passed bytes.
+ */
+ public static double toDouble(final byte[] bytes) {
+ return toDouble(bytes, 0);
+ }
+
+ /**
+ * @param bytes
+ * byte array
+ * @param offset
+ * offset where double is
+ * @return Return double made from passed bytes.
+ */
+ public static double toDouble(final byte[] bytes, final int offset) {
+ return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG));
+ }
+
+ /**
+ * @param bytes
+ * byte array
+ * @param offset
+ * offset to write to
+ * @param d
+ * value
+ * @return New offset into array <code>bytes</code>
+ */
+ public static int putDouble(byte[] bytes, int offset, double d) {
+ return putLong(bytes, offset, Double.doubleToLongBits(d));
+ }
+
+ /**
+ * Serialize a double as the IEEE 754 double format output. The resultant
+ * array will be 8 bytes long.
+ *
+ * @param d
+ * value
+ * @return the double represented as byte []
+ */
+ public static byte[] toBytes(final double d) {
+ // Encode it as a long
+ return Bytes.toBytes(Double.doubleToRawLongBits(d));
+ }
+
+ /**
+ * Convert an int value to a byte array
+ *
+ * @param val
+ * value
+ * @return the byte array
+ */
+ public static byte[] toBytes(int val) {
+ byte[] b = new byte[4];
+ for (int i = 3; i > 0; i--) {
+ b[i] = (byte) val;
+ val >>>= 8;
+ }
+ b[0] = (byte) val;
+ return b;
+ }
+
+ /**
+ * Converts a byte array to an int value
+ *
+ * @param bytes
+ * byte array
+ * @return the int value
+ */
+ public static int toInt(byte[] bytes) {
+ return toInt(bytes, 0, SIZEOF_INT);
+ }
+
+ /**
+ * Converts a byte array to an int value
+ *
+ * @param bytes
+ * byte array
+ * @param offset
+ * offset into array
+ * @return the int value
+ */
+ public static int toInt(byte[] bytes, int offset) {
+ return toInt(bytes, offset, SIZEOF_INT);
+ }
+
+ /**
+ * Converts a byte array to an int value
+ *
+ * @param bytes
+ * byte array
+ * @param offset
+ * offset into array
+ * @param length
+ * length of int (has to be {@link #SIZEOF_INT})
+ * @return the int value
+ * @throws IllegalArgumentException
+ * if length is not {@link #SIZEOF_INT} or if there's not enough
+ * room in the array at the offset indicated.
+ */
+ public static int toInt(byte[] bytes, int offset, final int length) {
+ if (length != SIZEOF_INT || offset + length > bytes.length) {
+ throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_INT);
+ }
+ int n = 0;
+ for (int i = offset; i < (offset + length); i++) {
+ n <<= 8;
+ n ^= bytes[i] & 0xFF;
+ }
+ return n;
+ }
+
+ /**
+ * Put an int value out to the specified byte array position.
+ *
+ * @param bytes
+ * the byte array
+ * @param offset
+ * position in the array
+ * @param val
+ * int to write out
+ * @return incremented offset
+ * @throws IllegalArgumentException
+ * if the byte array given doesn't have enough room at the offset
+ * specified.
+ */
+ public static int putInt(byte[] bytes, int offset, int val) {
+ if (bytes.length - offset < SIZEOF_INT) {
+ throw new IllegalArgumentException("Not enough room to put an int at"
+ + " offset " + offset + " in a " + bytes.length + " byte array");
+ }
+ for (int i = offset + 3; i > offset; i--) {
+ bytes[i] = (byte) val;
+ val >>>= 8;
+ }
+ bytes[offset] = (byte) val;
+ return offset + SIZEOF_INT;
+ }
+
+ /**
+ * Convert a short value to a byte array of {@link #SIZEOF_SHORT} bytes long.
+ *
+ * @param val
+ * value
+ * @return the byte array
+ */
+ public static byte[] toBytes(short val) {
+ byte[] b = new byte[SIZEOF_SHORT];
+ b[1] = (byte) val;
+ val >>= 8;
+ b[0] = (byte) val;
+ return b;
+ }
+
+ /**
+ * Converts a byte array to a short value
+ *
+ * @param bytes
+ * byte array
+ * @return the short value
+ */
+ public static short toShort(byte[] bytes) {
+ return toShort(bytes, 0, SIZEOF_SHORT);
+ }
+
+ /**
+ * Converts a byte array to a short value
+ *
+ * @param bytes
+ * byte array
+ * @param offset
+ * offset into array
+ * @return the short value
+ */
+ public static short toShort(byte[] bytes, int offset) {
+ return toShort(bytes, offset, SIZEOF_SHORT);
+ }
+
+ /**
+ * Converts a byte array to a short value
+ *
+ * @param bytes
+ * byte array
+ * @param offset
+ * offset into array
+ * @param length
+ * length, has to be {@link #SIZEOF_SHORT}
+ * @return the short value
+ * @throws IllegalArgumentException
+ * if length is not {@link #SIZEOF_SHORT} or if there's not enough
+ * room in the array at the offset indicated.
+ */
+ public static short toShort(byte[] bytes, int offset, final int length) {
+ if (length != SIZEOF_SHORT || offset + length > bytes.length) {
+ throw explainWrongLengthOrOffset(bytes, offset, length, SIZEOF_SHORT);
+ }
+ short n = 0;
+ n ^= bytes[offset] & 0xFF;
+ n <<= 8;
+ n ^= bytes[offset + 1] & 0xFF;
+ return n;
+ }
+
+ /**
+ * Put a short value out to the specified byte array position.
+ *
+ * @param bytes
+ * the byte array
+ * @param offset
+ * position in the array
+ * @param val
+ * short to write out
+ * @return incremented offset
+ * @throws IllegalArgumentException
+ * if the byte array given doesn't have enough room at the offset
+ * specified.
+ */
+ public static int putShort(byte[] bytes, int offset, short val) {
+ if (bytes.length - offset < SIZEOF_SHORT) {
+ throw new IllegalArgumentException("Not enough room to put a short at"
+ + " offset " + offset + " in a " + bytes.length + " byte array");
+ }
+ bytes[offset + 1] = (byte) val;
+ val >>= 8;
+ bytes[offset] = (byte) val;
+ return offset + SIZEOF_SHORT;
+ }
+
+ /**
+ * @param vint
+ * Integer to make a vint of.
+ * @return Vint as bytes array.
+ */
+ public static byte[] vintToBytes(final long vint) {
+ long i = vint;
+ int size = WritableUtils.getVIntSize(i);
+ byte[] result = new byte[size];
+ int offset = 0;
+ if (i >= -112 && i <= 127) {
+ result[offset] = (byte) i;
+ return result;
+ }
+
+ int len = -112;
+ if (i < 0) {
+ i ^= -1L; // take one's complement'
+ len = -120;
+ }
+
+ long tmp = i;
+ while (tmp != 0) {
+ tmp = tmp >> 8;
+ len--;
+ }
+
+ result[offset++] = (byte) len;
+
+ len = (len < -120) ? -(len + 120) : -(len + 112);
+
+ for (int idx = len; idx != 0; idx--) {
+ int shiftbits = (idx - 1) * 8;
+ long mask = 0xFFL << shiftbits;
+ result[offset++] = (byte) ((i & mask) >> shiftbits);
+ }
+ return result;
+ }
+
+ /**
+ * @param buffer
+ * buffer to convert
+ * @return vint bytes as an integer.
+ */
+ public static long bytesToVint(final byte[] buffer) {
+ int offset = 0;
+ byte firstByte = buffer[offset++];
+ int len = WritableUtils.decodeVIntSize(firstByte);
+ if (len == 1) {
+ return firstByte;
+ }
+ long i = 0;
+ for (int idx = 0; idx < len - 1; idx++) {
+ byte b = buffer[offset++];
+ i = i << 8;
+ i = i | (b & 0xFF);
+ }
+ return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
+ }
+
+ /**
+ * Reads a zero-compressed encoded long from a byte array and returns it.
+ *
+ * @param buffer
+ * Binary array
+ * @param offset
+ * Offset into array at which vint begins.
+ * @throws java.io.IOException
+ * e
+ * @return deserialized long from buffer.
+ */
+ public static long readVLong(final byte[] buffer, final int offset)
+ throws IOException {
+ byte firstByte = buffer[offset];
+ int len = WritableUtils.decodeVIntSize(firstByte);
+ if (len == 1) {
+ return firstByte;
+ }
+ long i = 0;
+ for (int idx = 0; idx < len - 1; idx++) {
+ byte b = buffer[offset + 1 + idx];
+ i = i << 8;
+ i = i | (b & 0xFF);
+ }
+ return (WritableUtils.isNegativeVInt(firstByte) ? ~i : i);
+ }
+
+ /**
+ * @param left
+ * left operand
+ * @param right
+ * right operand
+ * @return 0 if equal, < 0 if left is less than right, etc.
+ */
+ public static int compareTo(final byte[] left, final byte[] right) {
+ return compareTo(left, 0, left.length, right, 0, right.length);
+ }
+
+ /**
+ * Lexicographically compare two arrays.
+ *
+ * @param buffer1
+ * left operand
+ * @param buffer2
+ * right operand
+ * @param offset1
+ * Where to start comparing in the left buffer
+ * @param offset2
+ * Where to start comparing in the right buffer
+ * @param length1
+ * How much to compare from the left buffer
+ * @param length2
+ * How much to compare from the right buffer
+ * @return 0 if equal, < 0 if left is less than right, etc.
+ */
+ public static int compareTo(byte[] buffer1, int offset1, int length1,
+ byte[] buffer2, int offset2, int length2) {
+ // Bring WritableComparator code local
+ int end1 = offset1 + length1;
+ int end2 = offset2 + length2;
+ for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
+ int a = (buffer1[i] & 0xff);
+ int b = (buffer2[j] & 0xff);
+ if (a != b) {
+ return a - b;
+ }
+ }
+ return length1 - length2;
+ }
+
+ /**
+ * @param left
+ * left operand
+ * @param right
+ * right operand
+ * @return True if equal
+ */
+ public static boolean equals(final byte[] left, final byte[] right) {
+ // Could use Arrays.equals?
+ // noinspection SimplifiableConditionalExpression
+ if (left == null && right == null) {
+ return true;
+ }
+ return (left == null || right == null || (left.length != right.length) ? false
+ : compareTo(left, right) == 0);
+ }
+
+ /**
+ * Return true if the byte array on the right is a prefix of the byte array on
+ * the left.
+ */
+ public static boolean startsWith(byte[] bytes, byte[] prefix) {
+ return bytes != null && prefix != null && bytes.length >= prefix.length
+ && compareTo(bytes, 0, prefix.length, prefix, 0, prefix.length) == 0;
+ }
+
+ /**
+ * @param b
+ * bytes to hash
+ * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
+ * passed in array. This method is what
+ * {@link org.apache.hadoop.io.Text} and
+ * {@link ImmutableBytesWritable} use calculating hash code.
+ */
+ public static int hashCode(final byte[] b) {
+ return hashCode(b, b.length);
+ }
+
+ /**
+ * @param b
+ * value
+ * @param length
+ * length of the value
+ * @return Runs {@link WritableComparator#hashBytes(byte[], int)} on the
+ * passed in array. This method is what
+ * {@link org.apache.hadoop.io.Text} and
+ * {@link ImmutableBytesWritable} use calculating hash code.
+ */
+ public static int hashCode(final byte[] b, final int length) {
+ return WritableComparator.hashBytes(b, length);
+ }
+
+ /**
+ * @param b
+ * bytes to hash
+ * @return A hash of <code>b</code> as an Integer that can be used as key in
+ * Maps.
+ */
+ public static Integer mapKey(final byte[] b) {
+ return hashCode(b);
+ }
+
+ /**
+ * @param b
+ * bytes to hash
+ * @param length
+ * length to hash
+ * @return A hash of <code>b</code> as an Integer that can be used as key in
+ * Maps.
+ */
+ public static Integer mapKey(final byte[] b, final int length) {
+ return hashCode(b, length);
+ }
+
+ /**
+ * @param a
+ * lower half
+ * @param b
+ * upper half
+ * @return New array that has a in lower half and b in upper half.
+ */
+ public static byte[] add(final byte[] a, final byte[] b) {
+ return add(a, b, EMPTY_BYTE_ARRAY);
+ }
+
+ /**
+ * @param a
+ * first third
+ * @param b
+ * second third
+ * @param c
+ * third third
+ * @return New array made from a, b and c
+ */
+ public static byte[] add(final byte[] a, final byte[] b, final byte[] c) {
+ byte[] result = new byte[a.length + b.length + c.length];
+ System.arraycopy(a, 0, result, 0, a.length);
+ System.arraycopy(b, 0, result, a.length, b.length);
+ System.arraycopy(c, 0, result, a.length + b.length, c.length);
+ return result;
+ }
+
+ /**
+ * @param a
+ * array
+ * @param length
+ * amount of bytes to grab
+ * @return First <code>length</code> bytes from <code>a</code>
+ */
+ public static byte[] head(final byte[] a, final int length) {
+ if (a.length < length) {
+ return null;
+ }
+ byte[] result = new byte[length];
+ System.arraycopy(a, 0, result, 0, length);
+ return result;
+ }
+
+ /**
+ * @param a
+ * array
+ * @param length
+ * amount of bytes to snarf
+ * @return Last <code>length</code> bytes from <code>a</code>
+ */
+ public static byte[] tail(final byte[] a, final int length) {
+ if (a.length < length) {
+ return null;
+ }
+ byte[] result = new byte[length];
+ System.arraycopy(a, a.length - length, result, 0, length);
+ return result;
+ }
+
+ /**
+ * @param a
+ * array
+ * @param length
+ * number of zero bytes to prepend
+ * @return Value in <code>a</code> plus <code>length</code> prepended 0 bytes
+ */
+ public static byte[] padHead(final byte[] a, final int length) {
+ byte[] padding = new byte[length];
+ for (int i = 0; i < length; i++) {
+ padding[i] = 0;
+ }
+ return add(padding, a);
+ }
+
+ /**
+ * @param a
+ * array
+ * @param length
+ * number of zero bytes to append
+ * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes
+ */
+ public static byte[] padTail(final byte[] a, final int length) {
+ byte[] padding = new byte[length];
+ for (int i = 0; i < length; i++) {
+ padding[i] = 0;
+ }
+ return add(a, padding);
+ }
+
+ /**
+  * Split passed range. Expensive operation relatively. Uses BigInteger math.
+  * Useful splitting ranges for MapReduce jobs.
+  *
+  * @param a
+  *          Beginning of range
+  * @param b
+  *          End of range
+  * @param num
+  *          Number of times to split range. Pass 1 if you want to split the
+  *          range in two; i.e. one split.
+  * @return Array of size <code>num + 2</code> filled with the values produced
+  *         by {@link #iterateOnSplits(byte[], byte[], int)}, or
+  *         <code>null</code> when that method returns <code>null</code>
+  * @throws IllegalArgumentException
+  *           if {@link #iterateOnSplits(byte[], byte[], int)} rejects the
+  *           arguments
+  */
+ public static byte[][] split(final byte[] a, final byte[] b, final int num) {
+   // Let iterateOnSplits validate the arguments before the result array is
+   // allocated, so an invalid num is reported as an IllegalArgumentException
+   // rather than failing on the array allocation.
+   final Iterable<byte[]> iter = iterateOnSplits(a, b, num);
+   if (iter == null) {
+     return null;
+   }
+   final byte[][] ret = new byte[num + 2][];
+   int i = 0;
+   for (byte[] elem : iter) {
+     ret[i++] = elem;
+   }
+   return ret;
+ }
+
+ /**
+ * Iterate over keys within the passed inclusive range.
+ */
+ public static Iterable<byte[]> iterateOnSplits(final byte[] a,
+ final byte[] b, final int num) {
+ byte[] aPadded;
+ byte[] bPadded;
+ if (a.length < b.length) {
+ aPadded = padTail(a, b.length - a.length);
+ bPadded = b;
+ } else if (b.length < a.length) {
+ aPadded = a;
+ bPadded = padTail(b, a.length - b.length);
+ } else {
+ aPadded = a;
+ bPadded = b;
+ }
+ if (compareTo(aPadded, bPadded) >= 0) {
+ throw new IllegalArgumentException("b <= a");
+ }
+ if (num <= 0) {
+ throw new IllegalArgumentException("num cannot be < 0");
+ }
+ byte[] prependHeader = { 1, 0 };
+ final BigInteger startBI = new BigInteger(add(prependHeader, aPadded));
+ final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded));
+ final BigInteger diffBI = stopBI.subtract(startBI);
+ final BigInteger splitsBI = BigInteger.valueOf(num + 1);
+ if (diffBI.compareTo(splitsBI) < 0) {
+ return null;
+ }
+ final BigInteger intervalBI;
+ try {
+ intervalBI = diffBI.divide(splitsBI);
+ } catch (Exception e) {
+ LOG.error("Exception caught during division", e);
+ return null;
+ }
+
+ final Iterator<byte[]> iterator = new Iterator<byte[]>() {
+ private int i = -1;
+
+ @Override
+ public boolean hasNext() {
+ return i < num + 1;
+ }
+
+ @Override
+ public byte[] next() {
+ i++;
+ if (i == 0)
+ return a;
+ if (i == num + 1)
+ return b;
+
+ BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger
+ .valueOf(i)));
+ byte[] padded = curBI.toByteArray();
+ if (padded[1] == 0)
+ padded = tail(padded, padded.length - 2);
+ else
+ padded = tail(padded, padded.length - 1);
+ return padded;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ };
+
+ return new Iterable<byte[]>() {
+ @Override
+ public Iterator<byte[]> iterator() {
+ return iterator;
+ }
+ };
+ }
+
+ /**
+  * Converts every string of an array with {@link #toBytes(String)}.
+  *
+  * @param t
+  *          strings to convert
+  * @return array with one converted byte array per entry of <code>t</code>,
+  *         in the same order
+  */
+ public static byte[][] toByteArrays(final String[] t) {
+   final byte[][] converted = new byte[t.length][];
+   int slot = 0;
+   for (final String text : t) {
+     converted[slot] = Bytes.toBytes(text);
+     slot++;
+   }
+   return converted;
+ }
+
+ /**
+  * Converts a single string with {@link #toBytes(String)} and wraps the
+  * result via {@link #toByteArrays(byte[])}.
+  *
+  * @param column
+  *          operand
+  * @return the result of {@link #toByteArrays(byte[])} applied to the
+  *         converted bytes of <code>column</code>
+  */
+ public static byte[][] toByteArrays(final String column) {
+   final byte[] columnBytes = toBytes(column);
+   return toByteArrays(columnBytes);
+ }
+
+ /**
+  * Wraps a byte array in a one-element array of byte arrays.
+  *
+  * @param column
+  *          operand
+  * @return a new single-entry array whose only element is
+  *         <code>column</code> itself (the reference is stored, not a copy)
+  */
+ public static byte[][] toByteArrays(final byte[] column) {
+   return new byte[][] { column };
+ }
+
+ /**
+  * Binary search for a key among the entries of <code>arr</code>.
+  *
+  * @param arr
+  *          array of byte arrays to search in
+  * @param key
+  *          the key you want to find
+  * @param offset
+  *          the offset in the key you want to find
+  * @param length
+  *          the length of the key
+  * @param comparator
+  *          a comparator to compare.
+  * @return index of the matching entry, or <code>-(low + 1)</code> where
+  *         <code>low</code> is the lower search bound at the point the
+  *         search gave up, when no entry compares equal
+  */
+ public static int binarySearch(byte[][] arr, byte[] key, int offset,
+     int length, RawComparator<byte[]> comparator) {
+   int lower = 0;
+   int upper = arr.length - 1;
+
+   while (lower <= upper) {
+     final int middle = (lower + upper) >>> 1;
+     final byte[] candidate = arr[middle];
+     // The key must stay on the left-hand side: the comparator has special
+     // logic when the 'left side' is a special key.
+     final int order = comparator.compare(key, offset, length, candidate, 0,
+         candidate.length);
+     if (order == 0) {
+       // Exact hit.
+       return middle;
+     }
+     if (order > 0) {
+       // Key lives above the midpoint.
+       lower = middle + 1;
+     } else {
+       // Key lives below the midpoint.
+       upper = middle - 1;
+     }
+   }
+   return -(lower + 1);
+ }
+
+ /**
+  * Bytewise binary increment/deincrement of long contained in byte array on
+  * given amount.
+  * <p>
+  * A value shorter than SIZEOF_LONG is first copied into a new SIZEOF_LONG
+  * array, sign-extended according to its first byte; an empty value is
+  * treated as zero. A value that already has SIZEOF_LONG bytes is handed to
+  * the increment helpers as is, so it is modified in place and the same
+  * array is returned.
+  *
+  * @param value
+  *          - array of bytes containing long (length <= SIZEOF_LONG)
+  * @param amount
+  *          value will be incremented on (deincremented if negative)
+  * @return array of bytes containing incremented long (length == SIZEOF_LONG)
+  * @throws IllegalArgumentException
+  *           - if value.length > SIZEOF_LONG
+  * @throws IOException
+  *           declared in the signature; nothing in this method body throws
+  *           it
+  */
+ public static byte[] incrementBytes(byte[] value, long amount)
+     throws IOException {
+   byte[] val = value;
+   if (val.length > SIZEOF_LONG) {
+     throw new IllegalArgumentException("Increment Bytes - value too big: "
+         + val.length);
+   }
+   if (val.length < SIZEOF_LONG) {
+     // Hopefully this doesn't happen too often.
+     final byte[] newvalue = new byte[SIZEOF_LONG];
+     // An empty input has no sign byte to inspect; it is widened as zero
+     // instead of failing on val[0].
+     if (val.length > 0 && val[0] < 0) {
+       // Negative input: pre-fill with 0xFF so the copy below sign-extends.
+       java.util.Arrays.fill(newvalue, (byte) -1);
+     }
+     System.arraycopy(val, 0, newvalue, newvalue.length - val.length,
+         val.length);
+     val = newvalue;
+   }
+   if (amount == 0) {
+     return val;
+   }
+   if (val[0] < 0) {
+     return binaryIncrementNeg(val, amount);
+   }
+   return binaryIncrementPos(val, amount);
+ }
+
+ /*
+ * increment/deincrement for positive value
+ *
+ * Called by incrementBytes when the first byte of the array is not negative.
+ * Applies amount to value one byte at a time, starting at the last array
+ * element and moving towards the first. The array is modified in place and
+ * the same array is returned.
+ *
+ * Per step: the low part of the remaining magnitude (amo), multiplied by
+ * sign, is added to the current byte read as an unsigned value; the
+ * remaining magnitude is shifted right by eight bits; a result above 255 or
+ * below 0 adjusts the remaining magnitude so the next byte sees the carry or
+ * borrow. The loop ends early once the remaining magnitude reaches 0.
+ *
+ * NOTE(review): for amount == Long.MIN_VALUE the negation overflows and amo
+ * stays negative - confirm whether callers can pass that value.
+ */
+ private static byte[] binaryIncrementPos(byte[] value, long amount) {
+ long amo = amount;
+ int sign = 1;
+ if (amount < 0) {
+ amo = -amount;
+ sign = -1;
+ }
+ for (int i = 0; i < value.length; i++) {
+ int cur = ((int) amo % 256) * sign; // NOTE(review): the (int) cast binds to amo before % is applied - confirm this is intended for magnitudes beyond the int range
+ amo = (amo >> 8);
+ int val = value[value.length - i - 1] & 0x0ff; // current byte as an unsigned value
+ int total = val + cur;
+ if (total > 255) {
+ amo += sign;
+ total %= 256;
+ } else if (total < 0) {
+ amo -= sign;
+ }
+ value[value.length - i - 1] = (byte) total; // the narrowing cast keeps only the low eight bits
+ if (amo == 0)
+ return value;
+ }
+ return value;
+ }
+
+ /*
+ * increment/deincrement for negative value
+ *
+ * Called by incrementBytes when the first byte of the array is negative.
+ * Applies amount to value one byte at a time, starting at the last array
+ * element and moving towards the first. The array is modified in place and
+ * the same array is returned.
+ *
+ * Per step: the current byte is bit-inverted, masked to eight bits and
+ * incremented by one (val); that quantity is subtracted from the low part of
+ * the remaining magnitude (amo) multiplied by sign; the remaining magnitude
+ * is shifted right by eight bits and then adjusted when the result is
+ * non-negative or below -256. The loop ends early once the remaining
+ * magnitude reaches 0.
+ *
+ * NOTE(review): for amount == Long.MIN_VALUE the negation overflows and amo
+ * stays negative - confirm whether callers can pass that value.
+ */
+ private static byte[] binaryIncrementNeg(byte[] value, long amount) {
+ long amo = amount;
+ int sign = 1;
+ if (amount < 0) {
+ amo = -amount;
+ sign = -1;
+ }
+ for (int i = 0; i < value.length; i++) {
+ int cur = ((int) amo % 256) * sign; // NOTE(review): the (int) cast binds to amo before % is applied - confirm this is intended for magnitudes beyond the int range
+ amo = (amo >> 8);
+ int val = ((~value[value.length - i - 1]) & 0x0ff) + 1; // inverted byte, masked to eight bits, plus one
+ int total = cur - val;
+ if (total >= 0) {
+ amo += sign;
+ } else if (total < -256) {
+ amo -= sign;
+ total %= 256;
+ }
+ value[value.length - i - 1] = (byte) total; // the narrowing cast keeps only the low eight bits
+ if (amo == 0)
+ return value;
+ }
+ return value;
+ }
}
Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/CommandRunner.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/CommandRunner.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/CommandRunner.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/CommandRunner.java Fri Jan 9 06:34:33 2015
@@ -82,11 +82,11 @@ public class CommandRunner {
}
public void evaluate() throws IOException {
- this.exec();
+ this.exec();
}
/**
- *
+ *
* @return process exit value (return code) or -1 if timed out.
* @throws IOException
*/
@@ -94,13 +94,11 @@ public class CommandRunner {
Process proc = Runtime.getRuntime().exec(_command);
_barrier = new CyclicBarrier(3 + ((_stdin != null) ? 1 : 0));
- PullerThread so =
- new PullerThread("STDOUT", proc.getInputStream(), _stdout);
+ PullerThread so = new PullerThread("STDOUT", proc.getInputStream(), _stdout);
so.setDaemon(true);
so.start();
- PullerThread se =
- new PullerThread("STDERR", proc.getErrorStream(), _stderr);
+ PullerThread se = new PullerThread("STDERR", proc.getErrorStream(), _stderr);
se.setDaemon(true);
se.start();
@@ -145,11 +143,11 @@ public class CommandRunner {
Thread.sleep(1000);
_xit = proc.exitValue();
} catch (InterruptedException ie) {
- if (Thread.interrupted()) {
- break; // stop waiting on an interrupt for this thread
- } else {
- continue;
- }
+ if (Thread.interrupted()) {
+ break; // stop waiting on an interrupt for this thread
+ } else {
+ continue;
+ }
} catch (IllegalThreadStateException iltse) {
continue;
}
@@ -181,11 +179,8 @@ public class CommandRunner {
private boolean _closeInput;
- protected PumperThread(
- String name,
- InputStream is,
- OutputStream os,
- boolean closeInput) {
+ protected PumperThread(String name, InputStream is, OutputStream os,
+ boolean closeInput) {
super(name);
_is = is;
_os = os;
@@ -218,12 +213,12 @@ public class CommandRunner {
}
}
try {
- _barrier.await();
- } catch (InterruptedException ie) {
- /* IGNORE */
- } catch (BrokenBarrierException bbe) {
- /* IGNORE */
- }
+ _barrier.await();
+ } catch (InterruptedException ie) {
+ /* IGNORE */
+ } catch (BrokenBarrierException bbe) {
+ /* IGNORE */
+ }
}
}
@@ -269,8 +264,9 @@ public class CommandRunner {
for (int i = 0; i < args.length; i++) {
if (args[i].equals("-timeout")) {
- timeout = Integer.parseInt(args[++i]);;
- } else if (i != args.length-2) {
+ timeout = Integer.parseInt(args[++i]);
+ ;
+ } else if (i != args.length - 2) {
System.err.println(usage);
System.exit(-1);
} else {
@@ -290,6 +286,6 @@ public class CommandRunner {
cr.evaluate();
- System.err.println("output value: "+cr.getExitValue());
+ System.err.println("output value: " + cr.getExitValue());
}
}
Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/DeflateUtils.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/DeflateUtils.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/DeflateUtils.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/DeflateUtils.java Fri Jan 9 06:34:33 2015
@@ -28,19 +28,18 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * A collection of utility methods for working on deflated data.
+ * A collection of utility methods for working on deflated data.
*/
public class DeflateUtils {
-
+
private static final Logger LOG = LoggerFactory.getLogger(DeflateUtils.class);
private static final int EXPECTED_COMPRESSION_RATIO = 5;
private static final int BUF_SIZE = 4096;
/**
- * Returns an inflated copy of the input array. If the deflated
- * input has been truncated or corrupted, a best-effort attempt is
- * made to inflate as much as possible. If no data can be extracted
- * <code>null</code> is returned.
+ * Returns an inflated copy of the input array. If the deflated input has been
+ * truncated or corrupted, a best-effort attempt is made to inflate as much as
+ * possible. If no data can be extracted <code>null</code> is returned.
*/
public static final byte[] inflateBestEffort(byte[] in) {
return inflateBestEffort(in, Integer.MAX_VALUE);
@@ -48,37 +47,36 @@ public class DeflateUtils {
/**
* Returns an inflated copy of the input array, truncated to
- * <code>sizeLimit</code> bytes, if necessary. If the deflated input
- * has been truncated or corrupted, a best-effort attempt is made to
- * inflate as much as possible. If no data can be extracted
- * <code>null</code> is returned.
+ * <code>sizeLimit</code> bytes, if necessary. If the deflated input has been
+ * truncated or corrupted, a best-effort attempt is made to inflate as much as
+ * possible. If no data can be extracted <code>null</code> is returned.
*/
public static final byte[] inflateBestEffort(byte[] in, int sizeLimit) {
- // decompress using InflaterInputStream
- ByteArrayOutputStream outStream =
- new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+ // decompress using InflaterInputStream
+ ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+ EXPECTED_COMPRESSION_RATIO * in.length);
// "true" because HTTP does not provide zlib headers
Inflater inflater = new Inflater(true);
- InflaterInputStream inStream =
- new InflaterInputStream(new ByteArrayInputStream(in), inflater);
+ InflaterInputStream inStream = new InflaterInputStream(
+ new ByteArrayInputStream(in), inflater);
byte[] buf = new byte[BUF_SIZE];
int written = 0;
while (true) {
try {
- int size = inStream.read(buf);
- if (size <= 0)
- break;
- if ((written + size) > sizeLimit) {
- outStream.write(buf, 0, sizeLimit - written);
- break;
- }
- outStream.write(buf, 0, size);
- written+= size;
+ int size = inStream.read(buf);
+ if (size <= 0)
+ break;
+ if ((written + size) > sizeLimit) {
+ outStream.write(buf, 0, sizeLimit - written);
+ break;
+ }
+ outStream.write(buf, 0, size);
+ written += size;
} catch (Exception e) {
- LOG.info( "Caught Exception in inflateBestEffort", e );
- break;
+ LOG.info("Caught Exception in inflateBestEffort", e);
+ break;
}
}
try {
@@ -89,23 +87,24 @@ public class DeflateUtils {
return outStream.toByteArray();
}
-
/**
- * Returns an inflated copy of the input array.
- * @throws IOException if the input cannot be properly decompressed
+ * Returns an inflated copy of the input array.
+ *
+ * @throws IOException
+ * if the input cannot be properly decompressed
*/
public static final byte[] inflate(byte[] in) throws IOException {
- // decompress using InflaterInputStream
- ByteArrayOutputStream outStream =
- new ByteArrayOutputStream(EXPECTED_COMPRESSION_RATIO * in.length);
+ // decompress using InflaterInputStream
+ ByteArrayOutputStream outStream = new ByteArrayOutputStream(
+ EXPECTED_COMPRESSION_RATIO * in.length);
- InflaterInputStream inStream =
- new InflaterInputStream ( new ByteArrayInputStream(in) );
+ InflaterInputStream inStream = new InflaterInputStream(
+ new ByteArrayInputStream(in));
byte[] buf = new byte[BUF_SIZE];
while (true) {
int size = inStream.read(buf);
- if (size <= 0)
+ if (size <= 0)
break;
outStream.write(buf, 0, size);
}
@@ -118,9 +117,9 @@ public class DeflateUtils {
* Returns a deflated copy of the input array.
*/
public static final byte[] deflate(byte[] in) {
- // compress using DeflaterOutputStream
- ByteArrayOutputStream byteOut =
- new ByteArrayOutputStream(in.length / EXPECTED_COMPRESSION_RATIO);
+ // compress using DeflaterOutputStream
+ ByteArrayOutputStream byteOut = new ByteArrayOutputStream(in.length
+ / EXPECTED_COMPRESSION_RATIO);
DeflaterOutputStream outStream = new DeflaterOutputStream(byteOut);
Modified: nutch/branches/2.x/src/java/org/apache/nutch/util/DomUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/util/DomUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/util/DomUtil.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/util/DomUtil.java Fri Jan 9 06:34:33 2015
@@ -38,7 +38,6 @@ import org.xml.sax.SAXException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
public class DomUtil {
private final static Logger LOG = LoggerFactory.getLogger(DomUtil.class);
@@ -61,16 +60,16 @@ public class DomUtil {
input.setEncoding("UTF-8");
parser.parse(input);
int i = 0;
- while (! (parser.getDocument().getChildNodes().item(i) instanceof Element)) {
- i++;
- }
- element = (Element)parser.getDocument().getChildNodes().item(i);
+ while (!(parser.getDocument().getChildNodes().item(i) instanceof Element)) {
+ i++;
+ }
+ element = (Element) parser.getDocument().getChildNodes().item(i);
} catch (FileNotFoundException e) {
- LOG.error("Failed to find file: ", e);
+ LOG.error("Failed to find file: ", e);
} catch (SAXException e) {
- LOG.error("Failed with the following SAX exception: ", e);
+ LOG.error("Failed with the following SAX exception: ", e);
} catch (IOException e) {
- LOG.error("Failed with the following IOException", e);
+ LOG.error("Failed with the following IOException", e);
}
return element;
}
@@ -93,13 +92,14 @@ public class DomUtil {
transformer.transform(source, result);
os.flush();
} catch (UnsupportedEncodingException e1) {
- LOG.error("Failed with the following UnsupportedEncodingException: ", e1);
+ LOG.error("Failed with the following UnsupportedEncodingException: ", e1);
} catch (IOException e1) {
- LOG.error("Failed to with the following IOException: ", e1);
+ LOG.error("Failed to with the following IOException: ", e1);
} catch (TransformerConfigurationException e2) {
- LOG.error("Failed with the following TransformerConfigurationException: ", e2);
+ LOG.error(
+ "Failed with the following TransformerConfigurationException: ", e2);
} catch (TransformerException ex) {
- LOG.error("Failed with the following TransformerException: ", ex);
+ LOG.error("Failed with the following TransformerException: ", ex);
}
}
}