You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2009/04/27 20:05:35 UTC
svn commit: r769076 - in /hadoop/hbase/trunk: CHANGES.txt
src/java/org/apache/hadoop/hbase/util/Bytes.java
src/test/org/apache/hadoop/hbase/util/TestBytes.java
Author: stack
Date: Mon Apr 27 18:05:34 2009
New Revision: 769076
URL: http://svn.apache.org/viewvc?rev=769076&view=rev
Log:
HBASE-1183 New MR splitting algorithm and other new features need a way to split a key range in N chunks
Modified:
hadoop/hbase/trunk/CHANGES.txt
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java
hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java
Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=769076&r1=769075&r2=769076&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Mon Apr 27 18:05:34 2009
@@ -167,6 +167,8 @@
(Evgeny Ryabitskiy via Stack)
HBASE-1260 Bytes utility class changes: remove usage of ByteBuffer and
provide additional ByteBuffer primitives (Jon Gray via Stack)
+ HBASE-1183 New MR splitting algorithm and other new features need a way to
+ split a key range in N chunks (Jon Gray via Stack)
Release 0.19.0 - 01/21/2009
INCOMPATIBLE CHANGES
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java?rev=769076&r1=769075&r2=769076&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java Mon Apr 27 18:05:34 2009
@@ -25,6 +25,7 @@
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.Comparator;
+import java.math.BigInteger;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
@@ -749,8 +750,108 @@
return result;
}
+ /**
+ * @param a
+ * @param length
+ * @return First <code>length</code> bytes from <code>a</code>
+ */
+ public static byte [] head(final byte [] a, final int length) {
+ if(a.length < length) return null;
+ byte [] result = new byte[length];
+ System.arraycopy(a, 0, result, 0, length);
+ return result;
+ }
/**
+ * @param a
+ * @param length
+ * @return Last <code>length</code> bytes from <code>a</code>
+ */
+ public static byte [] tail(final byte [] a, final int length) {
+ if(a.length < length) return null;
+ byte [] result = new byte[length];
+ System.arraycopy(a, a.length - length, result, 0, length);
+ return result;
+ }
+
+ /**
+ * @param a
+ * @param length
+ * @return Value in <code>a</code> plus <code>length</code> prepended 0 bytes
+ */
+ public static byte [] padHead(final byte [] a, final int length) {
+ byte [] padding = new byte[length];
+ for(int i=0;i<length;i++) padding[i] = 0;
+ return add(padding,a);
+ }
+
+ /**
+ * @param a
+ * @param length
+ * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes
+ */
+ public static byte [] padTail(final byte [] a, final int length) {
+ byte [] padding = new byte[length];
+ for(int i=0;i<length;i++) padding[i] = 0;
+ return add(a,padding);
+ }
+
+ /**
+ * Split passed range. Expensive operation relatively. Uses BigInteger math.
+ * Useful splitting ranges for MapReduce jobs.
+ * @param a Beginning of range
+ * @param b End of range
+ * @param num Number of times to split range. Pass 1 if you want to split
+ * the range in two; i.e. one split.
+ * @return Array of dividing values
+ */
+ public static byte [][] split(final byte [] a, final byte [] b, final int num) {
+ byte [] aPadded = null;
+ byte [] bPadded = null;
+ if (a.length < b.length) {
+ aPadded = padTail(a,b.length-a.length);
+ bPadded = b;
+ } else if (b.length < a.length) {
+ aPadded = a;
+ bPadded = padTail(b,a.length-b.length);
+ } else {
+ aPadded = a;
+ bPadded = b;
+ }
+ if (compareTo(aPadded,bPadded) > 1) {
+ throw new IllegalArgumentException("b > a");
+ }
+ if (num <= 0) throw new IllegalArgumentException("num cannot be < 0");
+ byte [] prependHeader = {1, 0};
+ BigInteger startBI = new BigInteger(add(prependHeader, aPadded));
+ BigInteger stopBI = new BigInteger(add(prependHeader, bPadded));
+ BigInteger diffBI = stopBI.subtract(startBI);
+ BigInteger splitsBI = BigInteger.valueOf(num + 1);
+ if(diffBI.compareTo(splitsBI) <= 0) return null;
+ BigInteger intervalBI = null;
+ try {
+ intervalBI = diffBI.divide(splitsBI);
+ } catch(Exception e) {
+ return null;
+ }
+
+ byte [][] result = new byte[num+2][];
+ result[0] = a;
+
+ for (int i = 1; i <= num; i++) {
+ BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger.valueOf(i)));
+ byte [] padded = curBI.toByteArray();
+ if (padded[1] == 0)
+ padded = tail(padded,padded.length-2);
+ else
+ padded = tail(padded,padded.length-1);
+ result[i] = padded;
+ }
+ result[num+1] = b;
+ return result;
+ }
+
+ /**
* @param t
* @return Array of byte arrays made from passed array of Text
*/
Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java?rev=769076&r1=769075&r2=769076&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java (original)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java Mon Apr 27 18:05:34 2009
@@ -24,6 +24,40 @@
import junit.framework.TestCase;
public class TestBytes extends TestCase {
+ public void testSplit() throws Exception {
+ byte [] lowest = Bytes.toBytes("AAA");
+ byte [] middle = Bytes.toBytes("CCC");
+ byte [] highest = Bytes.toBytes("EEE");
+ byte [][] parts = Bytes.split(lowest, highest, 1);
+ for (int i = 0; i < parts.length; i++) {
+ System.out.println(Bytes.toString(parts[i]));
+ }
+ assertEquals(3, parts.length);
+ assertTrue(Bytes.equals(parts[1], middle));
+ // Now divide into three parts. Change highest so split is even.
+ highest = Bytes.toBytes("DDD");
+ parts = Bytes.split(lowest, highest, 2);
+ for (int i = 0; i < parts.length; i++) {
+ System.out.println(Bytes.toString(parts[i]));
+ }
+ assertEquals(4, parts.length);
+ // Assert that 3rd part is 'CCC'.
+ assertTrue(Bytes.equals(parts[2], middle));
+ }
+
+ public void testSplit2() throws Exception {
+ // More split tests.
+ byte [] lowest = Bytes.toBytes("http://A");
+ byte [] highest = Bytes.toBytes("http://z");
+ byte [] middle = Bytes.toBytes("http://[");
+ byte [][] parts = Bytes.split(lowest, highest, 1);
+ for (int i = 0; i < parts.length; i++) {
+ System.out.println(Bytes.toString(parts[i]));
+ }
+ assertEquals(2, parts.length);
+ assertTrue(Bytes.equals(parts[1], middle));
+ }
+
public void testToLong() throws Exception {
long [] longs = {-1l, 123l, 122232323232l};
for (int i = 0; i < longs.length; i++) {