You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2009/04/27 20:05:35 UTC

svn commit: r769076 - in /hadoop/hbase/trunk: CHANGES.txt src/java/org/apache/hadoop/hbase/util/Bytes.java src/test/org/apache/hadoop/hbase/util/TestBytes.java

Author: stack
Date: Mon Apr 27 18:05:34 2009
New Revision: 769076

URL: http://svn.apache.org/viewvc?rev=769076&view=rev
Log:
HBASE-1183 New MR splitting algorithm and other new features need a way to split a key range in N chunks

Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=769076&r1=769075&r2=769076&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Mon Apr 27 18:05:34 2009
@@ -167,6 +167,8 @@
                (Evgeny Ryabitskiy via Stack)
    HBASE-1260  Bytes utility class changes: remove usage of ByteBuffer and
                provide additional ByteBuffer primitives (Jon Gray via Stack)
+   HBASE-1183  New MR splitting algorithm and other new features need a way to
+               split a key range in N chunks (Jon Gray via Stack)
 
 Release 0.19.0 - 01/21/2009
   INCOMPATIBLE CHANGES

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java?rev=769076&r1=769075&r2=769076&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Bytes.java Mon Apr 27 18:05:34 2009
@@ -25,6 +25,7 @@
 import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
 import java.util.Comparator;
+import java.math.BigInteger;
 
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
@@ -749,8 +750,108 @@
     return result;
   }
   
+  /**
+   * @param a
+   * @param length
+   * @return First <code>length</code> bytes from <code>a</code>
+   */
+  public static byte [] head(final byte [] a, final int length) {
+    if(a.length < length) return null;
+    byte [] result = new byte[length];
+    System.arraycopy(a, 0, result, 0, length);
+    return result;
+  }
 
   /**
+   * @param a
+   * @param length
+   * @return Last <code>length</code> bytes from <code>a</code>
+   */
+  public static byte [] tail(final byte [] a, final int length) {
+    if(a.length < length) return null;
+    byte [] result = new byte[length];
+    System.arraycopy(a, a.length - length, result, 0, length);
+    return result;
+  }
+
+  /**
+   * @param a
+   * @param length
+   * @return Value in <code>a</code> plus <code>length</code> prepended 0 bytes
+   */
+  public static byte [] padHead(final byte [] a, final int length) {
+    byte [] padding = new byte[length];
+    for(int i=0;i<length;i++) padding[i] = 0;
+    return add(padding,a);
+  }
+  
+  /**
+   * @param a
+   * @param length
+   * @return Value in <code>a</code> plus <code>length</code> appended 0 bytes
+   */
+  public static byte [] padTail(final byte [] a, final int length) {
+    byte [] padding = new byte[length];
+    for(int i=0;i<length;i++) padding[i] = 0;
+    return add(a,padding);
+  }
+  
+  /**
+   * Split passed range.  Expensive operation relatively.  Uses BigInteger math.
+   * Useful splitting ranges for MapReduce jobs.
+   * @param a Beginning of range
+   * @param b End of range
+   * @param num Number of times to split range.  Pass 1 if you want to split
+   * the range in two; i.e. one split.
+   * @return Array of dividing values
+   */
+  public static byte [][] split(final byte [] a, final byte [] b, final int num) {
+    byte [] aPadded = null;
+    byte [] bPadded = null;
+    if (a.length < b.length) {
+      aPadded = padTail(a,b.length-a.length);
+      bPadded = b;
+    } else if (b.length < a.length) {
+      aPadded = a;
+      bPadded = padTail(b,a.length-b.length);
+    } else {
+      aPadded = a;
+      bPadded = b;
+    }
+    if (compareTo(aPadded,bPadded) > 1) {
+      throw new IllegalArgumentException("b > a");
+    }
+    if (num <= 0) throw new IllegalArgumentException("num cannot be < 0");
+    byte [] prependHeader = {1, 0};
+    BigInteger startBI = new BigInteger(add(prependHeader, aPadded));
+    BigInteger stopBI = new BigInteger(add(prependHeader, bPadded));
+    BigInteger diffBI = stopBI.subtract(startBI);
+    BigInteger splitsBI = BigInteger.valueOf(num + 1);
+    if(diffBI.compareTo(splitsBI) <= 0) return null;
+    BigInteger intervalBI = null;
+    try {
+      intervalBI = diffBI.divide(splitsBI);
+    } catch(Exception e) {
+      return null;
+    }
+
+    byte [][] result = new byte[num+2][];
+    result[0] = a;
+
+    for (int i = 1; i <= num; i++) {
+      BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger.valueOf(i)));
+      byte [] padded = curBI.toByteArray();
+      if (padded[1] == 0)
+        padded = tail(padded,padded.length-2);
+      else
+        padded = tail(padded,padded.length-1);
+      result[i] = padded;
+    }
+    result[num+1] = b;
+    return result;
+  }
+  
+  /**
    * @param t
    * @return Array of byte arrays made from passed array of Text
    */

Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java?rev=769076&r1=769075&r2=769076&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java (original)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/TestBytes.java Mon Apr 27 18:05:34 2009
@@ -24,6 +24,40 @@
 import junit.framework.TestCase;
 
 public class TestBytes extends TestCase {
+  public void testSplit() throws Exception {
+    byte [] lowest = Bytes.toBytes("AAA");
+    byte [] middle = Bytes.toBytes("CCC");
+    byte [] highest = Bytes.toBytes("EEE");
+    byte [][] parts = Bytes.split(lowest, highest, 1);
+    for (int i = 0; i < parts.length; i++) {
+      System.out.println(Bytes.toString(parts[i]));
+    }
+    assertEquals(3, parts.length);
+    assertTrue(Bytes.equals(parts[1], middle));
+    // Now divide into three parts.  Change highest so split is even.
+    highest = Bytes.toBytes("DDD");
+    parts = Bytes.split(lowest, highest, 2);
+    for (int i = 0; i < parts.length; i++) {
+      System.out.println(Bytes.toString(parts[i]));
+    }
+    assertEquals(4, parts.length);
+    // Assert that 3rd part is 'CCC'.
+    assertTrue(Bytes.equals(parts[2], middle));
+  }
+
+  public void testSplit2() throws Exception {
+    // More split tests.
+    byte [] lowest = Bytes.toBytes("http://A");
+    byte [] highest = Bytes.toBytes("http://z");
+    byte [] middle = Bytes.toBytes("http://[");
+    byte [][] parts = Bytes.split(lowest, highest, 1);
+    for (int i = 0; i < parts.length; i++) {
+      System.out.println(Bytes.toString(parts[i]));
+    }
+    assertEquals(2, parts.length);
+    assertTrue(Bytes.equals(parts[1], middle));
+  }
+
   public void testToLong() throws Exception {
     long [] longs = {-1l, 123l, 122232323232l};
     for (int i = 0; i < longs.length; i++) {