You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/20 02:09:58 UTC
svn commit: r1186575 - in /hbase/trunk: CHANGES.txt
src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
Author: nspiegelberg
Date: Thu Oct 20 00:09:57 2011
New Revision: 1186575
URL: http://svn.apache.org/viewvc?rev=1186575&view=rev
Log:
HBASE-4489 Better key splitting in RegionSplitter
Added:
hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
Modified:
hbase/trunk/CHANGES.txt
hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1186575&r1=1186574&r2=1186575&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Thu Oct 20 00:09:57 2011
@@ -17,6 +17,7 @@ Release 0.93.0 - Unreleased
HBASE-4102 atomicAppend: A put that appends to the latest version of a cell (Lars H)
HBASE-4469 Avoid top row seek by looking up bloomfilter (liyin via jgray)
HBASE-4418 Show all the hbase configuration in the web ui
+ HBASE-4489 Better key splitting in RegionSplitter
BUG FIXES
HBASE-4488 Store could miss rows during flush (Lars H via jgray)
Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java?rev=1186575&r1=1186574&r2=1186575&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java Thu Oct 20 00:09:57 2011
@@ -35,6 +35,7 @@ import org.apache.commons.cli.HelpFormat
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
+import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -138,10 +139,11 @@ public class RegionSplitter {
/**
* A generic interface for the RegionSplitter code to use for all it's
* functionality. Note that the original authors of this code use
- * {@link MD5StringSplit} to partition their table and set it as default, but
+ * {@link HexStringSplit} to partition their table and set it as default, but
* provided this for your custom algorithm. To use, create a new derived class
- * from this interface and call the RegionSplitter class with the argument: <br>
- * <b>-D split.algorithm=<your_class_path></b>
+ * from this interface and call {@link RegionSplitter#createPresplitTable} or
+ * {@link RegionSplitter#rollingSplit(String, String, Configuration)} with the
+ * argument splitClassName giving the name of your class.
*/
public static interface SplitAlgorithm {
/**
@@ -158,12 +160,13 @@ public class RegionSplitter {
/**
* Split an entire table.
*
- * @param numberOfSplits
+ * @param numRegions
* number of regions to split the table into
*
- * @return array of split keys for the initial regions of the table
+ * @return array of split keys for the initial regions of the table. The length of the
+ * returned array should be numRegions-1.
*/
- byte[][] split(int numberOfSplits);
+ byte[][] split(int numRegions);
/**
* In HBase, the first row is represented by an empty byte array. This might
@@ -208,21 +211,29 @@ public class RegionSplitter {
* <p>
* <ul>
* <li>create a table named 'myTable' with 60 pre-split regions containing 2
- * column families 'test' & 'rs' bin/hbase
+ * column families 'test' & 'rs', assuming the keys are hex-encoded ASCII:
* <ul>
- * <li>org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs myTable
+ * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs
+ * myTable HexStringSplit
* </ul>
* <li>perform a rolling split of 'myTable' (i.e. 60 => 120 regions), # 2
- * outstanding splits at a time bin/hbase
+ * outstanding splits at a time, assuming keys are uniformly distributed
+ * bytes:
* <ul>
- * <li>org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 myTable
+ * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 myTable
+ * UniformSplit
* </ul>
* </ul>
*
+ * There are two SplitAlgorithms built into RegionSplitter, HexStringSplit
+ * and UniformSplit. These are different strategies for choosing region
+ * boundaries. See their source code for details.
+ *
* @param args
- * Usage: RegionSplitter <TABLE> <-c <# regions> -f
- * <family:family:...> | -r [-o <# outstanding
- * splits>]> [-D <conf.param=value>]
+ * Usage: RegionSplitter <TABLE> <SPLITALGORITHM>
+ * <-c <# regions> -f <family:family:...> | -r
+ * [-o <# outstanding splits>]>
+ * [-D <conf.param=value>]
* @throws IOException
* HBase IO problem
* @throws InterruptedException
@@ -277,35 +288,35 @@ public class RegionSplitter {
boolean rollingSplit = cmd.hasOption("r");
boolean oneOperOnly = createTable ^ rollingSplit;
- if (1 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption("h")) {
- new HelpFormatter().printHelp("RegionSplitter <TABLE>", opt);
+ if (2 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption("h")) {
+ new HelpFormatter().printHelp("RegionSplitter <TABLE> <SPLITALGORITHM>\n"+
+ "SPLITALGORITHM is a java class name of a class implementing " +
+ "SplitAlgorithm, or one of the special strings HexStringSplit " +
+ "or UniformSplit, which are built-in split algorithms. " +
+ "HexStringSplit treats keys as hexadecimal ASCII, and " +
+ "UniformSplit treats keys as arbitrary bytes.", opt);
return;
}
String tableName = cmd.getArgs()[0];
+ String splitClass = cmd.getArgs()[1];
if (createTable) {
conf.set("split.count", cmd.getOptionValue("c"));
- createPresplitTable(tableName, cmd.getOptionValue("f").split(":"), conf);
+ createPresplitTable(tableName, splitClass, cmd.getOptionValue("f").split(":"), conf);
}
if (rollingSplit) {
if (cmd.hasOption("o")) {
conf.set("split.outstanding", cmd.getOptionValue("o"));
}
- rollingSplit(tableName, conf);
+ rollingSplit(tableName, splitClass, conf);
}
}
- static void createPresplitTable(String tableName, String[] columnFamilies,
- Configuration conf) throws IOException, InterruptedException {
- Class<? extends SplitAlgorithm> splitClass = conf.getClass(
- "split.algorithm", MD5StringSplit.class, SplitAlgorithm.class);
- SplitAlgorithm splitAlgo;
- try {
- splitAlgo = splitClass.newInstance();
- } catch (Exception e) {
- throw new IOException("Problem loading split algorithm: ", e);
- }
+ static void createPresplitTable(String tableName, String splitClassName,
+ String[] columnFamilies, Configuration conf) throws IOException,
+ InterruptedException {
+ SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, splitClassName);
final int splitCount = conf.getInt("split.count", 0);
Preconditions.checkArgument(splitCount > 1, "Split count must be > 1");
@@ -340,16 +351,9 @@ public class RegionSplitter {
LOG.debug("Finished creating table with " + splitCount + " regions");
}
- static void rollingSplit(String tableName, Configuration conf)
- throws IOException, InterruptedException {
- Class<? extends SplitAlgorithm> splitClass = conf.getClass(
- "split.algorithm", MD5StringSplit.class, SplitAlgorithm.class);
- SplitAlgorithm splitAlgo;
- try {
- splitAlgo = splitClass.newInstance();
- } catch (Exception e) {
- throw new IOException("Problem loading split algorithm: ", e);
- }
+ static void rollingSplit(String tableName, String splitClassName,
+ Configuration conf) throws IOException, InterruptedException {
+ SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, splitClassName);
final int minOS = conf.getInt("split.outstanding", 2);
HTable table = new HTable(conf, tableName);
@@ -541,6 +545,41 @@ public class RegionSplitter {
fs.delete(splitFile, false);
}
+ /**
+ * @throws IOException if the specified SplitAlgorithm class couldn't be
+ * instantiated
+ */
+ static SplitAlgorithm newSplitAlgoInstance(Configuration conf,
+ String splitClassName) throws IOException {
+ Class<?> splitClass;
+
+ // For split algorithms builtin to RegionSplitter, the user can specify
+ // their simple class name instead of a fully qualified class name.
+ if(splitClassName.equals(HexStringSplit.class.getSimpleName())) {
+ splitClass = HexStringSplit.class;
+ } else if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
+ splitClass = UniformSplit.class;
+ } else {
+ try {
+ splitClass = conf.getClassByName(splitClassName);
+ } catch (ClassNotFoundException e) {
+ throw new IOException("Couldn't load split class " + splitClassName, e);
+ }
+ if(splitClass == null) {
+ throw new IOException("Failed loading split class " + splitClassName);
+ }
+ if(!SplitAlgorithm.class.isAssignableFrom(splitClass)) {
+ throw new IOException(
+ "Specified split class doesn't implement SplitAlgorithm");
+ }
+ }
+ try {
+ return splitClass.asSubclass(SplitAlgorithm.class).newInstance();
+ } catch (Exception e) {
+ throw new IOException("Problem loading split algorithm: ", e);
+ }
+ }
+
static LinkedList<Pair<byte[], byte[]>> splitScan(
LinkedList<Pair<byte[], byte[]>> regionList, HTable table,
SplitAlgorithm splitAlgo)
@@ -714,16 +753,20 @@ public class RegionSplitter {
}
/**
- * MD5StringSplit is the default {@link SplitAlgorithm} for creating pre-split
- * tables. The format of MD5StringSplit is the ASCII representation of an MD5
- * checksum. Row are long values in the range <b>"00000000" => "7FFFFFFF"</b>
- * and are left-padded with zeros to keep the same order lexographically as if
- * they were binary.
+ * HexStringSplit is one possible {@link SplitAlgorithm} for choosing region
+ * boundaries. The format of a HexStringSplit region boundary is the
+ * ASCII representation of an MD5 checksum, or any other uniformly distributed
+ * bytes. Row are hex-encoded long values in the range <b>"00000000" =>
+ * "FFFFFFFF"</b> and are left-padded with zeros to keep the same order
+ * lexicographically as if they were binary.
+ *
+ * This split algorithm is only appropriate if you will use hex strings as
+ * keys.
*/
- public static class MD5StringSplit implements SplitAlgorithm {
- final static String MAXMD5 = "7FFFFFFF";
- final static BigInteger MAXMD5_INT = new BigInteger(MAXMD5, 16);
- final static int rowComparisonLength = MAXMD5.length();
+ public static class HexStringSplit implements SplitAlgorithm {
+ final static String MAXHEX = "FFFFFFFF";
+ final static BigInteger MAXHEX_INT = new BigInteger(MAXHEX, 16);
+ final static int rowComparisonLength = MAXHEX.length();
public byte[] split(byte[] start, byte[] end) {
BigInteger s = convertToBigInteger(start);
@@ -734,10 +777,10 @@ public class RegionSplitter {
public byte[][] split(int n) {
BigInteger[] splits = new BigInteger[n - 1];
- BigInteger sizeOfEachSplit = MAXMD5_INT.divide(BigInteger.valueOf(n));
+ BigInteger sizeOfEachSplit = MAXHEX_INT.divide(BigInteger.valueOf(n));
for (int i = 1; i < n; i++) {
// NOTE: this means the last region gets all the slop.
- // This is not a big deal if we're assuming n << MAXMD5
+ // This is not a big deal if we're assuming n << MAXHEX
splits[i - 1] = sizeOfEachSplit.multiply(BigInteger.valueOf(i));
}
return convertToBytes(splits);
@@ -748,7 +791,7 @@ public class RegionSplitter {
}
public byte[] lastRow() {
- return convertToByte(MAXMD5_INT);
+ return convertToByte(MAXHEX_INT);
}
public byte[] strToRow(String in) {
@@ -806,4 +849,54 @@ public class RegionSplitter {
}
}
+ /**
+ * A SplitAlgorithm that divides the space of possible keys evenly. Useful
+ * when the keys are approximately uniform random bytes (e.g. hashes).
+ * You probably shouldn't use this if your keys are ASCII, or if your keys
+ * tend to have similar prefixes.
+ */
+ public static class UniformSplit implements SplitAlgorithm {
+ static final byte xFF = (byte)0xFF;
+ static final byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
+ static final byte[] lastRowBytes =
+ new byte[] {xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};
+ public byte[] split(byte[] start, byte[] end) {
+ return Bytes.split(start, end, 1)[1];
+ }
+
+ @Override
+ public byte[][] split(int numRegions) {
+ byte[][] splitKeysPlusEndpoints = Bytes.split(firstRowBytes, lastRowBytes,
+ numRegions-1);
+ byte[][] splitAtKeys = new byte[splitKeysPlusEndpoints.length-2][];
+ System.arraycopy(splitKeysPlusEndpoints, 1, splitAtKeys, 0,
+ splitKeysPlusEndpoints.length-2);
+ return splitAtKeys;
+ }
+
+ @Override
+ public byte[] firstRow() {
+ return firstRowBytes;
+ }
+
+ @Override
+ public byte[] lastRow() {
+ return lastRowBytes;
+ }
+
+ @Override
+ public byte[] strToRow(String input) {
+ return Bytes.toBytesBinary(input);
+ }
+
+ @Override
+ public String rowToStr(byte[] row) {
+ return Bytes.toStringBinary(row);
+ }
+
+ @Override
+ public String separator() {
+ return ",";
+ }
+ }
}
Added: hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java?rev=1186575&view=auto
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java (added)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java Thu Oct 20 00:09:57 2011
@@ -0,0 +1,294 @@
+/**
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HServerAddress;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.util.RegionSplitter.HexStringSplit;
+import org.apache.hadoop.hbase.util.RegionSplitter.UniformSplit;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Tests for {@link RegionSplitter}, which can create a pre-split table or do a
+ * rolling split of an existing table.
+ */
+public class TestRegionSplitter {
+ private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
+ private final static String HEX_SPLIT_CLASS_NAME =
+ "org.apache.hadoop.hbase.util.RegionSplitter$HexStringSplit";
+ private final static String UNIFORM_SPLIT_CLASS_NAME =
+ "org.apache.hadoop.hbase.util.RegionSplitter$UniformSplit";
+ private final static String CF_NAME = "SPLIT_TEST_CF";
+ private final static byte xFF = (byte)0xff;
+
+ @BeforeClass
+ public static void setup() throws Exception {
+ UTIL.startMiniCluster();
+ }
+
+ @AfterClass
+ public static void teardown() throws IOException {
+ UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * Test creating a pre-split table using the HexStringSplit algorithm.
+ */
+ @Test
+ public void testCreatePresplitTableHex() throws Exception {
+ final List<byte[]> expectedBounds = new ArrayList<byte[]>();
+ expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
+ expectedBounds.add("0fffffff".getBytes());
+ expectedBounds.add("1ffffffe".getBytes());
+ expectedBounds.add("2ffffffd".getBytes());
+ expectedBounds.add("3ffffffc".getBytes());
+ expectedBounds.add("4ffffffb".getBytes());
+ expectedBounds.add("5ffffffa".getBytes());
+ expectedBounds.add("6ffffff9".getBytes());
+ expectedBounds.add("7ffffff8".getBytes());
+ expectedBounds.add("8ffffff7".getBytes());
+ expectedBounds.add("9ffffff6".getBytes());
+ expectedBounds.add("affffff5".getBytes());
+ expectedBounds.add("bffffff4".getBytes());
+ expectedBounds.add("cffffff3".getBytes());
+ expectedBounds.add("dffffff2".getBytes());
+ expectedBounds.add("effffff1".getBytes());
+ expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
+
+ // Do table creation/pre-splitting and verification of region boundaries
+ preSplitTableAndVerify(expectedBounds, HEX_SPLIT_CLASS_NAME,
+ "NewHexPresplitTable");
+ }
+
+ /**
+ * Test creating a pre-split table using the UniformSplit algorithm.
+ */
+ @Test
+ public void testCreatePresplitTableUniform() throws Exception {
+ List<byte[]> expectedBounds = new ArrayList<byte[]>();
+ expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
+ expectedBounds.add(new byte[] { 0x0f, xFF, xFF, xFF, xFF, xFF, xFF, xFF});
+ expectedBounds.add(new byte[] { 0x1f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfe});
+ expectedBounds.add(new byte[] { 0x2f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfd});
+ expectedBounds.add(new byte[] { 0x3f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfc});
+ expectedBounds.add(new byte[] { 0x4f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfb});
+ expectedBounds.add(new byte[] { 0x5f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfa});
+ expectedBounds.add(new byte[] { 0x6f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf9});
+ expectedBounds.add(new byte[] { 0x7f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf8});
+ expectedBounds.add(new byte[] {(byte)0x8f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf7});
+ expectedBounds.add(new byte[] {(byte)0x9f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf6});
+ expectedBounds.add(new byte[] {(byte)0xaf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf5});
+ expectedBounds.add(new byte[] {(byte)0xbf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf4});
+ expectedBounds.add(new byte[] {(byte)0xcf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf3});
+ expectedBounds.add(new byte[] {(byte)0xdf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf2});
+ expectedBounds.add(new byte[] {(byte)0xef, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf1});
+ expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
+
+ // Do table creation/pre-splitting and verification of region boundaries
+ preSplitTableAndVerify(expectedBounds,
+ "org.apache.hadoop.hbase.util.RegionSplitter$UniformSplit",
+ "NewUniformPresplitTable");
+ }
+
+ /**
+ * Unit tests for the HexStringSplit algorithm. Makes sure it divides up the
+ * space of keys in the way that we expect.
+ */
+ @Test
+ public void unitTestHexStringSplit() {
+ HexStringSplit splitter = new HexStringSplit();
+ // Check splitting while starting from scratch
+
+ byte[][] twoRegionsSplits = splitter.split(2);
+ assertEquals(1, twoRegionsSplits.length);
+ assertArrayEquals(twoRegionsSplits[0], "7fffffff".getBytes());
+
+ byte[][] threeRegionsSplits = splitter.split(3);
+ assertEquals(2, threeRegionsSplits.length);
+ byte[] expectedSplit0 = "55555555".getBytes();
+ assertArrayEquals(expectedSplit0, threeRegionsSplits[0]);
+ byte[] expectedSplit1 = "aaaaaaaa".getBytes();
+ assertArrayEquals(expectedSplit1, threeRegionsSplits[1]);
+
+ // Check splitting existing regions that have start and end points
+ byte[] splitPoint = splitter.split("10000000".getBytes(), "30000000".getBytes());
+ assertArrayEquals("20000000".getBytes(), splitPoint);
+
+ byte[] lastRow = "ffffffff".getBytes();
+ assertArrayEquals(lastRow, splitter.lastRow());
+ byte[] firstRow = "00000000".getBytes();
+ assertArrayEquals(firstRow, splitter.firstRow());
+
+ // Halfway between 00... and 20... should be 10...
+ splitPoint = splitter.split(firstRow, "20000000".getBytes());
+ assertArrayEquals(splitPoint, "10000000".getBytes());
+
+ // Halfway between 5f... and 7f... should be 6f....
+ splitPoint = splitter.split("dfffffff".getBytes(), lastRow);
+ assertArrayEquals(splitPoint,"efffffff".getBytes());
+ }
+
+ /**
+ * Unit tests for the UniformSplit algorithm. Makes sure it divides up the space of
+ * keys in the way that we expect.
+ */
+ @Test
+ public void unitTestUniformSplit() {
+ UniformSplit splitter = new UniformSplit();
+
+ // Check splitting while starting from scratch
+ try {
+ splitter.split(1);
+ throw new AssertionError("Splitting into <2 regions should have thrown exception");
+ } catch (IllegalArgumentException e) { }
+
+ byte[][] twoRegionsSplits = splitter.split(2);
+ assertEquals(1, twoRegionsSplits.length);
+ assertArrayEquals(twoRegionsSplits[0],
+ new byte[] {0x7f, xFF, xFF, xFF, xFF, xFF, xFF, xFF});
+
+ byte[][] threeRegionsSplits = splitter.split(3);
+ assertEquals(2, threeRegionsSplits.length);
+ byte[] expectedSplit0 = new byte[] {0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55};
+ assertArrayEquals(expectedSplit0, threeRegionsSplits[0]);
+ byte[] expectedSplit1 = new byte[] {(byte)0xAA, (byte)0xAA, (byte)0xAA, (byte)0xAA,
+ (byte)0xAA, (byte)0xAA, (byte)0xAA, (byte)0xAA};
+ assertArrayEquals(expectedSplit1, threeRegionsSplits[1]);
+
+ // Check splitting existing regions that have start and end points
+ byte[] splitPoint = splitter.split(new byte[] {0x10}, new byte[] {0x30});
+ assertArrayEquals(new byte[] {0x20}, splitPoint);
+
+ byte[] lastRow = new byte[] {xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};
+ assertArrayEquals(lastRow, splitter.lastRow());
+ byte[] firstRow = ArrayUtils.EMPTY_BYTE_ARRAY;
+ assertArrayEquals(firstRow, splitter.firstRow());
+
+ splitPoint = splitter.split(firstRow, new byte[] {0x20});
+ assertArrayEquals(splitPoint, new byte[] {0x10});
+
+ splitPoint = splitter.split(new byte[] {(byte)0xdf, xFF, xFF, xFF, xFF,
+ xFF, xFF, xFF}, lastRow);
+ assertArrayEquals(splitPoint,
+ new byte[] {(byte)0xef, xFF, xFF, xFF, xFF, xFF, xFF, xFF});
+ }
+
+ /**
+ * Creates a pre-split table with expectedBounds.size()+1 regions, then
+ * verifies that the region boundaries are the same as the expected
+ * region boundaries in expectedBounds.
+ * @throws Various junit assertions
+ */
+ private void preSplitTableAndVerify(List<byte[]> expectedBounds,
+ String splitAlgo, String tableName) throws Exception {
+ final int numRegions = expectedBounds.size()-1;
+ final Configuration conf = UTIL.getConfiguration();
+ conf.setInt("split.count", numRegions);
+ RegionSplitter.createPresplitTable(tableName, splitAlgo,
+ new String[] {CF_NAME}, conf);
+ verifyBounds(expectedBounds, tableName);
+ }
+
+ private void rollingSplitAndVerify(String tableName, String splitAlgo,
+ List<byte[]> expectedBounds) throws Exception {
+ final Configuration conf = UTIL.getConfiguration();
+
+ // Set this larger than the number of splits so RegionSplitter won't block
+ conf.setInt("split.outstanding", 5);
+ RegionSplitter.rollingSplit(tableName, splitAlgo, conf);
+ verifyBounds(expectedBounds, tableName);
+ }
+
+ private void verifyBounds(List<byte[]> expectedBounds, String tableName)
+ throws Exception {
+ // Get region boundaries from the cluster and verify their endpoints
+ final Configuration conf = UTIL.getConfiguration();
+ final int numRegions = expectedBounds.size()-1;
+ final HTable hTable = new HTable(conf, tableName.getBytes());
+ final Map<HRegionInfo, HServerAddress> regionInfoMap =
+ hTable.getRegionsInfo();
+ assertEquals(numRegions, regionInfoMap.size());
+ for (Map.Entry<HRegionInfo, HServerAddress> entry:
+ regionInfoMap.entrySet()) {
+ final HRegionInfo regionInfo = entry.getKey();
+ byte[] regionStart = regionInfo.getStartKey();
+ byte[] regionEnd = regionInfo.getEndKey();
+
+ // This region's start key should be one of the region boundaries
+ int startBoundaryIndex = indexOfBytes(expectedBounds, regionStart);
+ assertNotSame(-1, startBoundaryIndex);
+
+ // This region's end key should be the region boundary that comes
+ // after the starting boundary.
+ byte[] expectedRegionEnd = expectedBounds.get(
+ startBoundaryIndex+1);
+ assertEquals(0, Bytes.compareTo(regionEnd, expectedRegionEnd));
+ }
+ }
+
+ /**
+ * List.indexOf() doesn't really work for a List<byte[]>, because byte[]
+ * doesn't override equals(). This method checks whether a list contains
+ * a given element by checking each element using the byte array
+ * comparator.
+ * @return the index of the first element that equals compareTo, or -1
+ * if no elements are equal.
+ */
+ static private int indexOfBytes(List<byte[]> list, byte[] compareTo) {
+ int listIndex = 0;
+ for(byte[] elem: list) {
+ if(Bytes.BYTES_COMPARATOR.compare(elem, compareTo) == 0) {
+ return listIndex;
+ }
+ listIndex++;
+ }
+ return -1;
+ }
+
+ /**
+ * Inserts some meaningless data into a CF so the regions can be split.
+ */
+ static void insertSomeData(String table) throws IOException {
+ HTable hTable = new HTable(table);
+ for(byte b=Byte.MIN_VALUE; b<Byte.MAX_VALUE; b++) {
+ byte[] whateverBytes = new byte[] {b};
+ Put p = new Put(whateverBytes);
+ p.add(CF_NAME.getBytes(), whateverBytes, whateverBytes);
+ hTable.put(p);
+ }
+ }
+}