You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2007/08/08 22:30:20 UTC
svn commit: r564012 [4/4] - in /lucene/hadoop/trunk/src/contrib/hbase: ./
src/java/org/apache/hadoop/hbase/ src/java/org/apache/hadoop/hbase/util/
src/test/org/apache/hadoop/hbase/
Added: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestSplit.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestSplit.java?view=auto&rev=564012
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestSplit.java (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestSplit.java Wed Aug 8 13:30:13 2007
@@ -0,0 +1,442 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import java.io.IOException;
+import java.util.ConcurrentModificationException;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.io.Text;
+
+/**
+ * {@link TestHRegion} does a split but this TestCase adds testing of fast
+ * split and manufactures odd-ball split scenarios.
+ */
+public class TestSplit extends HBaseTestCase {
+ static final Log LOG = LogFactory.getLog(TestSplit.class);
+ private final static String COLFAMILY_NAME1 = "colfamily1:";
+ private final static String COLFAMILY_NAME2 = "colfamily2:";
+ private final static String COLFAMILY_NAME3 = "colfamily3:";
+ private Path testDir = null;
+ private FileSystem fs = null;
+ private static final char FIRST_CHAR = 'a';
+ private static final char LAST_CHAR = 'z';
+
+ /**
+  * Prepares a clean per-test directory on the local filesystem and lowers the
+  * maximum region file size used by this test.
+  * @throws Exception
+  */
+ @Override
+ public void setUp() throws Exception {
+   super.setUp();
+   this.testDir = getUnitTestdir(getName());
+   this.fs = FileSystem.getLocal(this.conf);
+   // Remove anything left behind under the test directory.
+   final boolean staleDirExists = this.fs.exists(this.testDir);
+   if (staleDirExists) {
+     this.fs.delete(this.testDir);
+   }
+   // This size should make it so we always split using the addContent
+   // below. After adding all data, the first region is 1.3M
+   this.conf.setLong("hbase.hregion.max.filesize", 128 * 1024);
+ }
+
+ /**
+  * Deletes the per-test directory if it exists, then delegates to the
+  * superclass. Directory cleanup is best effort: an Exception raised while
+  * deleting is logged rather than rethrown so that super.tearDown() still
+  * runs.
+  * @throws Exception
+  */
+ @Override
+ public void tearDown() throws Exception {
+   try {
+     if (this.fs.exists(testDir)) {
+       this.fs.delete(testDir);
+     }
+   } catch (Exception e) {
+     // Report through the class logger (with the stack trace) instead of
+     // dumping the trace straight to stderr.
+     LOG.warn("Failed to delete test directory " + this.testDir, e);
+   }
+   super.tearDown();
+ }
+
+ /**
+  * Splits twice and verifies getting from each of the split regions.
+  * The region and its log are both released on the way out, even when the
+  * test body or the region close fails.
+  * @throws Exception
+  */
+ public void testBasicSplit() throws Exception {
+   HLog hlog = new HLog(this.fs, this.testDir, this.conf);
+   try {
+     HRegion region = null;
+     try {
+       HTableDescriptor htd = createTableDescriptor(getName());
+       HRegionInfo hri = new HRegionInfo(1, htd, null, null);
+       region = new HRegion(testDir, hlog, fs, this.conf, hri, null);
+       basicSplit(region);
+     } finally {
+       if (region != null) {
+         region.close();
+       }
+     }
+   } finally {
+     // Outer finally so the log is closed and deleted even if
+     // region.close() above throws.
+     hlog.closeAndDelete();
+   }
+ }
+
+ private HTableDescriptor createTableDescriptor(final String name) {
+ HTableDescriptor htd = new HTableDescriptor(name);
+ htd.addFamily(new HColumnDescriptor(COLFAMILY_NAME1));
+ htd.addFamily(new HColumnDescriptor(COLFAMILY_NAME2));
+ htd.addFamily(new HColumnDescriptor(COLFAMILY_NAME3));
+ return htd;
+ }
+
+ private void basicSplit(final HRegion region) throws Exception {
+ addContent(region, COLFAMILY_NAME3);
+ region.internalFlushcache();
+ Text midkey = new Text();
+ assertTrue(region.needsSplit(midkey));
+ HRegion [] regions = split(region);
+ // Assert can get rows out of new regions. Should be able to get first
+ // row from first region and the midkey from second region.
+ byte [] b = new byte [] {FIRST_CHAR, FIRST_CHAR, FIRST_CHAR};
+ assertGet(regions[0], COLFAMILY_NAME3, new Text(b));
+ assertGet(regions[1], COLFAMILY_NAME3, midkey);
+ // Test I can get scanner and that it starts at right place.
+ assertScan(regions[0], COLFAMILY_NAME3, new Text(b));
+ assertScan(regions[1], COLFAMILY_NAME3, midkey);
+ // Now prove can't split regions that have references.
+ Text [] midkeys = new Text[regions.length];
+ for (int i = 0; i < regions.length; i++) {
+ midkeys[i] = new Text();
+ // Even after above splits, still needs split but after splits its
+ // unsplitable because biggest store file is reference. References
+ // make the store unsplittable, until something bigger comes along.
+ assertFalse(regions[i].needsSplit(midkeys[i]));
+ // Add so much data to this region, we create a store file that is > than
+ // one of our unsplitable references.
+ // it will.
+ for (int j = 0; j < 2; j++) {
+ addContent(regions[i], COLFAMILY_NAME3);
+ }
+ addContent(regions[i], COLFAMILY_NAME2);
+ addContent(regions[i], COLFAMILY_NAME1);
+ regions[i].internalFlushcache();
+ }
+
+ // Assert that even if one store file is larger than a reference, the
+ // region is still deemed unsplitable (Can't split region if references
+ // presen).
+ for (int i = 0; i < regions.length; i++) {
+ midkeys[i] = new Text();
+ // Even after above splits, still needs split but after splits its
+ // unsplitable because biggest store file is reference. References
+ // make the store unsplittable, until something bigger comes along.
+ assertFalse(regions[i].needsSplit(midkeys[i]));
+ }
+
+ // To make regions splitable force compaction.
+ for (int i = 0; i < regions.length; i++) {
+ assertTrue(regions[i].compactStores());
+ }
+
+ TreeMap<String, HRegion> sortedMap = new TreeMap<String, HRegion>();
+ // Split these two daughter regions so then I'll have 4 regions. Will
+ // split because added data above.
+ for (int i = 0; i < regions.length; i++) {
+ HRegion [] rs = split(regions[i]);
+ for (int j = 0; j < rs.length; j++) {
+ sortedMap.put(rs[j].getRegionName().toString(), rs[j]);
+ }
+ }
+ LOG.info("Made 4 regions");
+ // The splits should have been even. Test I can get some arbitrary row out
+ // of each.
+ int interval = (LAST_CHAR - FIRST_CHAR) / 3;
+ for (HRegion r: sortedMap.values()) {
+ assertGet(r, COLFAMILY_NAME3, new Text(new String(b)));
+ b[0] += interval;
+ }
+ }
+
+ /**
+ * Test that a region is cleaned up after its daughter splits release all
+ * references.
+ * @throws Exception
+ */
+ public void testSplitRegionIsDeleted() throws Exception {
+ final int timeout = 60;
+ // Start up a hbase cluster
+ this.conf.set(HConstants.HBASE_DIR, this.testDir.toString());
+ MiniHBaseCluster.MasterThread masterThread =
+ MiniHBaseCluster.startMaster(this.conf);
+ List<MiniHBaseCluster.RegionServerThread> regionServerThreads =
+ MiniHBaseCluster.startRegionServers(this.conf, 1);
+ HTable meta = null;
+ HTable t = null;
+ try {
+ // Create a table.
+ HBaseAdmin admin = new HBaseAdmin(this.conf);
+ admin.createTable(createTableDescriptor(getName()));
+ // Get connection on the meta table and get count of rows.
+ meta = new HTable(this.conf, HConstants.META_TABLE_NAME);
+ int count = count(meta, HConstants.COLUMN_FAMILY_STR);
+ t = new HTable(this.conf, new Text(getName()));
+ addContent(t, COLFAMILY_NAME3);
+ // All is running in the one JVM so I should be able to get the
+ // region instance and bring on a split.
+ HRegionInfo hri =
+ t.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
+ HRegion r = null;
+ synchronized(regionServerThreads) {
+ r = regionServerThreads.get(0).getRegionServer().onlineRegions.
+ get(hri.getRegionName());
+ }
+ // Flush will provoke a split next time the split-checker thread runs.
+ r.flushcache(false);
+ // Now, wait until split makes it into the meta table.
+ for (int i = 0; i < timeout &&
+ (count(meta, HConstants.COLUMN_FAMILY_STR) <= count); i++) {
+ Thread.sleep(1000);
+ }
+ int oldCount = count;
+ count = count(meta, HConstants.COLUMN_FAMILY_STR);
+ if (count <= oldCount) {
+ throw new IOException("Failed waiting on splits to show up");
+ }
+ HRegionInfo parent = getSplitParent(meta);
+ assertTrue(parent.isOffline());
+ Path parentDir =
+ HRegion.getRegionDir(this.testDir, parent.getRegionName());
+ assertTrue(this.fs.exists(parentDir));
+ LOG.info("Split happened and parent " + parent.getRegionName() + " is " +
+ "offline");
+ // Now, force a compaction. This will rewrite references and make it
+ // so the parent region becomes deletable.
+ LOG.info("Starting compaction");
+ synchronized(regionServerThreads) {
+ for (MiniHBaseCluster.RegionServerThread thread: regionServerThreads) {
+ SortedMap<Text, HRegion> regions =
+ thread.getRegionServer().onlineRegions;
+ // Retry if ConcurrentModification... alternative of sync'ing is not
+ // worth it for sake of unit test.
+ for (int i = 0; i < 10; i++) {
+ try {
+ for (HRegion online: regions.values()) {
+ if (online.getRegionName().toString().startsWith(getName())) {
+ online.compactStores();
+ }
+ }
+ break;
+ } catch (ConcurrentModificationException e) {
+ LOG.warn("Retrying because ..." + e.toString() + " -- one or " +
+ "two should be fine");
+ continue;
+ }
+ }
+ }
+ }
+
+ // Now wait until parent disappears.
+ LOG.info("Waiting on parent " + parent.getRegionName() +
+ " to disappear");
+ for (int i = 0; i < timeout && getSplitParent(meta) != null; i++) {
+ Thread.sleep(1000);
+ }
+ assertTrue(getSplitParent(meta) == null);
+ // Assert cleaned up.
+ assertFalse(this.fs.exists(parentDir));
+ } finally {
+ MiniHBaseCluster.shutdown(masterThread, regionServerThreads);
+ }
+ }
+
+ /*
+  * Assert that every version stored at row <code>k</code> in the passed
+  * family equals the row key itself (this test writes the row key as the
+  * cell value). Fails if the get returns nothing at all.
+  * @param r region to read from
+  * @param family column to read
+  * @param k row key, also the expected value
+  * @throws IOException
+  */
+ private void assertGet(final HRegion r, final String family, final Text k)
+ throws IOException {
+   // Now I have k, get values out and assert they are as expected.
+   byte [][] results = r.get(k, new Text(family),
+     Integer.MAX_VALUE);
+   // Guard against a vacuous pass (empty array) or a NullPointerException
+   // (null) when the row yields no values.
+   assertNotNull("No values returned for row " + k, results);
+   assertTrue("Empty result for row " + k, results.length > 0);
+   for (int j = 0; j < results.length; j++) {
+     Text tmp = new Text(results[j]);
+     // Row should be equal to value every time.
+     assertEquals(k.toString(), tmp.toString());
+   }
+ }
+
+ private HRegionInfo getSplitParent(final HTable t)
+ throws IOException {
+ HRegionInfo result = null;
+ HScannerInterface s = t.obtainScanner(HConstants.COL_REGIONINFO_ARRAY,
+ HConstants.EMPTY_START_ROW, System.currentTimeMillis(), null);
+ try {
+ HStoreKey curKey = new HStoreKey();
+ TreeMap<Text, byte []> curVals = new TreeMap<Text, byte []>();
+ while(s.next(curKey, curVals)) {
+ HRegionInfo hri = (HRegionInfo)Writables.
+ getWritable(curVals.get(HConstants.COL_REGIONINFO), new HRegionInfo());
+ // Assert that if region is a split region, that it is also offline.
+ // Otherwise, if not a split region, assert that it is online.
+ if (hri.isSplit() && hri.isOffline()) {
+ result = hri;
+ break;
+ }
+ }
+ return result;
+ } finally {
+ s.close();
+ }
+ }
+
+ /*
+ * Count of rows in table for given column.
+ * @param t
+ * @param column
+ * @return
+ * @throws IOException
+ */
+ private int count(final HTable t, final String column)
+ throws IOException {
+ int size = 0;
+ Text [] cols = new Text[] {new Text(column)};
+ HScannerInterface s = t.obtainScanner(cols, HConstants.EMPTY_START_ROW,
+ System.currentTimeMillis(), null);
+ try {
+ HStoreKey curKey = new HStoreKey();
+ TreeMap<Text, byte []> curVals = new TreeMap<Text, byte []>();
+ while(s.next(curKey, curVals)) {
+ size++;
+ }
+ return size;
+ } finally {
+ s.close();
+ }
+ }
+
+ /*
+ * Assert first value in the passed region is <code>firstValue</code>.
+ * @param r
+ * @param column
+ * @param firstValue
+ * @throws IOException
+ */
+ private void assertScan(final HRegion r, final String column,
+ final Text firstValue)
+ throws IOException {
+ Text [] cols = new Text[] {new Text(column)};
+ HInternalScannerInterface s = r.getScanner(cols,
+ HConstants.EMPTY_START_ROW, System.currentTimeMillis(), null);
+ try {
+ HStoreKey curKey = new HStoreKey();
+ TreeMap<Text, byte []> curVals = new TreeMap<Text, byte []>();
+ boolean first = true;
+ OUTER_LOOP: while(s.next(curKey, curVals)) {
+ for(Text col: curVals.keySet()) {
+ byte [] val = curVals.get(col);
+ Text curval = new Text(val);
+ if (first) {
+ first = false;
+ assertTrue(curval.compareTo(firstValue) == 0);
+ } else {
+ // Not asserting anything. Might as well break.
+ break OUTER_LOOP;
+ }
+ }
+ }
+ } finally {
+ s.close();
+ }
+ }
+
+ /*
+  * Asserts the passed region needs a split and that its mid key can be
+  * fetched, then closes and splits the region at that key.
+  * @param r region to split
+  * @return the two regions produced by the split
+  * @throws IOException
+  */
+ private HRegion [] split(final HRegion r) throws IOException {
+   Text midKey = new Text();
+   assertTrue(r.needsSplit(midKey));
+   // Assert can get mid key from passed region.
+   assertGet(r, COLFAMILY_NAME3, midKey);
+   HRegion [] regions = r.closeAndSplit(midKey, null);
+   // Expected value goes first so a failure message reads correctly.
+   assertEquals(2, regions.length);
+   return regions;
+ }
+
+ private void addContent(final HRegion r, final String column)
+ throws IOException {
+ Text startKey = r.getRegionInfo().getStartKey();
+ Text endKey = r.getRegionInfo().getEndKey();
+ byte [] startKeyBytes = startKey.getBytes();
+ if (startKeyBytes == null || startKeyBytes.length == 0) {
+ startKeyBytes = new byte [] {FIRST_CHAR, FIRST_CHAR, FIRST_CHAR};
+ }
+ // Add rows of three characters. The first character starts with the
+ // 'a' character and runs up to 'z'. Per first character, we run the
+ // second character over same range. And same for the third so rows
+ // (and values) look like this: 'aaa', 'aab', 'aac', etc.
+ char secondCharStart = (char)startKeyBytes[1];
+ char thirdCharStart = (char)startKeyBytes[2];
+ EXIT_ALL_LOOPS: for (char c = (char)startKeyBytes[0]; c <= LAST_CHAR; c++) {
+ for (char d = secondCharStart; d <= LAST_CHAR; d++) {
+ for (char e = thirdCharStart; e <= LAST_CHAR; e++) {
+ byte [] bytes = new byte [] {(byte)c, (byte)d, (byte)e};
+ Text t = new Text(new String(bytes));
+ if (endKey != null && endKey.getLength() > 0
+ && endKey.compareTo(t) <= 0) {
+ break EXIT_ALL_LOOPS;
+ }
+ long lockid = r.startUpdate(t);
+ try {
+ r.put(lockid, new Text(column), bytes);
+ r.commit(lockid, System.currentTimeMillis());
+ lockid = -1;
+ } finally {
+ if (lockid != -1) {
+ r.abort(lockid);
+ }
+ }
+ }
+ // Set start character back to FIRST_CHAR after we've done first loop.
+ thirdCharStart = FIRST_CHAR;
+ }
+ secondCharStart = FIRST_CHAR;
+ }
+ }
+
+ // TODO: Have HTable and HRegion implement interface that has in it
+ // startUpdate, put, delete, commit, abort, etc.
+ private void addContent(final HTable table, final String column)
+ throws IOException {
+ byte [] startKeyBytes = new byte [] {FIRST_CHAR, FIRST_CHAR, FIRST_CHAR};
+ // Add rows of three characters. The first character starts with the
+ // 'a' character and runs up to 'z'. Per first character, we run the
+ // second character over same range. And same for the third so rows
+ // (and values) look like this: 'aaa', 'aab', 'aac', etc.
+ char secondCharStart = (char)startKeyBytes[1];
+ char thirdCharStart = (char)startKeyBytes[2];
+ for (char c = (char)startKeyBytes[0]; c <= LAST_CHAR; c++) {
+ for (char d = secondCharStart; d <= LAST_CHAR; d++) {
+ for (char e = thirdCharStart; e <= LAST_CHAR; e++) {
+ byte [] bytes = new byte [] {(byte)c, (byte)d, (byte)e};
+ Text t = new Text(new String(bytes));
+ long lockid = table.startBatchUpdate(t);
+ try {
+ table.put(lockid, new Text(column), bytes);
+ table.commit(lockid, System.currentTimeMillis());
+ lockid = -1;
+ } finally {
+ if (lockid != -1) {
+ table.abort(lockid);
+ }
+ }
+ }
+ // Set start character back to FIRST_CHAR after we've done first loop.
+ thirdCharStart = FIRST_CHAR;
+ }
+ secondCharStart = FIRST_CHAR;
+ }
+ }
+}
\ No newline at end of file
Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestTimestamp.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestTimestamp.java?view=diff&rev=564012&r1=564011&r2=564012
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestTimestamp.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestTimestamp.java Wed Aug 8 13:30:13 2007
@@ -107,7 +107,7 @@
// flush everything out to disk
- HRegionServer s = cluster.regionServers.get(0);
+ HRegionServer s = cluster.regionThreads.get(0).getRegionServer();
for(HRegion r: s.onlineRegions.values() ) {
r.flushcache(false);
}