You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ji...@apache.org on 2008/05/01 18:08:10 UTC

svn commit: r652563 - in /hadoop/hbase/branches/0.1: ./ src/test/org/apache/hadoop/hbase/ src/test/org/apache/hadoop/hbase/mapred/

Author: jimk
Date: Thu May  1 09:08:09 2008
New Revision: 652563

URL: http://svn.apache.org/viewvc?rev=652563&view=rev
Log:
HBASE-607 MultiRegionTable.makeMultiRegionTable is not deterministic enough for regression tests

M MultiRegionTable

  Make deterministic by creating the regions directly and not relying on
  the asynchronous nature of cache flushes, compactions and splits. The
  regions are small, but the point of this class is to generate a
  table with multiple regions so we can test map / reduce, region
  onlining / offlining, etc.

  Removed PUNCTUATION from row keys. Not sure why it was there in the
  first place, other than perhaps to verify that a row key can have
  punctuation in it provided it is not the first character. This will
  become moot when row keys change from Text to byte[] anyway.

  Incorporate repeated code
{code}
    region.close();
    region.getLog().closeAndDelete();
{code}
  into private method closeRegionAndDeleteLog

M TestSplit

  extends HBaseTestCase instead of MultiRegionTable. It didn't use the
  output of MultiRegionTable, so all that work was just wasted by this
  test.

M TestTableIndex, TestTableMapReduce

  The only two tests that currently use MultiRegionTable. Minor
  modifications needed because MultiRegionTable now handles starting
  and stopping of the mini-DFS cluster. With the new MultiRegionTable
  class, if these tests fail now it will be because something they are
  testing has regressed and not because MultiRegionTable failed.


Modified:
    hadoop/hbase/branches/0.1/CHANGES.txt
    hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/MultiRegionTable.java
    hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestSplit.java
    hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/mapred/TestTableIndex.java
    hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java

Modified: hadoop/hbase/branches/0.1/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/CHANGES.txt?rev=652563&r1=652562&r2=652563&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.1/CHANGES.txt Thu May  1 09:08:09 2008
@@ -28,6 +28,8 @@
    HBASE-608   HRegionServer::getThisIP() checks hadoop config var for dns interface name
                (Jim R. Wilson via Stack)
    HBASE-609   Master doesn't see regionserver edits because of clock skew
+   HBASE-607   MultiRegionTable.makeMultiRegionTable is not deterministic enough
+               for regression tests
 
   IMPROVEMENTS
    HBASE-559   MR example job to count table rows

Modified: hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/MultiRegionTable.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/MultiRegionTable.java?rev=652563&r1=652562&r2=652563&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/MultiRegionTable.java (original)
+++ hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/MultiRegionTable.java Thu May  1 09:08:09 2008
@@ -20,330 +20,113 @@
 package org.apache.hadoop.hbase;
 
 import java.io.IOException;
-import java.util.ConcurrentModificationException;
-import java.util.Map;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.util.Writables;
+
+import org.apache.hadoop.dfs.MiniDFSCluster;
 import org.apache.hadoop.io.Text;
 
 /**
  * Utility class to build a table of multiple regions.
  */
 public class MultiRegionTable extends HBaseTestCase {
-  static final Log LOG = LogFactory.getLog(MultiRegionTable.class.getName());
+  private static final Text[] KEYS = {
+    null,
+    new Text("bbb"),
+    new Text("ccc"),
+    new Text("ddd"),
+    new Text("eee"),
+    new Text("fff"),
+    new Text("ggg"),
+    new Text("hhh"),
+    new Text("iii"),
+    new Text("jjj"),
+    new Text("kkk"),
+    new Text("lll"),
+    new Text("mmm"),
+    new Text("nnn"),
+    new Text("ooo"),
+    new Text("ppp"),
+    new Text("qqq"),
+    new Text("rrr"),
+    new Text("sss"),
+    new Text("ttt"),
+    new Text("uuu"),
+    new Text("vvv"),
+    new Text("www"),
+    new Text("xxx"),
+    new Text("yyy")
+  };
+  
+  protected final String columnName;
+  protected HTableDescriptor desc;
+  protected MiniDFSCluster dfsCluster = null;
 
   /**
-   * Make a multi-region table.  Presumption is that table already exists and
-   * that there is only one regionserver. Makes it multi-region by filling with
-   * data and provoking splits. Asserts parent region is cleaned up after its
-   * daughter splits release all references.
-   * @param conf
-   * @param cluster
-   * @param fs
-   * @param tableName
-   * @param columnName
-   * @throws IOException
+   * @param columnName the column to populate.
    */
-  @SuppressWarnings("null")
-  public static void makeMultiRegionTable(HBaseConfiguration conf,
-      MiniHBaseCluster cluster, FileSystem fs, String tableName,
-      String columnName) throws IOException {  
-    final int retries = 10; 
-    final long waitTime = 20L * 1000L;
+  public MultiRegionTable(final String columnName) {
+    super();
+    this.columnName = columnName;
+  }
+  
+  /** {@inheritDoc} */
+  @Override
+  public void setUp() throws Exception {
+    dfsCluster = new MiniDFSCluster(conf, 2, true, (String[])null);
+    // Set the hbase.rootdir to be the home directory in mini dfs.
+    this.conf.set(HConstants.HBASE_DIR,
+      this.dfsCluster.getFileSystem().getHomeDirectory().toString());
     
-    // This size should make it so we always split using the addContent
-    // below.  After adding all data, the first region is 1.3M. Should
-    // set max filesize to be <= 1M.
-    assertTrue(conf.getLong("hbase.hregion.max.filesize",
-      HConstants.DEFAULT_MAX_FILE_SIZE) <= 1024 * 1024);
-
-    assertNotNull(fs);
-    Path d = fs.makeQualified(new Path(conf.get(HConstants.HBASE_DIR)));
-
-    // Get connection on the meta table and get count of rows.
-    HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
-    int count = count(meta, tableName);
-    HTable t = new HTable(conf, new Text(tableName));
-    // Get the parent region here now.
-    HRegionInfo parent =
-      t.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
-    LOG.info("Parent region " + parent.toString());
-    Path parentDir = HRegion.getRegionDir(new Path(d, tableName),
-      parent.getEncodedName());
-    assertTrue(fs.exists(parentDir));
-    // Now add content.
-    addContent(new HTableIncommon(t), columnName);
-    LOG.info("Finished content loading");
+    // Note: we must call super.setUp after starting the mini cluster or
+    // we will end up with a local file system
     
-    // All is running in the one JVM so I should be able to get the single
-    // region instance and bring on a split. Presumption is that there is only
-    // one regionserver.   Of not, the split may already have happened by the
-    // time we got here.  If so, then the region found when we go searching
-    // with EMPTY_START_ROW will be one of the unsplittable daughters.
-    HRegionInfo hri = null;
-    HRegion r = null;
-    for (int i = 0; i < 30; i++) {
-      hri = t.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
-      LOG.info("Region location: " + hri);
-      r = cluster.getRegionThreads().get(0).getRegionServer().
-        onlineRegions.get(hri.getRegionName());
-      if (r != null) {
-        break;
-      }
-      try {
-        Thread.sleep(1000);
-      } catch (InterruptedException e) {
-        LOG.warn("Waiting on region to come online", e);
-      }
-    }
+    super.setUp();
 
-    // Flush the cache
-    cluster.getRegionThreads().get(0).getRegionServer().
-      getFlushRequester().request(r);
-
-    // Now, wait until split makes it into the meta table.
-    int oldCount = count;
-    for (int i = 0; i < retries;  i++) {
-      count = count(meta, tableName);
-      if (count > oldCount) {
-        break;
-      }
-      try {
-        Thread.sleep(waitTime);
-      } catch (InterruptedException e) {
-        // continue
-      }
-    }
-    if (count <= oldCount) {
-      throw new IOException("Failed waiting on splits to show up");
-    }
-    
-    // Get info on the parent from the meta table.  Pass in 'hri'. Its the
-    // region we have been dealing with up to this. Its the parent of the
-    // region split.
-    Map<Text, byte []> data = getSplitParentInfo(meta, parent);
-    if (data == null) {
-      // We changed stuff so daughters get cleaned up much faster now.  Can
-      // run so fast, parent has been deleted by time we get to here.
-    } else {
-      parent  = Writables.getHRegionInfoOrNull(data.get(HConstants.COL_REGIONINFO));
-      LOG.info("Found parent region: " + parent);
-      assertTrue(parent.isOffline());
-      assertTrue(parent.isSplit());
-      HRegionInfo splitA =
-        Writables.getHRegionInfoOrNull(data.get(HConstants.COL_SPLITA));
-      HRegionInfo splitB =
-        Writables.getHRegionInfoOrNull(data.get(HConstants.COL_SPLITB));
-      assertTrue(fs.exists(parentDir));
-      LOG.info("Split happened. Parent is " + parent.getRegionName());
-
-      // Recalibrate will cause us to wait on new regions' deployment
-      recalibrate(t, new Text(columnName), retries, waitTime);
-
-      if (splitA == null) {
-        LOG.info("splitA was already null. Assuming it was previously compacted.");
-      } else {
-        LOG.info("Daughter splitA: " + splitA.getRegionName());
-        // Compact a region at a time so we can test case where one region has
-        // no references but the other still has some
-        compact(cluster, splitA);
-        
-        // Wait till the parent only has reference to remaining split, one that
-        // still has references.
-        while (true) {
-          data = getSplitParentInfo(meta, parent);
-          if (data == null || data.size() == 3) {
-            try {
-              Thread.sleep(waitTime);
-            } catch (InterruptedException e) {
-              // continue
-            }
-            continue;
-          }
-          break;
-        }
-        LOG.info("Parent split info returned " + data.keySet().toString());
+    try {
+      // Create a bunch of regions
+
+      HRegion[] regions = new HRegion[KEYS.length];
+      for (int i = 0; i < regions.length; i++) {
+        int j = (i + 1) % regions.length;
+        regions[i] = createARegion(KEYS[i], KEYS[j]);
       }
 
-      if (splitB == null) {
-        LOG.info("splitB was already null. Assuming it was previously compacted.");
-      } else {
-        LOG.info("Daughter splitB: " + splitA.getRegionName());
+      // Now create the root and meta regions and insert the data regions
+      // created above into the meta
 
-        // Call second split.
-        compact(cluster, splitB);
-      }
-      // Now wait until parent disappears.    
-      LOG.info("Waiting on parent " + parent.getRegionName() + " to disappear");
-      for (int i = 0; i < retries; i++) {
-        if (getSplitParentInfo(meta, parent) == null) {
-          break;
-        }
-        
-        try {
-          Thread.sleep(waitTime);
-        } catch (InterruptedException e) {
-          // continue
-        }
-      }
-      assertNull(getSplitParentInfo(meta, parent));
-    }
+      HRegion root = HRegion.createHRegion(HRegionInfo.rootRegionInfo,
+          testDir, this.conf);
+      HRegion meta = HRegion.createHRegion(HRegionInfo.firstMetaRegionInfo,
+          testDir, this.conf);
+      HRegion.addRegionToMETA(root, meta);
 
-    // Assert cleaned up.
-    
-    for (int i = 0; i < retries; i++) {
-      if (!fs.exists(parentDir)) {
-        break;
+      for(int i = 0; i < regions.length; i++) {
+        HRegion.addRegionToMETA(meta, regions[i]);
       }
-      try {
-        Thread.sleep(waitTime);
-      } catch (InterruptedException e) {
-        // continue
-      }
-    }
-    assertFalse(fs.exists(parentDir));
-  }
 
-  /*
-   * Count of regions in passed meta table.
-   * @param t
-   * @param column
-   * @return
-   * @throws IOException
-   */
-  private static int count(final HTable t, final String tableName)
-    throws IOException {
-    
-    int size = 0;
-    Text [] cols = new Text[] {HConstants.COLUMN_FAMILY};
-    HScannerInterface s = t.obtainScanner(cols, HConstants.EMPTY_START_ROW,
-      HConstants.LATEST_TIMESTAMP, null);
-    try {
-      HStoreKey curKey = new HStoreKey();
-      TreeMap<Text, byte []> curVals = new TreeMap<Text, byte []>();
-      while(s.next(curKey, curVals)) {
-        HRegionInfo hri = Writables.
-          getHRegionInfoOrNull(curVals.get(HConstants.COL_REGIONINFO));
-        if (hri.getTableDesc().getName().toString().equals(tableName)) {
-          size++;
-        }
-      }
-      return size;
-    } finally {
-      if (s != null) {
-        s.close();
-      }
+      closeRegionAndDeleteLog(root);
+      closeRegionAndDeleteLog(meta);
+    } catch (Exception e) {
+      StaticTestEnvironment.shutdownDfs(dfsCluster);
+      throw e;
     }
   }
 
-  /*
-   * @return Return row info for passed in region or null if not found in scan.
-   */
-  private static Map<Text, byte []> getSplitParentInfo(final HTable t,
-      final HRegionInfo parent)
-  throws IOException {  
-    HScannerInterface s = t.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY,
-        HConstants.EMPTY_START_ROW, HConstants.LATEST_TIMESTAMP, null);
-    try {
-      HStoreKey curKey = new HStoreKey();
-      TreeMap<Text, byte []> curVals = new TreeMap<Text, byte []>();
-      while(s.next(curKey, curVals)) {
-        HRegionInfo hri = Writables.
-          getHRegionInfoOrNull(curVals.get(HConstants.COL_REGIONINFO));
-        if (hri == null) {
-          continue;
-        }
-        // Make sure I get the parent.
-        if (hri.getRegionName().toString().
-            equals(parent.getRegionName().toString()) &&
-              hri.getRegionId() == parent.getRegionId()) {
-          return curVals;
-        }
-      }
-      return null;
-    } finally {
-      s.close();
-    }   
+  /** {@inheritDoc} */
+  @Override
+  public void tearDown() throws Exception {
+    super.tearDown();
+    StaticTestEnvironment.shutdownDfs(dfsCluster);
   }
 
-  /*
-   * Recalibrate passed in HTable.  Run after change in region geography.
-   * Open a scanner on the table. This will force HTable to recalibrate
-   * and in doing so, will force us to wait until the new child regions
-   * come on-line (since they are no longer automatically served by the 
-   * HRegionServer that was serving the parent. In this test they will
-   * end up on the same server (since there is only one), but we have to
-   * wait until the master assigns them. 
-   * @param t
-   * @param retries
-   */
-  private static void recalibrate(final HTable t, final Text column,
-      final int retries, final long waitTime) throws IOException {
-    
-    for (int i = 0; i < retries; i++) {
-      try {
-        HScannerInterface s =
-          t.obtainScanner(new Text[] {column}, HConstants.EMPTY_START_ROW);
-        try {
-          HStoreKey key = new HStoreKey();
-          TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
-          s.next(key, results);
-          break;
-        } finally {
-          s.close();
-        }
-      } catch (NotServingRegionException x) {
-        System.out.println("it's alright");
-        try {
-          Thread.sleep(waitTime);
-        } catch (InterruptedException e) {
-          // continue
-        }
-      }
-    }
+  private HRegion createARegion(Text startKey, Text endKey) throws IOException {
+    HRegion region = createNewHRegion(desc, startKey, endKey);
+    addContent(region, this.columnName);
+    closeRegionAndDeleteLog(region);
+    return region;
   }
-
-  /*
-   * Compact the passed in region <code>r</code>. 
-   * @param cluster
-   * @param r
-   * @throws IOException
-   */
-  protected static void compact(final MiniHBaseCluster cluster,
-      final HRegionInfo r)
-  throws IOException {
-    if (r == null) {
-      LOG.debug("Passed region is null");
-      return;
-    }
-    LOG.info("Starting compaction");
-    for (LocalHBaseCluster.RegionServerThread thread:
-        cluster.getRegionThreads()) {
-      SortedMap<Text, HRegion> regions = thread.getRegionServer().onlineRegions;
-      
-      // Retry if ConcurrentModification... alternative of sync'ing is not
-      // worth it for sake of unit test.
-      for (int i = 0; i < 10; i++) {
-        try {
-          for (HRegion online: regions.values()) {
-            if (online.getRegionName().toString().
-                equals(r.getRegionName().toString())) {
-              online.compactStores();
-            }
-          }
-          break;
-        } catch (ConcurrentModificationException e) {
-          LOG.warn("Retrying because ..." + e.toString() + " -- one or " +
-          "two should be fine");
-          continue;
-        }
-      }
-    }
+  
+  private void closeRegionAndDeleteLog(HRegion region) throws IOException {
+    region.close();
+    region.getLog().closeAndDelete();
   }
 }
\ No newline at end of file

Modified: hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestSplit.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestSplit.java?rev=652563&r1=652562&r2=652563&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestSplit.java (original)
+++ hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/TestSplit.java Thu May  1 09:08:09 2008
@@ -33,9 +33,9 @@
  * {@Link TestHRegion} does a split but this TestCase adds testing of fast
  * split and manufactures odd-ball split scenarios.
  */
-public class TestSplit extends MultiRegionTable {
+public class TestSplit extends HBaseTestCase {
   @SuppressWarnings("hiding")
-  static final Log LOG = LogFactory.getLog(TestSplit.class.getName());
+  static final Log LOG = LogFactory.getLog(TestSplit.class);
   
   /** constructor */
   public TestSplit() {

Modified: hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/mapred/TestTableIndex.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/mapred/TestTableIndex.java?rev=652563&r1=652562&r2=652563&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/mapred/TestTableIndex.java (original)
+++ hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/mapred/TestTableIndex.java Thu May  1 09:08:09 2008
@@ -30,10 +30,8 @@
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.dfs.MiniDFSCluster;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseAdmin;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegion;
@@ -43,7 +41,6 @@
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.MultiRegionTable;
-import org.apache.hadoop.hbase.StaticTestEnvironment;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
@@ -75,76 +72,39 @@
     TEXT_OUTPUT_COLUMN
   };
 
-
-  private HTableDescriptor desc;
-
-  private MiniDFSCluster dfsCluster = null;
-  private Path dir;
   private MiniHBaseCluster hCluster = null;
 
+  /** constructor */
+  public TestTableIndex() {
+    super(INPUT_COLUMN);
+    desc = new HTableDescriptor(TABLE_NAME);
+    desc.addFamily(new HColumnDescriptor(INPUT_COLUMN));
+    desc.addFamily(new HColumnDescriptor(OUTPUT_COLUMN));
+  }
+
   /** {@inheritDoc} */
   @Override
   public void setUp() throws Exception {
     // Enable DEBUG-level MR logging.
     Logger.getLogger("org.apache.hadoop.mapred").setLevel(Level.DEBUG);
-    
-    // Make sure the cache gets flushed so we trigger a compaction(s) and
-    // hence splits.
-    conf.setInt("hbase.hregion.memcache.flush.size", 1024 * 1024);
-    
-    // This size should make it so we always split using the addContent
-    // below. After adding all data, the first region is 1.3M
-    conf.setLong("hbase.hregion.max.filesize", 1024 * 1024);
-
-    desc = new HTableDescriptor(TABLE_NAME);
-    desc.addFamily(new HColumnDescriptor(INPUT_COLUMN));
-    desc.addFamily(new HColumnDescriptor(OUTPUT_COLUMN));
-
-    dfsCluster = new MiniDFSCluster(conf, 1, true, (String[]) null);
-    // Set the hbase.rootdir to be the home directory in mini dfs.
-    this.conf.set(HConstants.HBASE_DIR,
-      this.dfsCluster.getFileSystem().getHomeDirectory().toString());
 
     // Must call super.setUp after mini dfs cluster is started or else
     // filesystem ends up being local
     
     super.setUp();
 
-    try {
-      dir = new Path("/hbase");
-      fs.mkdirs(dir);
-
-      // Start up HBase cluster
-      hCluster = new MiniHBaseCluster(conf, 1, dfsCluster, true);
-
-      // Create a table.
-      HBaseAdmin admin = new HBaseAdmin(conf);
-      admin.createTable(desc);
-
-      // Populate a table into multiple regions
-      makeMultiRegionTable(conf, hCluster, this.fs, TABLE_NAME, INPUT_COLUMN);
-
-      // Verify table indeed has multiple regions
-      HTable table = new HTable(conf, new Text(TABLE_NAME));
-      Text[] startKeys = table.getStartKeys();
-      assertTrue(startKeys.length > 1);
-    } catch (Exception e) {
-      StaticTestEnvironment.shutdownDfs(dfsCluster);
-      throw e;
-    }
+    // Start up HBase cluster
+    hCluster = new MiniHBaseCluster(conf, 1, dfsCluster, true);
     LOG.debug("\n\n\n\n\t\t\tSetup Complete\n\n\n\n");
   }
 
   /** {@inheritDoc} */
   @Override
   public void tearDown() throws Exception {
-    super.tearDown();
-
     if (hCluster != null) {
       hCluster.shutdown();
     }
-
-    StaticTestEnvironment.shutdownDfs(dfsCluster);
+    super.tearDown();
   }
 
   /**
@@ -318,6 +278,7 @@
         scanner.close();
     }
   }
+
   /**
    * @param args unused
    */

Modified: hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java?rev=652563&r1=652562&r2=652563&view=diff
==============================================================================
--- hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java (original)
+++ hadoop/hbase/branches/0.1/src/test/org/apache/hadoop/hbase/mapred/TestTableMapReduce.java Thu May  1 09:08:09 2008
@@ -20,15 +20,11 @@
 package org.apache.hadoop.hbase.mapred;
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
 import java.util.Map;
 import java.util.TreeMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.dfs.MiniDFSCluster;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hbase.HBaseAdmin;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HScannerInterface;
@@ -37,7 +33,6 @@
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.MultiRegionTable;
-import org.apache.hadoop.hbase.StaticTestEnvironment;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.io.MapWritable;
 import org.apache.hadoop.io.Text;
@@ -55,7 +50,6 @@
   private static final Log LOG =
     LogFactory.getLog(TestTableMapReduce.class.getName());
 
-  static final String SINGLE_REGION_TABLE_NAME = "srtest";
   static final String MULTI_REGION_TABLE_NAME = "mrtest";
   static final String INPUT_COLUMN = "contents:";
   static final Text TEXT_INPUT_COLUMN = new Text(INPUT_COLUMN);
@@ -67,46 +61,14 @@
     TEXT_OUTPUT_COLUMN
   };
 
-  private MiniDFSCluster dfsCluster = null;
-  private Path dir;
   private MiniHBaseCluster hCluster = null;
   
-  private static byte[][] values = null;
-  
-  static {
-    try {
-      values = new byte[][] {
-          "0123".getBytes(HConstants.UTF8_ENCODING),
-          "abcd".getBytes(HConstants.UTF8_ENCODING),
-          "wxyz".getBytes(HConstants.UTF8_ENCODING),
-          "6789".getBytes(HConstants.UTF8_ENCODING)
-      };
-    } catch (UnsupportedEncodingException e) {
-      fail();
-    }
-  }
-  
   /** constructor */
   public TestTableMapReduce() {
-    super();
-    
-    // Make sure the cache gets flushed so we trigger a compaction(s) and
-    // hence splits.
-    conf.setInt("hbase.hregion.memcache.flush.size", 1024 * 1024);
-
-    // Always compact if there is more than one store file.
-    conf.setInt("hbase.hstore.compactionThreshold", 2);
-
-    // This size should make it so we always split using the addContent
-    // below. After adding all data, the first region is 1.3M
-    conf.setLong("hbase.hregion.max.filesize", 256 * 1024);
-
-    // Make lease timeout longer, lease checks less frequent
-    conf.setInt("hbase.master.lease.period", 10 * 1000);
-    conf.setInt("hbase.master.lease.thread.wakefrequency", 5 * 1000);
-    
-    // Set client pause to the original default
-    conf.setInt("hbase.client.pause", 10 * 1000);
+    super(INPUT_COLUMN);
+    desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
+    desc.addFamily(new HColumnDescriptor(INPUT_COLUMN));
+    desc.addFamily(new HColumnDescriptor(OUTPUT_COLUMN));
   }
 
   /**
@@ -114,27 +76,12 @@
    */
   @Override
   public void setUp() throws Exception {
-    dfsCluster = new MiniDFSCluster(conf, 1, true, (String[])null);
-    // Set the hbase.rootdir to be the home directory in mini dfs.
-    this.conf.set(HConstants.HBASE_DIR,
-      this.dfsCluster.getFileSystem().getHomeDirectory().toString());
-
-    // Must call super.setup() after starting mini dfs cluster. Otherwise
-    // we get a local file system instead of hdfs
-    
     super.setUp();
-    try {
-      dir = new Path("/hbase");
-      fs.mkdirs(dir);
-      // Start up HBase cluster
-      // Only one region server.  MultiRegionServer manufacturing code below
-      // depends on there being one region server only.
-      hCluster = new MiniHBaseCluster(conf, 1, dfsCluster, true);
-      LOG.info("Master is at " + this.conf.get(HConstants.MASTER_ADDRESS));
-    } catch (Exception e) {
-      StaticTestEnvironment.shutdownDfs(dfsCluster);
-      throw e;
-    }
+    // Start up HBase cluster
+    // Only one region server.  MultiRegionServer manufacturing code below
+    // depends on there being one region server only.
+    hCluster = new MiniHBaseCluster(conf, 1, dfsCluster, true);
+    LOG.info("Master is at " + this.conf.get(HConstants.MASTER_ADDRESS));
   }
 
   /**
@@ -142,11 +89,10 @@
    */
   @Override
   public void tearDown() throws Exception {
-    super.tearDown();
     if(hCluster != null) {
       hCluster.shutdown();
     }
-    StaticTestEnvironment.shutdownDfs(dfsCluster);
+    super.tearDown();
   }
 
   /**
@@ -156,7 +102,7 @@
     /**
      * Pass the key, and reversed value to reduce
      *
-     * @see org.apache.hadoop.hbase.mapred.TableMap#map(org.apache.hadoop.hbase.HStoreKey, org.apache.hadoop.io.MapWritable, org.apache.hadoop.hbase.mapred.TableOutputCollector, org.apache.hadoop.mapred.Reporter)
+     * @see org.apache.hadoop.hbase.mapred.TableMap#map(org.apache.hadoop.hbase.HStoreKey, org.apache.hadoop.io.MapWritable, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
      */
     @SuppressWarnings("unchecked")
     @Override
@@ -197,154 +143,31 @@
   }
   
   /**
-   * Test hbase mapreduce jobs against single region and multi-region tables.
+   * Test hbase mapreduce jobs against a multi-region table.
    * @throws IOException
    */
   public void testTableMapReduce() throws IOException {
-    localTestSingleRegionTable();
-    localTestMultiRegionTable();
-  }
-
-  /*
-   * Test against a single region.
-   * @throws IOException
-   */
-  private void localTestSingleRegionTable() throws IOException {
-    HTableDescriptor desc = new HTableDescriptor(SINGLE_REGION_TABLE_NAME);
-    desc.addFamily(new HColumnDescriptor(INPUT_COLUMN));
-    desc.addFamily(new HColumnDescriptor(OUTPUT_COLUMN));
-    
-    // Create a table.
-    HBaseAdmin admin = new HBaseAdmin(this.conf);
-    admin.createTable(desc);
-
-    // insert some data into the test table
-    HTable table = new HTable(conf, new Text(SINGLE_REGION_TABLE_NAME));
+    @SuppressWarnings("deprecation")
+    MiniMRCluster mrCluster = new MiniMRCluster(2, fs.getUri().toString(), 1);
 
     try {
-      for(int i = 0; i < values.length; i++) {
-        long lockid = table.startUpdate(new Text("row_"
-            + String.format("%1$05d", i)));
-
-        try {
-          table.put(lockid, TEXT_INPUT_COLUMN, values[i]);
-          table.commit(lockid, System.currentTimeMillis());
-          lockid = -1;
-        } finally {
-          if (lockid != -1)
-            table.abort(lockid);
-        }
-      }
-
-      LOG.info("Print table contents before map/reduce for " +
-        SINGLE_REGION_TABLE_NAME);
-      scanTable(SINGLE_REGION_TABLE_NAME, true);
-
-      @SuppressWarnings("deprecation")
-      MiniMRCluster mrCluster = new MiniMRCluster(2, fs.getUri().toString(), 1);
-
-      try {
-        JobConf jobConf = new JobConf(conf, TestTableMapReduce.class);
-        jobConf.setJobName("process column contents");
-        jobConf.setNumMapTasks(1);
-        jobConf.setNumReduceTasks(1);
-
-        TableMap.initJob(SINGLE_REGION_TABLE_NAME, INPUT_COLUMN, 
-            ProcessContentsMapper.class, jobConf);
-
-        TableReduce.initJob(SINGLE_REGION_TABLE_NAME,
-            IdentityTableReduce.class, jobConf);
-        LOG.info("Started " + SINGLE_REGION_TABLE_NAME);
-        JobClient.runJob(jobConf);
-
-        LOG.info("Print table contents after map/reduce for " +
-          SINGLE_REGION_TABLE_NAME);
-      scanTable(SINGLE_REGION_TABLE_NAME, true);
+      JobConf jobConf = new JobConf(conf, TestTableMapReduce.class);
+      jobConf.setJobName("process column contents");
+      jobConf.setNumMapTasks(2);
+      jobConf.setNumReduceTasks(1);
+
+      TableMap.initJob(MULTI_REGION_TABLE_NAME, INPUT_COLUMN, 
+          ProcessContentsMapper.class, jobConf);
+
+      TableReduce.initJob(MULTI_REGION_TABLE_NAME,
+          IdentityTableReduce.class, jobConf);
+      LOG.info("Started " + MULTI_REGION_TABLE_NAME);
+      JobClient.runJob(jobConf);
 
       // verify map-reduce results
-      verify(SINGLE_REGION_TABLE_NAME);
-      } finally {
-        mrCluster.shutdown();
-      }
-    } finally {
-      table.close();
-    }
-  }
-  
-  /*
-   * Test against multiple regions.
-   * @throws IOException
-   */
-  private void localTestMultiRegionTable() throws IOException {
-    HTableDescriptor desc = new HTableDescriptor(MULTI_REGION_TABLE_NAME);
-    desc.addFamily(new HColumnDescriptor(INPUT_COLUMN));
-    desc.addFamily(new HColumnDescriptor(OUTPUT_COLUMN));
-    
-    // Create a table.
-    HBaseAdmin admin = new HBaseAdmin(this.conf);
-    admin.createTable(desc);
-
-    // Populate a table into multiple regions
-    makeMultiRegionTable(conf, hCluster, fs, MULTI_REGION_TABLE_NAME,
-        INPUT_COLUMN);
-    
-    // Verify table indeed has multiple regions
-    HTable table = new HTable(conf, new Text(MULTI_REGION_TABLE_NAME));
-    try {
-      Text[] startKeys = table.getStartKeys();
-      assertTrue(startKeys.length > 1);
-
-      @SuppressWarnings("deprecation")
-      MiniMRCluster mrCluster = new MiniMRCluster(2, fs.getUri().toString(), 1);
-
-      try {
-        JobConf jobConf = new JobConf(conf, TestTableMapReduce.class);
-        jobConf.setJobName("process column contents");
-        jobConf.setNumMapTasks(2);
-        jobConf.setNumReduceTasks(1);
-
-        TableMap.initJob(MULTI_REGION_TABLE_NAME, INPUT_COLUMN, 
-            ProcessContentsMapper.class, jobConf);
-
-        TableReduce.initJob(MULTI_REGION_TABLE_NAME,
-            IdentityTableReduce.class, jobConf);
-        LOG.info("Started " + MULTI_REGION_TABLE_NAME);
-        JobClient.runJob(jobConf);
-
-        // verify map-reduce results
-        verify(MULTI_REGION_TABLE_NAME);
-      } finally {
-        mrCluster.shutdown();
-      }
-    } finally {
-      table.close();
-    }
-  }
-
-  private void scanTable(String tableName, boolean printValues)
-  throws IOException {
-    HTable table = new HTable(conf, new Text(tableName));
-    
-    HScannerInterface scanner =
-      table.obtainScanner(columns, HConstants.EMPTY_START_ROW);
-    
-    try {
-      HStoreKey key = new HStoreKey();
-      TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
-      
-      while(scanner.next(key, results)) {
-        if (printValues) {
-          LOG.info("row: " + key.getRow());
-
-          for(Map.Entry<Text, byte[]> e: results.entrySet()) {
-            LOG.info(" column: " + e.getKey() + " value: "
-                + new String(e.getValue(), HConstants.UTF8_ENCODING));
-          }
-        }
-      }
-      
+      verify(MULTI_REGION_TABLE_NAME);
     } finally {
-      scanner.close();
+      mrCluster.shutdown();
     }
   }