You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ji...@apache.org on 2007/06/03 07:09:22 UTC

svn commit: r543841 - in /lucene/hadoop/trunk/src/contrib/hbase: ./ src/java/org/apache/hadoop/hbase/ src/test/org/apache/hadoop/hbase/

Author: jimk
Date: Sat Jun  2 22:09:21 2007
New Revision: 543841

URL: http://svn.apache.org/viewvc?view=rev&rev=543841
Log:
HADOOP-1391. Part 2 - table compaction via merging adjacent regions that have shrunk.

Added:
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMerge.java
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestMerge.java
Modified:
    lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMasterInterface.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionInfo.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTableDescriptor.java
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java

Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?view=diff&rev=543841&r1=543840&r2=543841
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Sat Jun  2 22:09:21 2007
@@ -20,3 +20,5 @@
  10. HADOOP-1430. HBase shutdown leaves regionservers up.
  11. HADOOP-1392. Part1: includes create/delete table; enable/disable table;
      add/remove column.
+ 12. HADOOP-1392. Part2: includes table compaction by merging adjacent regions
+     that have shrunk in size.

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java?view=diff&rev=543841&r1=543840&r2=543841
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java Sat Jun  2 22:09:21 2007
@@ -15,7 +15,6 @@
  */
 package org.apache.hadoop.hbase;
 
-import java.lang.Class;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -131,7 +130,7 @@
   }
   
   /* Find the address of the master and connect to it */
-  private void checkMaster() throws IOException {
+  private void checkMaster() throws MasterNotRunningException {
     if (this.master != null) {
       return;
     }
@@ -175,6 +174,21 @@
   //////////////////////////////////////////////////////////////////////////////
 
   /**
+   * @return - true if the master server is running
+   */
+  public boolean isMasterRunning() {
+    if(this.master == null) {
+      try {
+        checkMaster();
+        
+      } catch(MasterNotRunningException e) {
+        return false;
+      }
+    }
+    return true;
+  }
+  
+  /**
    * Creates a new table
    * 
    * @param desc - table descriptor for table
@@ -303,10 +317,6 @@
     } catch(RemoteException e) {
       handleRemoteException(e);
     }
-  }
-  
-  public synchronized void mergeRegions(Text regionName1, Text regionName2) throws IOException {
-    
   }
   
   public synchronized void enableTable(Text tableName) throws IOException {

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java?view=diff&rev=543841&r1=543840&r2=543841
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java Sat Jun  2 22:09:21 2007
@@ -576,13 +576,17 @@
     if(! fs.exists(rootRegionDir)) {
       LOG.info("bootstrap: creating ROOT and first META regions");
       try {
-        HRegion root = createNewHRegion(HGlobals.rootTableDesc, 0L);
-        HRegion meta = createNewHRegion(HGlobals.metaTableDesc, 1L);
+        HRegion root = HRegion.createNewHRegion(fs, dir, conf, 
+            HGlobals.rootTableDesc, 0L, null, null);
+        HRegion meta = HRegion.createNewHRegion(fs, dir, conf,
+            HGlobals.metaTableDesc, 1L, null, null);
       
-        addTableToMeta(root, meta);
+        HRegion.addRegionToMeta(root, meta);
         
         root.close();
+        root.getLog().close();
         meta.close();
+        meta.getLog().close();
         
       } catch(IOException e) {
         LOG.error(e);
@@ -1621,7 +1625,8 @@
 
         // 2. Create the HRegion
 
-        HRegion r = createNewHRegion(desc, newRegion.regionId);
+        HRegion r = HRegion.createNewHRegion(fs, dir, conf, desc,
+            newRegion.regionId, null, null);
 
         // 3. Insert into meta
 
@@ -1659,53 +1664,6 @@
     }
   }
 
-  /**
-   * Internal method to create a new HRegion. Used by createTable and by the
-   * bootstrap code in the HMaster constructor
-   * 
-   * @param desc        - table descriptor
-   * @param regionId    - region id
-   * @return            - new HRegion
-   * 
-   * @throws IOException
-   */
-  private HRegion createNewHRegion(HTableDescriptor desc, long regionId) 
-      throws IOException {
-    
-    HRegionInfo info = new HRegionInfo(regionId, desc, null, null);
-    Path regionDir = HStoreFile.getHRegionDir(dir, info.regionName);
-    fs.mkdirs(regionDir);
-
-    return new HRegion(dir,
-      new HLog(fs, new Path(regionDir, HREGION_LOGDIR_NAME), conf),
-      fs, conf, info, null, null);
-  }
-  
-  /**
-   * Inserts a new table's meta information into the meta table. Used by
-   * the HMaster bootstrap code.
-   * 
-   * @param meta                - HRegion to be updated
-   * @param table               - HRegion of new table
-   * 
-   * @throws IOException
-   */
-  private void addTableToMeta(HRegion meta, HRegion table) throws IOException {
-    
-    // The row key is the region name
-    
-    long writeid = meta.startUpdate(table.getRegionName());
-    
-    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
-    DataOutputStream s = new DataOutputStream(bytes);
-
-    table.getRegionInfo().write(s);
-    
-    meta.put(writeid, COL_REGIONINFO, new BytesWritable(bytes.toByteArray()));
-    
-    meta.commit(writeid);
-  }
-  
   /* (non-Javadoc)
    * @see org.apache.hadoop.hbase.HMasterInterface#deleteTable(org.apache.hadoop.io.Text)
    */
@@ -1731,13 +1689,6 @@
   }
   
   /* (non-Javadoc)
-   * @see org.apache.hadoop.hbase.HMasterInterface#mergeRegions(org.apache.hadoop.io.Text, org.apache.hadoop.io.Text)
-   */
-  public void mergeRegions(Text regionName1, Text regionName2) throws IOException {
-    //TODO
-  }
-  
-  /* (non-Javadoc)
    * @see org.apache.hadoop.hbase.HMasterInterface#enableTable(org.apache.hadoop.io.Text)
    */
   public void enableTable(Text tableName) throws IOException {
@@ -1941,7 +1892,7 @@
     protected abstract void postProcessMeta(MetaRegion m, 
         HRegionInterface server) throws IOException;
   }
-  
+
   private class ChangeTableState extends TableOperation {
     private boolean online;
     

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMasterInterface.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMasterInterface.java?view=diff&rev=543841&r1=543840&r2=543841
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMasterInterface.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMasterInterface.java Sat Jun  2 22:09:21 2007
@@ -44,8 +44,6 @@
   public void addColumn(Text tableName, HColumnDescriptor column) throws IOException;
   public void deleteColumn(Text tableName, Text columnName) throws IOException;
   
-  public void mergeRegions(Text regionName1, Text regionName2) throws IOException;
-  
   public void enableTable(Text tableName) throws IOException;
   public void disableTable(Text tableName) throws IOException;
   

Added: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMerge.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMerge.java?view=auto&rev=543841
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMerge.java (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMerge.java Sat Jun  2 22:09:21 2007
@@ -0,0 +1,418 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.NoSuchElementException;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.Text;
+
+public class HMerge implements HConstants {
+  private static final Log LOG = LogFactory.getLog(HMerge.class);
+  private static final Text[] META_COLS = {COL_REGIONINFO};
+  
+  private HMerge() {}                           // Not instantiable
+  
+  /**
+   * Scans the table and merges adjacent regions whose combined size is at most
+   * half the desired maximum file size, e.g. after a lot of rows are deleted.
+   * 
+   * When merging the META region, the HBase instance must be offline.
+   * When merging a normal table, the HBase instance must be online, but the
+   * table must be disabled. 
+   * 
+   * @param conf        - configuration object for HBase
+   * @param fs          - FileSystem where regions reside
+   * @param tableName   - Table to be compacted
+   * @throws IOException
+   */
+  public static void merge(Configuration conf, FileSystem fs, Text tableName)
+      throws IOException {
+    
+    HClient client = new HClient(conf);
+    boolean masterIsRunning = client.isMasterRunning();
+    if(tableName.equals(META_TABLE_NAME)) {
+        if(masterIsRunning) {
+          throw new IllegalStateException(
+              "Can not compact META table if instance is on-line");
+        }
+        new OfflineMerger(conf, fs, META_TABLE_NAME).process();
+      
+    } else {
+      if(!masterIsRunning) {
+        throw new IllegalStateException(
+            "HBase instance must be running to merge a normal table");
+      }
+      new OnlineMerger(conf, fs, client, tableName).process();
+    }
+  }
+
+  private static abstract class Merger {
+    protected Configuration conf;
+    protected FileSystem fs;
+    protected Text tableName;
+    protected Path dir;
+    protected Path basedir;
+    protected HLog hlog;
+    protected DataInputBuffer in;
+    protected boolean more;
+    protected HStoreKey key;
+    protected HRegionInfo info;
+    
+    protected Merger(Configuration conf, FileSystem fs, Text tableName)
+        throws IOException {
+      
+      this.conf = conf;
+      this.fs = fs;
+      this.tableName = tableName;
+      this.in = new DataInputBuffer();
+      this.more = true;
+      this.key = new HStoreKey();
+      this.info = new HRegionInfo();
+      this.dir = new Path(conf.get(HREGION_DIR, DEFAULT_HREGION_DIR));
+      this.basedir = new Path(dir, "merge_" + System.currentTimeMillis());
+      fs.mkdirs(basedir);
+      this.hlog = new HLog(fs, new Path(basedir, HREGION_LOGDIR_NAME), conf);
+    }
+    
+    public void process() throws IOException {
+      try {
+        while(more) {
+          TreeSet<HRegionInfo> regionsToMerge = next();
+          if(regionsToMerge == null) {
+            break;
+          }
+          merge(regionsToMerge.toArray(new HRegionInfo[regionsToMerge.size()]));
+        }
+      } finally {
+        try {
+          hlog.close();
+          
+        } catch(IOException e) {
+          LOG.error(e);
+        }
+        try {
+          fs.delete(basedir);
+          
+        } catch(IOException e) {
+          LOG.error(e);
+        }
+      }
+    }
+    
+    private void merge(HRegionInfo[] regions) throws IOException {
+      if(regions.length < 2) {
+        LOG.info("only one region - nothing to merge");
+        return;
+      }
+      
+      HRegion currentRegion = null;
+      long currentSize = 0;
+      HRegion nextRegion = null;
+      long nextSize = 0;
+      for(int i = 0; i < regions.length - 1; i++) {
+        if(currentRegion == null) {
+          currentRegion =
+            new HRegion(dir, hlog, fs, conf, regions[i], null, null);
+
+          currentSize = currentRegion.largestHStore();
+        }
+        nextRegion =
+          new HRegion(dir, hlog, fs, conf, regions[i + 1], null, null);
+
+        nextSize = nextRegion.largestHStore();
+
+        if((currentSize + nextSize) <= (DESIRED_MAX_FILE_SIZE / 2)) {
+          // We merge two adjacent regions if their total size is less than
+          // one half of the desired maximum size
+
+          LOG.info("merging regions " + currentRegion.getRegionName()
+              + " and " + nextRegion.getRegionName());
+
+          HRegion mergedRegion = HRegion.closeAndMerge(currentRegion, nextRegion);
+
+          updateMeta(currentRegion.getRegionName(), nextRegion.getRegionName(),
+              mergedRegion);
+
+          currentRegion = null;
+          i++;
+          continue;
+          
+        } else {
+          LOG.info("not merging regions " + currentRegion.getRegionName()
+              + " and " + nextRegion.getRegionName());
+        }
+
+        currentRegion.close();
+        currentRegion = nextRegion;
+        currentSize = nextSize;
+      }
+      if(currentRegion != null) {
+        currentRegion.close();
+      }
+    }
+    
+    protected abstract TreeSet<HRegionInfo> next() throws IOException;
+    
+    protected abstract void updateMeta(Text oldRegion1, Text oldRegion2,
+        HRegion newRegion) throws IOException;
+    
+  }
+  
+  private static class OnlineMerger extends Merger {
+    private HClient client;
+    private HScannerInterface metaScanner;
+    private HRegionInfo latestRegion;
+    
+    public OnlineMerger(Configuration conf, FileSystem fs, HClient client,
+        Text tableName) throws IOException {
+      
+      super(conf, fs, tableName);
+      this.client = client;
+      client.openTable(META_TABLE_NAME);
+      this.metaScanner = client.obtainScanner(META_COLS, new Text());
+      this.latestRegion = null;
+    }
+    
+    private HRegionInfo nextRegion() throws IOException {
+      try {
+        TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
+        if(! metaScanner.next(key, results)) {
+          more = false;
+          return null;
+        }
+        byte[] bytes = results.get(COL_REGIONINFO);
+        if(bytes == null || bytes.length == 0) {
+          throw new NoSuchElementException("meta region entry missing "
+              + COL_REGIONINFO);
+        }
+        HRegionInfo region = new HRegionInfo(bytes);
+        if(!region.offLine) {
+          throw new TableNotDisabledException("region " + region.regionName
+              + " is not disabled");
+        }
+        return region;
+        
+      } catch(IOException e) {
+        try {
+          metaScanner.close();
+          
+        } catch(IOException ex) {
+          LOG.error(ex);
+        }
+        more = false;
+        throw e;
+      }
+    }
+
+    protected TreeSet<HRegionInfo> next() throws IOException {
+      TreeSet<HRegionInfo> regions = new TreeSet<HRegionInfo>();
+      if(latestRegion == null) {
+        latestRegion = nextRegion();
+      }
+      if(latestRegion != null) {
+        regions.add(latestRegion);
+      }
+      latestRegion = nextRegion();
+      if(latestRegion != null) {
+        regions.add(latestRegion);
+      }
+      return regions;
+    }
+    
+    protected void updateMeta(Text oldRegion1, Text oldRegion2,
+        HRegion newRegion) throws IOException {
+      Text[] regionsToDelete = {
+          oldRegion1,
+          oldRegion2
+      };
+      for(int r = 0; r < regionsToDelete.length; r++) {
+        if(regionsToDelete[r].equals(latestRegion.regionName)) {
+          latestRegion = null;
+        }
+        long lockid = -1L;
+        try {
+          lockid = client.startUpdate(regionsToDelete[r]);
+          client.delete(lockid, COL_REGIONINFO);
+          client.delete(lockid, COL_SERVER);
+          client.delete(lockid, COL_STARTCODE);
+          client.commit(lockid);
+          lockid = -1L;
+
+          if(LOG.isDebugEnabled()) {
+            LOG.debug("updated columns in row: " + regionsToDelete[r]);
+          }
+        } finally {
+          try {
+            if(lockid != -1L) {
+              client.abort(lockid);
+            }
+
+          } catch(IOException iex) {
+            LOG.error(iex);
+          }
+        }
+      }
+      ByteArrayOutputStream byteValue = new ByteArrayOutputStream();
+      DataOutputStream s = new DataOutputStream(byteValue);
+      newRegion.getRegionInfo().offLine = true;
+      newRegion.getRegionInfo().write(s);
+      long lockid = -1L;
+      try {
+        lockid = client.startUpdate(newRegion.getRegionName());
+        client.put(lockid, COL_REGIONINFO, byteValue.toByteArray());
+        client.commit(lockid);
+        lockid = -1L;
+
+        if(LOG.isDebugEnabled()) {
+          LOG.debug("updated columns in row: "
+              + newRegion.getRegionName());
+        }
+      } finally {
+        try {
+          if(lockid != -1L) {
+            client.abort(lockid);
+          }
+
+        } catch(IOException iex) {
+          LOG.error(iex);
+        }
+      }
+    }
+  }
+
+  private static class OfflineMerger extends Merger {
+    private Path dir;
+    private TreeSet<HRegionInfo> metaRegions;
+    private TreeMap<Text, BytesWritable> results;
+    
+    public OfflineMerger(Configuration conf, FileSystem fs, Text tableName)
+        throws IOException {
+      
+      super(conf, fs, tableName);
+      this.dir = new Path(conf.get(HREGION_DIR, DEFAULT_HREGION_DIR));
+      this.metaRegions = new TreeSet<HRegionInfo>();
+      this.results = new TreeMap<Text, BytesWritable>();
+
+      // Scan root region to find all the meta regions
+      
+      HRegion root = new HRegion(dir, hlog,fs, conf, HGlobals.rootRegionInfo,
+          null, null);
+
+      HInternalScannerInterface rootScanner =
+        root.getScanner(META_COLS, new Text());
+      
+      try {
+        while(rootScanner.next(key, results)) {
+          for(BytesWritable b: results.values()) {
+            byte[] bytes = new byte[b.getSize()];
+            System.arraycopy(b.get(), 0, bytes, 0, bytes.length);
+            in.reset(bytes, bytes.length);
+            info.readFields(in);
+            metaRegions.add(info);
+            results.clear();
+          }
+        }
+      } finally {
+        rootScanner.close();
+        try {
+          root.close();
+          
+        } catch(IOException e) {
+          LOG.error(e);
+        }
+      }
+    }
+
+    protected TreeSet<HRegionInfo> next() throws IOException {
+      more = false;
+      return metaRegions;
+    }
+    
+    protected void updateMeta(Text oldRegion1, Text oldRegion2,
+        HRegion newRegion) throws IOException {
+      
+      HRegion root =
+        new HRegion(dir, hlog, fs, conf, HGlobals.rootRegionInfo, null, null);
+
+      Text[] regionsToDelete = {
+          oldRegion1,
+          oldRegion2
+      };
+      for(int r = 0; r < regionsToDelete.length; r++) {
+        long lockid = -1L;
+        try {
+          lockid = root.startUpdate(regionsToDelete[r]);
+          root.delete(lockid, COL_REGIONINFO);
+          root.delete(lockid, COL_SERVER);
+          root.delete(lockid, COL_STARTCODE);
+          root.commit(lockid);
+          lockid = -1L;
+
+          if(LOG.isDebugEnabled()) {
+            LOG.debug("updated columns in row: " + regionsToDelete[r]);
+          }
+        } finally {
+          try {
+            if(lockid != -1L) {
+              root.abort(lockid);
+            }
+
+          } catch(IOException iex) {
+            LOG.error(iex);
+          }
+        }
+      }
+      ByteArrayOutputStream byteValue = new ByteArrayOutputStream();
+      DataOutputStream s = new DataOutputStream(byteValue);
+      newRegion.getRegionInfo().offLine = true;
+      newRegion.getRegionInfo().write(s);
+      long lockid = -1L;
+      try {
+        lockid = root.startUpdate(newRegion.getRegionName());
+        root.put(lockid, COL_REGIONINFO,
+            new BytesWritable(byteValue.toByteArray()));
+        root.commit(lockid);
+        lockid = -1L;
+
+        if(LOG.isDebugEnabled()) {
+          LOG.debug("updated columns in row: "
+              + newRegion.getRegionName());
+        }
+      } finally {
+        try {
+          if(lockid != -1L) {
+            root.abort(lockid);
+          }
+
+        } catch(IOException iex) {
+          LOG.error(iex);
+        }
+      }
+    }
+  }
+}

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java?view=diff&rev=543841&r1=543840&r2=543841
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java Sat Jun  2 22:09:21 2007
@@ -142,8 +142,7 @@
     
     TreeSet<HStoreFile> alreadyMerged = new TreeSet<HStoreFile>();
     TreeMap<Text, Vector<HStoreFile>> filesToMerge = new TreeMap<Text, Vector<HStoreFile>>();
-    for(Iterator<HStoreFile> it = srcA.flushcache(true).iterator(); it.hasNext(); ) {
-      HStoreFile src = it.next();
+    for(HStoreFile src: srcA.flushcache(true)) {
       Vector<HStoreFile> v = filesToMerge.get(src.getColFamily());
       if(v == null) {
         v = new Vector<HStoreFile>();
@@ -156,8 +155,7 @@
       LOG.debug("flushing and getting file names for region " + srcB.getRegionName());
     }
     
-    for(Iterator<HStoreFile> it = srcB.flushcache(true).iterator(); it.hasNext(); ) {
-      HStoreFile src = it.next();
+    for(HStoreFile src: srcB.flushcache(true)) {
       Vector<HStoreFile> v = filesToMerge.get(src.getColFamily());
       if(v == null) {
         v = new Vector<HStoreFile>();
@@ -189,9 +187,7 @@
     }
 
     filesToMerge.clear();
-    for(Iterator<HStoreFile> it = srcA.close().iterator(); it.hasNext(); ) {
-      HStoreFile src = it.next();
-      
+    for(HStoreFile src: srcA.close()) {
       if(! alreadyMerged.contains(src)) {
         Vector<HStoreFile> v = filesToMerge.get(src.getColFamily());
         if(v == null) {
@@ -207,9 +203,7 @@
           + srcB.getRegionName());
     }
     
-    for(Iterator<HStoreFile> it = srcB.close().iterator(); it.hasNext(); ) {
-      HStoreFile src = it.next();
-      
+    for(HStoreFile src: srcB.close()) {
       if(! alreadyMerged.contains(src)) {
         Vector<HStoreFile> v = filesToMerge.get(src.getColFamily());
         if(v == null) {
@@ -246,6 +240,59 @@
     return dstRegion;
   }
 
+  /**
+   * Convenience method to create a new HRegion. Used by createTable and by the
+   * bootstrap code in the HMaster constructor.
+   * 
+   * @param fs          - file system to create region in
+   * @param dir         - base directory
+   * @param conf        - configuration object
+   * @param desc        - table descriptor
+   * @param regionId    - region id
+   * @param startKey    - first key in region
+   * @param endKey      - last key in region
+   * @return            - new HRegion
+   * @throws IOException
+   */
+  public static HRegion createNewHRegion(FileSystem fs, Path dir,
+      Configuration conf, HTableDescriptor desc, long regionId, Text startKey,
+      Text endKey) throws IOException {
+    
+    HRegionInfo info = new HRegionInfo(regionId, desc, startKey, endKey);
+    Path regionDir = HStoreFile.getHRegionDir(dir, info.regionName);
+    fs.mkdirs(regionDir);
+
+    return new HRegion(dir,
+      new HLog(fs, new Path(regionDir, HREGION_LOGDIR_NAME), conf),
+      fs, conf, info, null, null);
+  }
+  
+  /**
+   * Inserts a new region's meta information into a catalog region. Used by
+   * the HMaster bootstrap code.
+   * 
+   * @param meta                - HRegion to be updated
+   * @param table               - HRegion of new table
+   * 
+   * @throws IOException
+   */
+  public static void addRegionToMeta(HRegion meta, HRegion table)
+      throws IOException {
+    
+    // The row key is the region name
+    
+    long writeid = meta.startUpdate(table.getRegionName());
+    
+    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+    DataOutputStream s = new DataOutputStream(bytes);
+
+    table.getRegionInfo().write(s);
+    
+    meta.put(writeid, COL_REGIONINFO, new BytesWritable(bytes.toByteArray()));
+    
+    meta.commit(writeid);
+  }
+  
   //////////////////////////////////////////////////////////////////////////////
   // Members
   //////////////////////////////////////////////////////////////////////////////
@@ -627,6 +674,28 @@
     }
   }
 
+  /**
+   * @return - the largest file size reported by any HStore in this region
+   */
+  public long largestHStore() {
+    long maxsize = 0;
+    lock.obtainReadLock();
+    try {
+      Text key = new Text();
+      for(HStore h: stores.values()) {
+        long size = h.getLargestFileSize(key);
+
+        if(size > maxsize) {                      // Largest so far
+          maxsize = size;
+        }
+      }
+      return maxsize;
+    
+    } finally {
+      lock.releaseReadLock();
+    }
+  }
+  
   /**
    * @return true if the region should be compacted.
    */

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionInfo.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionInfo.java?view=diff&rev=543841&r1=543840&r2=543841
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionInfo.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionInfo.java Sat Jun  2 22:09:21 2007
@@ -22,20 +22,20 @@
 import java.io.IOException;
 
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
 
 /**
  * HRegion information.
  * Contains HRegion id, start and end keys, a reference to this
  * HRegions' table descriptor, etc.
  */
-public class HRegionInfo implements Writable {
+public class HRegionInfo implements WritableComparable {
+  public Text regionName;
   public long regionId;
-  public HTableDescriptor tableDesc;
   public Text startKey;
   public Text endKey;
-  public Text regionName;
   public boolean offLine;
+  public HTableDescriptor tableDesc;
   
   public HRegionInfo() {
     this.regionId = 0;
@@ -87,6 +87,22 @@
       this.tableDesc.toString() + "}";
   }
     
+  @Override
+  public boolean equals(Object o) {
+    return this.compareTo(o) == 0;
+  }
+  
+  @Override
+  public int hashCode() {
+    int result = this.regionName.hashCode();
+    result ^= Long.valueOf(this.regionId).hashCode();
+    result ^= this.startKey.hashCode();
+    result ^= this.endKey.hashCode();
+    result ^= Boolean.valueOf(this.offLine).hashCode();
+    result ^= this.tableDesc.hashCode();
+    return result;
+  }
+
   //////////////////////////////////////////////////////////////////////////////
   // Writable
   //////////////////////////////////////////////////////////////////////////////
@@ -107,5 +123,14 @@
     this.endKey.readFields(in);
     this.regionName.readFields(in);
     this.offLine = in.readBoolean();
+  }
+  
+  //////////////////////////////////////////////////////////////////////////////
+  // Comparable
+  //////////////////////////////////////////////////////////////////////////////
+  
+  public int compareTo(Object o) {
+    HRegionInfo other = (HRegionInfo)o;
+    return regionName.compareTo(other.regionName);
   }
 }

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java?view=diff&rev=543841&r1=543840&r2=543841
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java Sat Jun  2 22:09:21 2007
@@ -824,33 +824,6 @@
     }
   }
 
-  /*****************************************************************************
-   * TODO - Figure out how the master is to determine when regions should be
-   *        merged. It once it makes this determination, it needs to ensure that
-   *        the regions to be merged are first being served by the same
-   *        HRegionServer and if not, move them so they are.
-   *        
-   *        For now, we do not do merging. Splits are driven by the HRegionServer.
-   ****************************************************************************/
-/*
-  private void mergeRegions(Text regionNameA, Text regionNameB) throws IOException {
-    locking.writeLock().lock();
-    try {
-      HRegion srcA = regions.remove(regionNameA);
-      HRegion srcB = regions.remove(regionNameB);
-      HRegion newRegion = HRegion.closeAndMerge(srcA, srcB);
-      regions.put(newRegion.getRegionName(), newRegion);
-
-      reportClose(srcA);
-      reportClose(srcB);
-      reportOpen(newRegion);
-      
-    } finally {
-      locking.writeLock().unlock();
-    }
-  }
-*/
-
   //////////////////////////////////////////////////////////////////////////////
   // HRegionInterface
   //////////////////////////////////////////////////////////////////////////////

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTableDescriptor.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTableDescriptor.java?view=diff&rev=543841&r1=543840&r2=543841
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTableDescriptor.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTableDescriptor.java Sat Jun  2 22:09:21 2007
@@ -20,7 +20,6 @@
 import java.io.IOException;
 import java.util.Iterator;
 import java.util.Map;
-import java.util.Map.Entry;
 import java.util.TreeMap;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java?view=diff&rev=543841&r1=543840&r2=543841
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java Sat Jun  2 22:09:21 2007
@@ -40,11 +40,32 @@
   private HRegionServer[] regionServers;
   private Thread[] regionThreads;
   
+  /**
+   * Starts a MiniHBaseCluster on top of a new MiniDFSCluster
+   * 
+   * @param conf
+   * @param nRegionNodes
+   */
   public MiniHBaseCluster(Configuration conf, int nRegionNodes) {
     this(conf, nRegionNodes, true);
   }
   
   /**
+   * Starts a MiniHBaseCluster on top of an existing MiniDFSCluster
+   * 
+   * @param conf
+   * @param nRegionNodes
+   * @param dfsCluster
+   */
+  public MiniHBaseCluster(Configuration conf, int nRegionNodes,
+      MiniDFSCluster dfsCluster) {
+
+    this.conf = conf;
+    this.cluster = dfsCluster;
+    init(nRegionNodes);
+  }
+  
+  /**
    * Constructor.
    * @param conf
    * @param nRegionNodes
@@ -55,22 +76,23 @@
   public MiniHBaseCluster(Configuration conf, int nRegionNodes,
       final boolean miniHdfsFilesystem) {
     this.conf = conf;
+    
+    if (miniHdfsFilesystem) {
+      try {
+        this.cluster = new MiniDFSCluster(this.conf, 2, true, (String[])null);
+        
+      } catch(Throwable t) {
+        LOG.error("Failed setup of mini dfs cluster", t);
+        t.printStackTrace();
+        return;
+      }
+    }
+    init(nRegionNodes);
+  }
 
+  private void init(int nRegionNodes) {
     try {
       try {
-        if(System.getProperty(StaticTestEnvironment.TEST_DIRECTORY_KEY) == null) {
-          File testDir = new File(new File("").getAbsolutePath(),
-              "build/contrib/hbase/test");
-
-          String dir = testDir.getAbsolutePath();
-          LOG.info("Setting test.build.data to " + dir);
-          System.setProperty(StaticTestEnvironment.TEST_DIRECTORY_KEY, dir);
-        }
-
-        if (miniHdfsFilesystem) {
-          this.cluster =
-            new MiniDFSCluster(this.conf, 2, true, (String[])null);
-        }
         this.fs = FileSystem.get(conf);
         this.parentdir = new Path(conf.get(HREGION_DIR, DEFAULT_HREGION_DIR));
         fs.mkdirs(parentdir);
@@ -110,7 +132,7 @@
       shutdown();
     }
   }
-  
+
   private void startRegionServers(Configuration conf, int nRegionNodes)
       throws IOException {
     this.regionServers = new HRegionServer[nRegionNodes];

Added: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestMerge.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestMerge.java?view=auto&rev=543841
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestMerge.java (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestMerge.java Sat Jun  2 22:09:21 2007
@@ -0,0 +1,170 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.Random;
+
+import org.apache.hadoop.dfs.MiniDFSCluster;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+
+/** Tests region merging */
+public class TestMerge extends HBaseTestCase {
+  private static final Text COLUMN_NAME = new Text("contents:");
+  private Random rand;
+  private HTableDescriptor desc;
+  private BytesWritable value;
+
+  private MiniDFSCluster dfsCluster;
+  private FileSystem fs;
+  private Path dir;
+
+  private MiniHBaseCluster hCluster;
+  
+  public void testMerge() {
+    setup();
+    startMiniDFSCluster();
+    createRegions();
+    try {
+      HMerge.merge(conf, fs, HConstants.META_TABLE_NAME);
+      
+      hCluster = new MiniHBaseCluster(conf, 1, dfsCluster);
+      try {
+        HMerge.merge(conf, fs, desc.getName());
+      
+      } finally {
+        hCluster.shutdown();
+      }
+      
+    } catch(Throwable t) {
+      t.printStackTrace();
+      fail();
+      
+    } finally {
+      dfsCluster.shutdown();
+    }
+  }
+  
+  private void setup() {
+    rand = new Random();
+    desc = new HTableDescriptor("test");
+    desc.addFamily(new HColumnDescriptor(COLUMN_NAME.toString()));
+    
+    // Every row stores the same value; only the amount of data matters here
+    
+    String partialValue = String.valueOf(System.currentTimeMillis());
+    StringBuilder val = new StringBuilder();
+    while(val.length() < 1024) {
+      val.append(partialValue);
+    }
+    try {
+      value = new BytesWritable(val.toString().getBytes(HConstants.UTF8_ENCODING));
+      
+    } catch(UnsupportedEncodingException e) {
+      fail();
+    }
+  }
+
+  private void startMiniDFSCluster() {
+    try {
+      dfsCluster = new MiniDFSCluster(conf, 2, true, (String[])null);
+      fs = dfsCluster.getFileSystem();
+      dir = new Path("/hbase");
+      fs.mkdirs(dir);
+      
+    } catch(Throwable t) {
+      t.printStackTrace();
+      fail();
+    }
+  }
+  
+  private void createRegions() {
+    // We create three data regions: the first is too large to merge since it
+    // will be > 64 MB in size. The other two are smaller and will be
+    // selected for merging.
+    
+    // Each row holds a value of at least 1 KB (see setup()), so the first region
+    // needs at least 65536 rows to exceed 64MB. We make certain by writing 70000.
+
+    try {
+      Text row_70001 = new Text("row_70001");
+      Text row_80001 = new Text("row_80001");
+      
+      HRegion[] regions = {
+        createAregion(null, row_70001, 1, 70000),
+        createAregion(row_70001, row_80001, 70001, 10000),
+        createAregion(row_80001, null, 80001, 10000)
+      };
+      
+      // Now create the root and meta regions and insert the data regions
+      // created above into the meta
+      
+      HRegion root = HRegion.createNewHRegion(fs, dir, conf, 
+          HGlobals.rootTableDesc, 0L, null, null);
+      HRegion meta = HRegion.createNewHRegion(fs, dir, conf,
+          HGlobals.metaTableDesc, 1L, null, null);
+    
+      HRegion.addRegionToMeta(root, meta);
+      
+      for(int i = 0; i < regions.length; i++) {
+        HRegion.addRegionToMeta(meta, regions[i]);
+      }
+      
+      root.close();
+      root.getLog().close();
+      fs.delete(new Path(root.getRegionDir(), HConstants.HREGION_LOGDIR_NAME));
+      meta.close();
+      meta.getLog().close();
+      fs.delete(new Path(meta.getRegionDir(), HConstants.HREGION_LOGDIR_NAME));
+      
+    } catch(Throwable t) {
+      t.printStackTrace();
+      fail();
+    }
+  }
+  
+  private HRegion createAregion(Text startKey, Text endKey, int firstRow, int nrows)
+      throws IOException {
+    HRegion region = HRegion.createNewHRegion(fs, dir, conf, desc,
+        rand.nextLong(), startKey, endKey);
+    
+    System.out.println("created region " + region.getRegionName());
+
+    for(int i = firstRow; i < firstRow + nrows; i++) {
+      long lockid = region.startUpdate(new Text("row_"
+          + String.format("%1$05d", i)));
+
+      region.put(lockid, COLUMN_NAME, value);
+      region.commit(lockid);
+      if(i % 10000 == 0) {
+        System.out.println("Flushing write #" + i);
+        region.flushcache(false);
+      }
+    }
+    System.out.println("Rolling log...");
+    region.log.rollWriter();
+    region.compactStores();
+    region.close();
+    region.getLog().close();
+    fs.delete(new Path(region.getRegionDir(), HConstants.HREGION_LOGDIR_NAME));
+    region.getRegionInfo().offLine = true;
+    return region;
+  }
+}