You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ji...@apache.org on 2009/01/09 02:48:26 UTC

svn commit: r732908 - in /hadoop/hbase/trunk: ./ src/java/org/apache/hadoop/hbase/ipc/ src/java/org/apache/hadoop/hbase/master/ src/java/org/apache/hadoop/hbase/regionserver/

Author: jimk
Date: Thu Jan  8 17:48:25 2009
New Revision: 732908

URL: http://svn.apache.org/viewvc?rev=732908&view=rev
Log:
HBASE-1104, HBASE-1098, HBASE-1096: Doubly-assigned regions redux, IllegalStateException: Cannot set a region to be closed it it was not already marked as closing, Does not recover if HRS carrying -ROOT- goes down

Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=732908&r1=732907&r2=732908&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Thu Jan  8 17:48:25 2009
@@ -133,6 +133,10 @@
    HBASE-1101  NPE in HConnectionManager$TableServers.processBatchOfRows
    HBASE-1099  Regions assigned while master is splitting logs of recently
                crashed server; regionserver tries to execute incomplete log
+   HBASE-1104, HBASE-1098, HBASE-1096: Doubly-assigned regions redux,
+               IllegalStateException: Cannot set a region to be closed it it was
+               not already marked as closing, Does not recover if HRS carrying 
+               -ROOT- goes down
 
   IMPROVEMENTS
    HBASE-901   Add a limit to key length, check key and value length on client side

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java?rev=732908&r1=732907&r2=732908&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/ipc/HRegionInterface.java Thu Jan  8 17:48:25 2009
@@ -126,6 +126,7 @@
    * @param regionName name of the region to update
    * @param b BatchUpdate
    * @param expectedValues map of column names to expected data values.
+   * @return true if update was applied
    * @throws IOException
    */
   public boolean checkAndSave(final byte [] regionName, final BatchUpdate b,
@@ -214,7 +215,7 @@
    * @param row The row
    * @param column The column, or null for any
    * @param timestamp The timestamp, or LATEST_TIMESTAMP for any
-   * @param lockId lock id
+   * @param lockID lock id
    * @return true if the row exists, false otherwise
    * @throws IOException
    */

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java?rev=732908&r1=732907&r2=732908&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java Thu Jan  8 17:48:25 2009
@@ -104,7 +104,6 @@
     
   private final boolean rootRegion;
   protected final HMaster master;
-  protected final RegionManager regionManager;
   
   protected boolean initialScanComplete;
   
@@ -115,12 +114,11 @@
   // mid-scan
   final Integer scannerLock = new Integer(0);
   
-  BaseScanner(final HMaster master, final RegionManager regionManager, 
-    final boolean rootRegion, final int period, final AtomicBoolean stop) {
+  BaseScanner(final HMaster master, final boolean rootRegion, final int period,
+      final AtomicBoolean stop) {
     super(period, stop);
     this.rootRegion = rootRegion;
     this.master = master;
-    this.regionManager = regionManager;
     this.initialScanComplete = false;
   }
   
@@ -180,7 +178,7 @@
         rows += 1;
       }
       if (rootRegion) {
-        regionManager.setNumMetaRegions(rows);
+        this.master.regionManager.setNumMetaRegions(rows);
       }
     } catch (IOException e) {
       if (e instanceof RemoteException) {
@@ -210,7 +208,7 @@
     if (emptyRows.size() > 0) {
       LOG.warn("Found " + emptyRows.size() + " rows with empty HRegionInfo " +
         "while scanning meta region " + Bytes.toString(region.getRegionName()));
-      master.deleteEmptyMetaRows(regionServer, region.getRegionName(),
+      this.master.deleteEmptyMetaRows(regionServer, region.getRegionName(),
           emptyRows);
     }
 
@@ -264,7 +262,7 @@
     if (!hasReferencesA && !hasReferencesB) {
       LOG.info("Deleting region " + parent.getRegionNameAsString() +
         " because daughter splits no longer hold references");
-      HRegion.deleteRegion(master.fs, master.rootdir, parent);
+      HRegion.deleteRegion(this.master.fs, this.master.rootdir, parent);
       HRegion.removeRegionFromMETA(srvr, metaRegionName,
         parent.getRegionName());
       result = true;
@@ -294,8 +292,8 @@
     if (split == null) {
       return result;
     }
-    Path tabledir =
-      HTableDescriptor.getTableDir(master.rootdir, split.getTableDesc().getName());
+    Path tabledir = HTableDescriptor.getTableDir(this.master.rootdir,
+        split.getTableDesc().getName());
     for (HColumnDescriptor family: split.getTableDesc().getFamilies()) {
       Path p = HStoreFile.getMapDir(tabledir, split.getEncodedName(),
         family.getName());
@@ -303,7 +301,7 @@
       // Look for reference files.  Call listStatus with an anonymous
       // instance of PathFilter.
 
-      FileStatus [] ps = master.fs.listStatus(p,
+      FileStatus [] ps = this.master.fs.listStatus(p,
           new PathFilter () {
             public boolean accept(Path path) {
               return HStore.isReference(path);
@@ -337,70 +335,56 @@
     final String serverName, final long startCode) 
   throws IOException {
     
-    synchronized (regionManager) {
-      // Skip region - if
+    synchronized (this.master.regionManager) {
+      /*
+       * We don't assign regions that are offline, in transition or were on
+       * a dead server. Regions that were on a dead server will get reassigned
+       * by ProcessServerShutdown
+       */
       if(info.isOffline() ||
-          regionManager.isOfflined(info.getRegionName())) { // queued for offline
-        regionManager.removeRegion(info);
+          this.master.regionManager.regionIsInTransition(info.getRegionName()) ||
+          this.master.serverManager.isDead(serverName)) {
+
         return;
       }
       HServerInfo storedInfo = null;
-      boolean deadServerAndLogsSplit = false;
-      boolean deadServer = false;
       if (serverName.length() != 0) {
-        if (regionManager.isOfflined(info.getRegionName())) {
-          // Skip if region is on kill list
-          if(LOG.isDebugEnabled()) {
-            LOG.debug("not assigning region (on kill list): " +
-                info.getRegionNameAsString());
-          }
-          return;
-        }
         storedInfo = this.master.serverManager.getServerInfo(serverName);
-        deadServer = this.master.serverManager.isDead(serverName);
-        deadServerAndLogsSplit =
-          this.master.serverManager.isDeadServerLogsSplit(serverName);
       }
 
       /*
-       * If the server is a dead server and its logs have been split or its
-       * not on the dead server lists and its startcode is off -- either null
-       * or doesn't match the start code for the address -- then add it to the
-       * list of unassigned regions IF not already there (or pending open).
+       * If the startcode is off -- either null or doesn't match the start code
+       * for the address -- then add it to the list of unassigned regions.
        */ 
-      if ((deadServerAndLogsSplit ||
-          (!deadServer && (storedInfo == null ||
-            (storedInfo.getStartCode() != startCode)))) &&
-          this.regionManager.assignable(info)) {
+      if (storedInfo == null || storedInfo.getStartCode() != startCode) {
+
         // The current assignment is invalid
         if (LOG.isDebugEnabled()) {
           LOG.debug("Current assignment of " + info.getRegionNameAsString() +
-            " is not valid; deadServerAndLogsSplit=" + deadServerAndLogsSplit +
-            ", deadServer=" + deadServer + ". " +
+            " is not valid; " +
             (storedInfo == null ? " Server '" + serverName + "' unknown." :
                 " serverInfo: " + storedInfo + ", passed startCode: " +
                 startCode + ", storedInfo.startCode: " +
-                storedInfo.getStartCode()) +
-          " Region is not unassigned, assigned or pending");
+                storedInfo.getStartCode()));
         }
 
         // Recover the region server's log if there is one.
         // This is only done from here if we are restarting and there is stale
         // data in the meta region. Once we are on-line, dead server log
         // recovery is handled by lease expiration and ProcessServerShutdown
-        if (!regionManager.isInitialMetaScanComplete() &&
+        if (!this.master.regionManager.isInitialMetaScanComplete() &&
             serverName.length() != 0) {
           StringBuilder dirName = new StringBuilder("log_");
           dirName.append(serverName.replace(":", "_"));
-          Path logDir = new Path(master.rootdir, dirName.toString());
+          Path logDir = new Path(this.master.rootdir, dirName.toString());
           try {
             if (master.fs.exists(logDir)) {
-              regionManager.splitLogLock.lock();
+              this.master.regionManager.splitLogLock.lock();
               try {
                 HLog.splitLog(master.rootdir, logDir, master.fs,
                     master.getConfiguration());
               } finally {
-                regionManager.splitLogLock.unlock();
+                this.master.regionManager.splitLogLock.unlock();
               }
             }
             if (LOG.isDebugEnabled()) {
@@ -412,7 +396,7 @@
           }
         }
         // Now get the region assigned
-        regionManager.setUnassigned(info, true);
+        this.master.regionManager.setUnassigned(info, true);
       }
     }
   }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java?rev=732908&r1=732907&r2=732908&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java Thu Jan  8 17:48:25 2009
@@ -91,7 +91,7 @@
       synchronized (master.regionManager) {
         if (online) {
           // Bring offline regions on-line
-          if (!master.regionManager.assignable(i)) {
+          if (!master.regionManager.regionIsOpening(i.getRegionName())) {
             master.regionManager.setUnassigned(i, false);
           }
         } else {

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java?rev=732908&r1=732907&r2=732908&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java Thu Jan  8 17:48:25 2009
@@ -835,7 +835,7 @@
       LOG.info("Marking " + hri.getRegionNameAsString() +
         " as closed on " + servername + "; cleaning SERVER + STARTCODE; " +
           "master will tell regionserver to close region on next heartbeat");
-      this.regionManager.setClosing(servername, hri, hri.isOffline(), false);
+      this.regionManager.setClosing(servername, hri, hri.isOffline());
       MetaRegion meta = this.regionManager.getMetaRegionForRow(regionname);
       HRegionInterface srvr = getMETAServer(meta);
       HRegion.cleanRegionInMETA(srvr, meta.getRegionName(), hri);

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java?rev=732908&r1=732907&r2=732908&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java Thu Jan  8 17:48:25 2009
@@ -49,21 +49,21 @@
    * Constructor
    * 
    * @param master
-   * @param regionManager
    */
-  public MetaScanner(HMaster master, RegionManager regionManager) {
-    super(master, regionManager, false, master.metaRescanInterval, master.closed);
+  public MetaScanner(HMaster master) {
+    super(master, false, master.metaRescanInterval, master.closed);
   }
 
   // Don't retry if we get an error while scanning. Errors are most often
   // caused by the server going away. Wait until next rescan interval when
   // things should be back to normal.
   private boolean scanOneMetaRegion(MetaRegion region) {
-    while (!master.closed.get() && !regionManager.isInitialRootScanComplete() &&
-      regionManager.getRootRegionLocation() == null) {
+    while (!this.master.closed.get() &&
+        !this.master.regionManager.isInitialRootScanComplete() &&
+        this.master.regionManager.getRootRegionLocation() == null) {
       sleep();
     }
-    if (master.closed.get()) {
+    if (this.master.closed.get()) {
       return false;
     }
 
@@ -71,7 +71,7 @@
       // Don't interrupt us while we're working
       synchronized (scannerLock) {
         scanRegion(region);
-        regionManager.putMetaRegionOnline(region);
+        this.master.regionManager.putMetaRegionOnline(region);
       }
     } catch (IOException e) {
       e = RemoteExceptionHandler.checkIOException(e);
@@ -80,13 +80,13 @@
       // so, either it won't be in the onlineMetaRegions list or its host
       // address has changed and the containsValue will fail. If not
       // found, best thing to do here is probably return.
-      if (!regionManager.isMetaRegionOnline(region.getStartKey())) {
+      if (!this.master.regionManager.isMetaRegionOnline(region.getStartKey())) {
         LOG.debug("Scanned region is no longer in map of online " +
         "regions or its value has changed");
         return false;
       }
       // Make sure the file system is still available
-      master.checkFileSystem();
+      this.master.checkFileSystem();
     } catch (Exception e) {
       // If for some reason we get some other kind of exception, 
       // at least log it rather than go out silently.
@@ -98,11 +98,11 @@
   @Override
   protected boolean initialScan() {
     MetaRegion region = null;
-    while (!master.closed.get() &&
+    while (!this.master.closed.get() &&
         (region == null && metaRegionsToScan.size() > 0) &&
           !metaRegionsScanned()) {
       try {
-        region = metaRegionsToScan.poll(master.threadWakeFrequency, 
+        region = metaRegionsToScan.poll(this.master.threadWakeFrequency, 
           TimeUnit.MILLISECONDS);
       } catch (InterruptedException e) {
         // continue
@@ -122,7 +122,8 @@
 
   @Override
   protected void maintenanceScan() {
-    List<MetaRegion> regions = regionManager.getListOfOnlineMetaRegions();
+    List<MetaRegion> regions =
+      this.master.regionManager.getListOfOnlineMetaRegions();
     int regionCount = 0;
     for (MetaRegion r: regions) {
       scanOneMetaRegion(r);
@@ -140,8 +141,9 @@
    * @return False if number of meta regions matches count of online regions.
    */
   private synchronized boolean metaRegionsScanned() {
-    if (!regionManager.isInitialRootScanComplete() ||
-      regionManager.numMetaRegions() != regionManager.numOnlineMetaRegions()) {
+    if (!this.master.regionManager.isInitialRootScanComplete() ||
+        this.master.regionManager.numMetaRegions() !=
+          this.master.regionManager.numOnlineMetaRegions()) {
       return false;
     }
     notifyAll();
@@ -153,21 +155,21 @@
    * been scanned.
    */
   synchronized boolean waitForMetaRegionsOrClose() {
-    while (!master.closed.get()) {
+    while (!this.master.closed.get()) {
       synchronized (master.regionManager) {
-        if (regionManager.isInitialRootScanComplete() &&
-            regionManager.numMetaRegions() ==
-              regionManager.numOnlineMetaRegions()) {
+        if (this.master.regionManager.isInitialRootScanComplete() &&
+            this.master.regionManager.numMetaRegions() ==
+              this.master.regionManager.numOnlineMetaRegions()) {
           break;
         }
       }
       try {
-        wait(master.threadWakeFrequency);
+        wait(this.master.threadWakeFrequency);
       } catch (InterruptedException e) {
         // continue
       }
     }
-    return master.closed.get();
+    return this.master.closed.get();
   }
   
   /**

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java?rev=732908&r1=732907&r2=732908&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java Thu Jan  8 17:48:25 2009
@@ -52,6 +52,7 @@
   private final boolean rootRegionServer;
   private boolean rootRegionReassigned = false;
   private Path oldLogDir;
+  private boolean logSplit;
   private boolean rootRescanned;
   
 
@@ -78,6 +79,7 @@
     this.deadServer = serverInfo.getServerAddress();
     this.deadServerStr = this.deadServer.toString();
     this.rootRegionServer = rootRegionServer;
+    this.logSplit = false;
     this.rootRescanned = false;
     this.oldLogDir =
       new Path(master.rootdir, HLog.getHLogDirectoryName(serverInfo));
@@ -230,8 +232,6 @@
 
   @Override
   protected boolean process() throws IOException {
-    boolean logSplit =
-      this.master.serverManager.isDeadServerLogsSplit(this.deadServerStr);
     LOG.info("process shutdown of server " + this.deadServerStr +
       ": logSplit: " +
       logSplit + ", rootRescanned: " + rootRescanned +
@@ -252,7 +252,7 @@
           master.regionManager.splitLogLock.unlock();
         }
       }
-      this.master.serverManager.setDeadServerLogsSplit(this.deadServerStr);
+      logSplit = true;
     }
 
     if (this.rootRegionServer && !this.rootRegionReassigned) {

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=732908&r1=732907&r2=732908&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java Thu Jan  8 17:48:25 2009
@@ -78,12 +78,11 @@
 
   private static final byte[] OVERLOADED = Bytes.toBytes("Overloaded");
 
-  /*
+  /**
    * Map of region name to RegionState for regions that are in transition such as
    * 
-   * unassigned -> assigned -> pending -> open
-   * closing -> closed -> offline
-   * closing -> closed -> unassigned -> assigned -> pending -> open
+   * unassigned -> pendingOpen -> open
+   * closing -> pendingClose -> closed; if (closed && !offline) -> unassigned
    * 
    * At the end of a transition, removeRegion is used to remove the region from
    * the map (since it is no longer in transition)
@@ -133,10 +132,10 @@
       (float)0.1);
 
     // The root region
-    rootScannerThread = new RootScanner(master, this);
+    rootScannerThread = new RootScanner(master);
 
     // Scans the meta table
-    metaScannerThread = new MetaScanner(master, this);
+    metaScannerThread = new MetaScanner(master);
     
     reassignRootRegion();
   }
@@ -272,7 +271,7 @@
       for (RegionState s: regionsToAssign) {
         LOG.info("assigning region " + Bytes.toString(s.getRegionName())+
           " to server " + serverName);
-        s.setAssigned(serverName);
+        s.setPendingOpen(serverName);
         this.historian.addRegionAssignment(s.getRegionInfo(), serverName);
         returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, s.getRegionInfo()));
         if (--nregions <= 0) {
@@ -323,7 +322,8 @@
    * Get the set of regions that should be assignable in this pass.
    * 
    * Note that no synchronization on regionsInTransition is needed because the
-   * only caller (assignRegions) whose caller owns the monitor for RegionManager
+   * only caller (assignRegions, whose caller is ServerManager.processMsgs) owns
+   * the monitor for RegionManager
    */ 
   private Set<RegionState> regionsAwaitingAssignment() {
     // set of regions we want to assign to this server
@@ -342,8 +342,7 @@
         // and are on-line
         continue;
       }
-      if (!s.isAssigned() && !s.isClosing() && !s.isPending()) {
-        s.setUnassigned();
+      if (s.isUnassigned()) {
         regionsToAssign.add(s);
       }
     }
@@ -399,7 +398,7 @@
     for (RegionState s: regionsToAssign) {
       LOG.info("assigning region " + Bytes.toString(s.getRegionName()) +
           " to the only server " + serverName);
-      s.setAssigned(serverName);
+      s.setPendingOpen(serverName);
       this.historian.addRegionAssignment(s.getRegionInfo(), serverName);
       returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, s.getRegionInfo()));
     }
@@ -432,8 +431,7 @@
         continue;
       }
       byte[] regionName = currentRegion.getRegionName();
-      if (isClosing(regionName) || isUnassigned(currentRegion) ||
-          isAssigned(regionName) || isPending(regionName)) {
+      if (regionIsInTransition(regionName)) {
         skipped++;
         continue;
       }
@@ -444,6 +442,7 @@
         OVERLOADED));
       // mark the region as closing
       setClosing(serverName, currentRegion, false);
+      setPendingClose(currentRegion.getRegionName());
       // increment the count of regions we've marked
       regionsClosed++;
     }
@@ -564,15 +563,6 @@
   }
 
   /**
-   * Remove a region from the region state map.
-   * 
-   * @param info
-   */
-  public void removeRegion(HRegionInfo info) {
-    regionsInTransition.remove(info.getRegionName());
-  }
-  
-  /**
    * Get metaregion that would host passed in row.
    * @param row Row need to know all the meta regions for
    * @return set of MetaRegion objects that contain the table
@@ -663,6 +653,53 @@
     onlineMetaRegions.remove(startKey); 
   }
   
+  /**
+   * Remove a region from the region state map.
+   * 
+   * @param info
+   */
+  public void removeRegion(HRegionInfo info) {
+    regionsInTransition.remove(info.getRegionName());
+  }
+  
+  /**
+   * @param regionName
+   * @return true if the named region is in a transition state
+   */
+  public boolean regionIsInTransition(byte[] regionName) {
+    return regionsInTransition.containsKey(regionName);
+  }
+
+  /**
+   * @param regionName
+   * @return true if the region is unassigned, pendingOpen or open
+   */
+  public boolean regionIsOpening(byte[] regionName) {
+    RegionState state = regionsInTransition.get(regionName);
+    if (state != null) {
+      return state.isOpening();
+    }
+    return false;
+  }
+
+  /** 
+   * Set a region to unassigned 
+   * @param info Region to set unassigned
+   * @param force if true mark region unassigned whatever its current state
+   */
+  public void setUnassigned(HRegionInfo info, boolean force) {
+    synchronized(this.regionsInTransition) {
+      RegionState s = regionsInTransition.get(info.getRegionName());
+      if (s == null) {
+        s = new RegionState(info);
+        regionsInTransition.put(info.getRegionName(), s);
+      }
+      if (force || (!s.isPendingOpen() && !s.isOpen())) {
+        s.setUnassigned();
+      }
+    }
+  }
+  
   /** 
    * Check if a region is on the unassigned list
    * @param info HRegionInfo to check for
@@ -681,45 +718,35 @@
   }
   
   /**
-   * Check if a region is pending 
+   * Check if a region has been assigned and we're waiting for a response from
+   * the region server.
+   * 
    * @param regionName name of the region
-   * @return true if pending, false otherwise
+   * @return true if open, false otherwise
    */
-  public boolean isPending(byte [] regionName) {
+  public boolean isPendingOpen(byte [] regionName) {
     synchronized (regionsInTransition) {
       RegionState s = regionsInTransition.get(regionName);
       if (s != null) {
-        return s.isPending();
+        return s.isPendingOpen();
       }
     }
     return false;
   }
 
   /**
-   * @param hri
-   * @return True if the passed region is assignable: i.e. not assigned, not
-   * pending and not unassigned.
-   */
-  public boolean assignable(final HRegionInfo hri) {
-    return !isUnassigned(hri) &&
-    !isPending(hri.getRegionName()) &&
-    !isAssigned(hri.getRegionName());
-  }
-  
-  /**
+   * Region has been assigned to a server and the server has told us it is open
    * @param regionName
-   * @return true if region has been assigned
    */
-  public boolean isAssigned(byte[] regionName) {
+  public void setOpen(byte [] regionName) {
     synchronized (regionsInTransition) {
       RegionState s = regionsInTransition.get(regionName);
       if (s != null) {
-        return s.isAssigned() || s.isPending();
+        s.setOpen();
       }
     }
-    return false;
   }
-
+  
   /**
    * @param regionName
    * @return true if region is marked to be offlined.
@@ -735,33 +762,20 @@
   }
 
   /** 
-   * Set a region to unassigned 
-   * @param info Region to set unassigned
-   * @param force if true mark region unassigned whatever its current state
+   * Mark a region as closing 
+   * @param serverName
+   * @param regionInfo
+   * @param setOffline
    */
-  public void setUnassigned(HRegionInfo info, boolean force) {
-    synchronized(this.regionsInTransition) {
-      RegionState s = regionsInTransition.get(info.getRegionName());
+  public void setClosing(final String serverName, final HRegionInfo regionInfo,
+      final boolean setOffline) {
+    synchronized (this.regionsInTransition) {
+      RegionState s = this.regionsInTransition.get(regionInfo.getRegionName());
       if (s == null) {
-        s = new RegionState(info);
-        regionsInTransition.put(info.getRegionName(), s);
-      }
-      if (force || (!s.isAssigned() && !s.isPending())) {
-        s.setUnassigned();
-      }
-    }
-  }
-  
-  /**
-   * Set a region to pending assignment 
-   * @param regionName
-   */
-  public void setPending(byte [] regionName) {
-    synchronized (regionsInTransition) {
-      RegionState s = regionsInTransition.get(regionName);
-      if (s != null) {
-        s.setPending();
+        s = new RegionState(regionInfo);
       }
+      s.setClosing(serverName, setOffline);
+      this.regionsInTransition.put(regionInfo.getRegionName(), s);
     }
   }
   
@@ -776,7 +790,7 @@
     Set<HRegionInfo> result = new HashSet<HRegionInfo>();
     synchronized (regionsInTransition) {
       for (RegionState s: regionsInTransition.values()) {
-        if (s.isClosing() && !s.isClosed() &&
+        if (s.isClosing() && !s.isPendingClose() && !s.isClosed() &&
             s.getServerName().compareTo(serverName) == 0) {
           result.add(s.getRegionInfo());
         }
@@ -785,55 +799,18 @@
     return result;
   }
   
-  /** 
-   * Check if a region is closing 
-   * @param regionName 
-   * @return true if the region is marked as closing, false otherwise
+  /**
+   * Called when we have told a region server to close the region
+   * 
+   * @param regionName
    */
-  public boolean isClosing(byte [] regionName) {
+  public void setPendingClose(byte[] regionName) {
     synchronized (regionsInTransition) {
       RegionState s = regionsInTransition.get(regionName);
       if (s != null) {
-        return s.isClosing();
+        s.setPendingClose();
       }
     }
-    return false;
-  }
-
-  /** 
-   * Mark a region as closing 
-   * @param serverName
-   * @param regionInfo
-   * @param setOffline
-   */
-  public void setClosing(final String serverName, final HRegionInfo regionInfo,
-      final boolean setOffline) {
-    setClosing(serverName, regionInfo, setOffline, true);
-  }
-
-  /**
-   * Mark a region as closing 
-   * @param serverName
-   * @param regionInfo
-   * @param setOffline
-   * @param check False if we are to skip state transition check.
-   */
-  void setClosing(final String serverName, final HRegionInfo regionInfo,
-      final boolean setOffline, final boolean check) {
-    synchronized (this.regionsInTransition) {
-      RegionState s = this.regionsInTransition.get(regionInfo.getRegionName());
-      if (check && s != null) {
-        if (!s.isClosing()) {
-          throw new IllegalStateException(
-            "Cannot transition to closing from any other state. Region: " +
-            Bytes.toString(regionInfo.getRegionName()));
-        }
-        return;
-      }
-      s = new RegionState(regionInfo);
-      s.setClosing(serverName, setOffline);
-      this.regionsInTransition.put(regionInfo.getRegionName(), s);
-    }
   }
   
   /**
@@ -1057,27 +1034,28 @@
   private static class RegionState implements Comparable<RegionState> {
     private final HRegionInfo regionInfo;
     private volatile boolean unassigned = false;
-    private volatile boolean assigned = false;
-    private volatile boolean pending = false;
+    private volatile boolean pendingOpen = false;
+    private volatile boolean open = false;
     private volatile boolean closing = false;
+    private volatile boolean pendingClose = false;
     private volatile boolean closed = false;
     private volatile boolean offlined = false;
     
-    /* Set when region is assigned.
-     */
-    private String serverName = null;
+    /* Set when region is assigned or closing */
+    private volatile String serverName = null;
 
+    /* Constructor */
     RegionState(HRegionInfo info) {
       this.regionInfo = info;
     }
     
-    byte [] getRegionName() {
-      return this.regionInfo.getRegionName();
-    }
-
     synchronized HRegionInfo getRegionInfo() {
       return this.regionInfo;
     }
+    
+    synchronized byte [] getRegionName() {
+      return this.regionInfo.getRegionName();
+    }
 
     /*
      * @return Server this region was assigned to
@@ -1085,7 +1063,17 @@
     synchronized String getServerName() {
       return this.serverName;
     }
-    
+
+    /*
+     * @return true if the region is being opened
+     */
+    synchronized boolean isOpening() {
+      return this.unassigned || this.pendingOpen || this.open;
+    }
+
+    /*
+     * @return true if region is unassigned
+     */
     synchronized boolean isUnassigned() {
       return unassigned;
     }
@@ -1097,46 +1085,55 @@
      */
     synchronized void setUnassigned() {
       this.unassigned = true;
-      this.assigned = false;
-      this.pending = false;
+      this.pendingOpen = false;
+      this.open = false;
       this.closing = false;
+      this.pendingClose = false;
+      this.closed = false;
+      this.offlined = false;
       this.serverName = null;
     }
 
-    synchronized boolean isAssigned() {
-      return assigned;
+    synchronized boolean isPendingOpen() {
+      return pendingOpen;
     }
 
     /*
      * @param serverName Server region was assigned to.
      */
-    synchronized void setAssigned(final String serverName) {
+    synchronized void setPendingOpen(final String serverName) {
       if (!this.unassigned) {
         throw new IllegalStateException(
-            "Cannot assign a region that is not currently unassigned. " +
-            "State: " + toString());
+            "Cannot assign a region that is not currently unassigned. State: " +
+            toString());
       }
       this.unassigned = false;
-      this.assigned = true;
-      this.pending = false;
+      this.pendingOpen = true;
+      this.open = false;
       this.closing = false;
+      this.pendingClose = false;
+      this.closed = false;
+      this.offlined = false;
       this.serverName = serverName;
     }
 
-    synchronized boolean isPending() {
-      return pending;
+    synchronized boolean isOpen() {
+      return open;
     }
 
-    synchronized void setPending() {
-      if (!assigned) {
+    synchronized void setOpen() {
+      if (!pendingOpen) {
         throw new IllegalStateException(
-            "Cannot set a region as pending if it has not been assigned. " +
-            "State: " + toString());
+            "Cannot set a region as open if it has not been pending. State: " +
+            toString());
       }
       this.unassigned = false;
-      this.assigned = false;
-      this.pending = true;
+      this.pendingOpen = false;
+      this.open = true;
       this.closing = false;
+      this.pendingClose = false;
+      this.closed = false;
+      this.offlined = false;
     }
 
     synchronized boolean isClosing() {
@@ -1145,24 +1142,48 @@
 
     synchronized void setClosing(String serverName, boolean setOffline) {
       this.unassigned = false;
-      this.assigned = false;
-      this.pending = false;
+      this.pendingOpen = false;
+      this.open = false;
       this.closing = true;
+      this.pendingClose = false;
+      this.closed = false;
       this.offlined = setOffline;
       this.serverName = serverName;
     }
     
+    synchronized boolean isPendingClose() {
+      return this.pendingClose;
+    }
+
+    synchronized void setPendingClose() {
+      if (!closing) {
+        throw new IllegalStateException(
+            "Cannot set a region as pending close if it has not been closing. " +
+            "State: " + toString());
+      }
+      this.unassigned = false;
+      this.pendingOpen = false;
+      this.open = false;
+      this.closing = false;
+      this.pendingClose = true;
+      this.closed = false;
+    }
+
     synchronized boolean isClosed() {
       return this.closed;
     }
     
     synchronized void setClosed() {
-      if (!closing) {
+      if (!pendingClose) {
         throw new IllegalStateException(
             "Cannot set a region to be closed if it was not already marked as" +
-            " closing. State: " + toString());
+            " pending close. State: " + toString());
       }
+      this.unassigned = false;
+      this.pendingOpen = false;
+      this.open = false;
       this.closing = false;
+      this.pendingClose = false;
       this.closed = true;
     }
     
@@ -1172,11 +1193,14 @@
 
     @Override
     public synchronized String toString() {
-      return "name=" + Bytes.toString(getRegionName()) +
-          ", isUnassigned=" + this.unassigned + ", isAssigned=" +
-          this.assigned + ", isPending=" + this.pending + ", isClosing=" +
-          this.closing + ", isClosed=" + this.closed + ", isOfflined=" +
-          this.offlined;
+      return ("name=" + Bytes.toString(getRegionName()) +
+          ", unassigned=" + this.unassigned +
+          ", pendingOpen=" + this.pendingOpen +
+          ", open=" + this.open +
+          ", closing=" + this.closing +
+          ", pendingClose=" + this.pendingClose +
+          ", closed=" + this.closed +
+          ", offlined=" + this.offlined);
     }
     
     @Override

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java?rev=732908&r1=732907&r2=732908&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RootScanner.java Thu Jan  8 17:48:25 2009
@@ -30,10 +30,9 @@
    * Constructor
    * 
    * @param master
-   * @param regionManager
    */
-  public RootScanner(HMaster master, RegionManager regionManager) {
-    super(master, regionManager, true, master.metaRescanInterval, master.closed);
+  public RootScanner(HMaster master) {
+    super(master, true, master.metaRescanInterval, master.closed);
   }
 
   /*

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=732908&r1=732907&r2=732908&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java Thu Jan  8 17:48:25 2009
@@ -126,7 +126,7 @@
       }
       // May be on list of dead servers.  If so, wait till we've cleared it.
       String addr = serverInfo.getServerAddress().toString();
-      if (isDead(addr) && !isDeadServerLogsSplit(addr)) {
+      if (isDead(addr)) {
         LOG.debug("Waiting on " + addr + " removal from dead list before " +
           "processing report-for-duty request");
         sleepTime = this.master.threadWakeFrequency;
@@ -308,13 +308,15 @@
               synchronized (master.regionManager) {
                 if (info.isRootRegion()) {
                   master.regionManager.reassignRootRegion();
-                } else if (info.isMetaTable()) {
-                  master.regionManager.offlineMetaRegion(info.getStartKey());
-                }
-                if (!master.regionManager.isOfflined(info.getRegionName())) {
-                  master.regionManager.setUnassigned(info, true);
                 } else {
-                  master.regionManager.removeRegion(info);
+                  if (info.isMetaTable()) {
+                    master.regionManager.offlineMetaRegion(info.getStartKey());
+                  }
+                  if (!master.regionManager.isOfflined(info.getRegionName())) {
+                    master.regionManager.setUnassigned(info, true);
+                  } else {
+                    master.regionManager.removeRegion(info);
+                  }
                 }
               }
             }
@@ -419,7 +421,7 @@
       for (HRegionInfo i: master.regionManager.getMarkedToClose(serverName)) {
         returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_CLOSE, i));
         // Transition the region from toClose to closing state
-        master.regionManager.setClosed(i.getRegionName());
+        master.regionManager.setPendingClose(i.getRegionName());
       }
 
       // Figure out what the RegionServer ought to do, and write back.
@@ -472,7 +474,7 @@
     boolean duplicateAssignment = false;
     synchronized (master.regionManager) {
       if (!master.regionManager.isUnassigned(region) &&
-          !master.regionManager.isAssigned(region.getRegionName())) {
+          !master.regionManager.isPendingOpen(region.getRegionName())) {
         if (region.isRootRegion()) {
           // Root region
           HServerAddress rootServer = master.getRootRegionLocation();
@@ -489,7 +491,7 @@
           // Not root region. If it is not a pending region, then we are
           // going to treat it as a duplicate assignment, although we can't 
           // tell for certain that's the case.
-          if (master.regionManager.isPending(region.getRegionName())) {
+          if (master.regionManager.isPendingOpen(region.getRegionName())) {
             // A duplicate report from the correct server
             return;
           }
@@ -523,7 +525,7 @@
         } else {
           // Note that the table has been assigned and is waiting for the
           // meta table to be updated.
-          master.regionManager.setPending(region.getRegionName());
+          master.regionManager.setOpen(region.getRegionName());
           // Queue up an update to note the region location.
           try {
             master.toDoQueue.put(
@@ -564,9 +566,10 @@
       //       the messages we've received. In this case, a close could be
       //       processed before an open resulting in the master not agreeing on
       //       the region's state.
+      master.regionManager.setClosed(region.getRegionName());
       try {
-        master.toDoQueue.put(new ProcessRegionClose(master, region, offlineRegion,
-            reassignRegion));
+        master.toDoQueue.put(new ProcessRegionClose(master, region,
+            offlineRegion, reassignRegion));
       } catch (InterruptedException e) {
         throw new RuntimeException("Putting into toDoQueue was interrupted.", e);
       }
@@ -580,11 +583,11 @@
     // Only cancel lease and update load information once.
     // This method can be called a couple of times during shutdown.
     if (info != null) {
+      LOG.info("Cancelling lease for " + serverName);
       if (master.getRootRegionLocation() != null &&
         info.getServerAddress().equals(master.getRootRegionLocation())) {
-        master.regionManager.reassignRootRegion();
+        master.regionManager.unsetRootRegion();
       }
-      LOG.info("Cancelling lease for " + serverName);
       try {
         serverLeases.cancelLease(serverName);
       } catch (LeaseException e) {
@@ -789,21 +792,4 @@
   public boolean isDead(String serverName) {
     return deadServers.containsKey(serverName);
   }
-
-  /**
-   * @param serverName
-   * @return True if this is a dead server and it has had its logs split.
-   */
-  public boolean isDeadServerLogsSplit(final String serverName) {
-    Boolean b = this.deadServers.get(serverName);
-    return b == null? false: b.booleanValue();
-  }
-
-  /**
-   * Set that this deadserver has had its log split.
-   * @param serverName
-   */
-  public void setDeadServerLogsSplit(final String serverName) {
-    this.deadServers.put(serverName, Boolean.TRUE);
-  }
 }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=732908&r1=732907&r2=732908&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java Thu Jan  8 17:48:25 2009
@@ -1075,7 +1075,7 @@
    * @throws IOException
    */
   public RowResult getClosestRowBefore(final byte [] row,
-    final byte [] columnFamily)
+      final byte [] columnFamily)
   throws IOException{
     // look across all the HStores for this region and determine what the
     // closest key is across all column families, since the data may be sparse
@@ -1084,20 +1084,22 @@
     splitsAndClosesLock.readLock().lock();
     try {
       HStore store = getStore(columnFamily);
-      // get the closest key
+      // get the closest key. (HStore.getRowKeyAtOrBefore can return null)
       byte [] closestKey = store.getRowKeyAtOrBefore(row);
-      // If it happens to be an exact match, we can stop looping.
+      // If it happens to be an exact match, we can stop.
       // Otherwise, we need to check if it's the max and move to the next
-      if (HStoreKey.equalsTwoRowKeys(regionInfo, row, closestKey)) {
-        key = new HStoreKey(closestKey, this.regionInfo);
-      } else if (closestKey != null &&
-          (key == null || HStoreKey.compareTwoRowKeys(
-              regionInfo,closestKey, key.getRow()) > 0) ) {
-        key = new HStoreKey(closestKey, this.regionInfo);
-      } else {
+      if (closestKey != null) {
+        if (HStoreKey.equalsTwoRowKeys(regionInfo, row, closestKey)) {
+          key = new HStoreKey(closestKey, this.regionInfo);
+        }
+        if (key == null) {
+          key = new HStoreKey(closestKey, this.regionInfo);
+        }
+      }
+      if (key == null) {
         return null;
       }
-      
+
       // Now that we've found our key, get the values
       HbaseMapWritable<byte [], Cell> cells =
         new HbaseMapWritable<byte [], Cell>();
@@ -1299,7 +1301,10 @@
    * 
    * @param b the update to apply
    * @param expectedValues the expected values to check
+   * @param lockid
    * @param writeToWAL whether or not to write to the write ahead log
+   * @return true if update was applied
+   * @throws IOException
    */
   public boolean checkAndSave(BatchUpdate b,
     HbaseMapWritable<byte[], byte[]> expectedValues, Integer lockid,
@@ -1979,7 +1984,7 @@
             // Need to replicate filters.
             // At least WhileMatchRowFilter will mess up the scan if only
             // one shared across many rows. See HADOOP-2467.
-            f = (RowFilterInterface)WritableUtils.clone(filter, conf);
+            f = WritableUtils.clone(filter, conf);
           }
           scanners[i] = stores[i].getScanner(timestamp,
               columns.toArray(new byte[columns.size()][]), firstRow, f);

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=732908&r1=732907&r2=732908&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Thu Jan  8 17:48:25 2009
@@ -535,6 +535,7 @@
   /**
    * Run and wait on passed thread in HRS context.
    * @param t
+   * @param dfsShutdownWait
    */
   public void runThread(final Thread t, final long dfsShutdownWait) {
     if (t ==  null) {
@@ -728,6 +729,7 @@
    * Thread for toggling safemode after some configurable interval.
    */
   private class SafeModeThread extends Thread {
+    @Override
     public void run() {
       // first, wait the required interval before turning off safemode
       int safemodeInterval =