You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ji...@apache.org on 2008/12/24 02:06:55 UTC

svn commit: r729186 - in /hadoop/hbase/trunk: ./ src/java/org/apache/hadoop/hbase/client/ src/java/org/apache/hadoop/hbase/master/

Author: jimk
Date: Tue Dec 23 17:06:54 2008
New Revision: 729186

URL: http://svn.apache.org/viewvc?rev=729186&view=rev
Log:
HBASE-543,  HBASE-1046, HBase-1051 A region's state is kept in several places in the master opening the possibility for race conditions

Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/ServerConnection.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Tue Dec 23 17:06:54 2008
@@ -115,6 +115,8 @@
    HBASE-1079  Dumb NPE in ServerCallable hides the RetriesExhausted exception
    HBASE-782   The DELETE key in the hbase shell deletes the wrong character
                (Tim Sell via Stack)
+   HBASE-543,  HBASE-1046, HBase-1051 A region's state is kept in several places
+               in the master opening the possibility for race conditions
 
   IMPROVEMENTS
    HBASE-901   Add a limit to key length, check key and value length on client side

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java Tue Dec 23 17:06:54 2008
@@ -177,7 +177,15 @@
       return this.pause * HConstants.RETRY_BACKOFF[ntries];
     }
 
+    public void unsetRootRegionLocation() {
+      this.rootRegionLocation = null;
+    }
+    
     public void setRootRegionLocation(HRegionLocation rootRegion) {
+      if (rootRegion == null) {
+        throw new IllegalArgumentException(
+            "Cannot set root region location to null.");
+      }
       this.rootRegionLocation = rootRegion;
     }
     

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/ServerConnection.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/ServerConnection.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/ServerConnection.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/ServerConnection.java Tue Dec 23 17:06:54 2008
@@ -33,4 +33,10 @@
    * @param rootRegion
    */
   public void setRootRegionLocation(HRegionLocation rootRegion);
+  
+  /**
+   * Unset the root region location in the connection. Called by 
+   * ServerManager.processRegionClose.
+   */
+  public void unsetRootRegionLocation();
 }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/BaseScanner.java Tue Dec 23 17:06:54 2008
@@ -337,79 +337,85 @@
     final String serverName, final long startCode) 
   throws IOException {
     
-    // Skip region - if ...
-    if(info.isOffline()                                 // offline
-      || regionManager.isClosing(info.getRegionName())) { // queued for offline
-
-      regionManager.noLongerUnassigned(info);
-      regionManager.noLongerPending(info.getRegionName());
-      return;
-    }
-    HServerInfo storedInfo = null;
-    boolean deadServer = false;
-    if (serverName.length() != 0) {
-      
-      if (regionManager.isMarkedToClose(serverName, info.getRegionName())) {
-        // Skip if region is on kill list
-        if(LOG.isDebugEnabled()) {
-          LOG.debug("not assigning region (on kill list): " +
-            info.getRegionNameAsString());
-        }
+    synchronized (regionManager) {
+      // Skip region - if ...
+      if(info.isOffline()                                 // offline
+          || regionManager.isOfflined(info.getRegionName())) { // queued for offline
+
+        regionManager.removeRegion(info);
         return;
       }
-      
-      storedInfo = master.serverManager.getServerInfo(serverName);
-      deadServer = master.serverManager.isDead(serverName);
-    }
+      HServerInfo storedInfo = null;
+      boolean deadServer = false;
+      if (serverName.length() != 0) {
 
-    /*
-     * If the server is a dead server or its startcode is off -- either null
-     * or doesn't match the start code for the address -- then add it to the
-     * list of unassigned regions IF not already there (or pending open).
-     */ 
-    if (!deadServer && !regionManager.isUnassigned(info) &&
-          !regionManager.isPending(info.getRegionName())
-        && (storedInfo == null || storedInfo.getStartCode() != startCode)) {
-      // The current assignment is invalid
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("Current assignment of " +
-          info.getRegionNameAsString() +
-          " is not valid: serverInfo: " + storedInfo + ", passed startCode: " +
-          startCode + ", storedInfo.startCode: " +
-          ((storedInfo != null)? storedInfo.getStartCode(): -1) +
-          ", unassignedRegions: " + 
-          regionManager.isUnassigned(info) +
-          ", pendingRegions: " +
-          regionManager.isPending(info.getRegionName()));
+        if (regionManager.isOfflined(info.getRegionName())) {
+          // Skip if region is on kill list
+          if(LOG.isDebugEnabled()) {
+            LOG.debug("not assigning region (on kill list): " +
+                info.getRegionNameAsString());
+          }
+          return;
+        }
+
+        storedInfo = master.serverManager.getServerInfo(serverName);
+        deadServer = master.serverManager.isDead(serverName);
       }
-      // Recover the region server's log if there is one.
-      // This is only done from here if we are restarting and there is stale
-      // data in the meta region. Once we are on-line, dead server log
-      // recovery is handled by lease expiration and ProcessServerShutdown
-      if (!regionManager.isInitialMetaScanComplete() && serverName.length() != 0) {
-        StringBuilder dirName = new StringBuilder("log_");
-        dirName.append(serverName.replace(":", "_"));
-        Path logDir = new Path(master.rootdir, dirName.toString());
-        try {
-          if (master.fs.exists(logDir)) {
-            regionManager.splitLogLock.lock();
-            try {
-              HLog.splitLog(master.rootdir, logDir, master.fs,
-                master.getConfiguration());
-            } finally {
-              regionManager.splitLogLock.unlock();
+
+      /*
+       * If the server is a dead server or its startcode is off -- either null
+       * or doesn't match the start code for the address -- then add it to the
+       * list of unassigned regions IF not already there (or pending open).
+       */ 
+      if ((deadServer || 
+          (storedInfo == null || storedInfo.getStartCode() != startCode)) &&
+          (!regionManager.isUnassigned(info) &&
+              !regionManager.isPending(info.getRegionName()) &&
+              !regionManager.isAssigned(info.getRegionName()))) {
+
+        // The current assignment is invalid
+
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Current assignment of " +
+              info.getRegionNameAsString() +
+              " is not valid." +
+              (storedInfo == null ? " Server '" + serverName + "' unknown." :
+                " serverInfo: " + storedInfo + ", passed startCode: " +
+                startCode + ", storedInfo.startCode: " + storedInfo.getStartCode()) +
+          " Region is not unassigned, assigned or pending");
+        }
+
+        // Recover the region server's log if there is one.
+        // This is only done from here if we are restarting and there is stale
+        // data in the meta region. Once we are on-line, dead server log
+        // recovery is handled by lease expiration and ProcessServerShutdown
+
+        if (!regionManager.isInitialMetaScanComplete() &&
+            serverName.length() != 0) {
+          StringBuilder dirName = new StringBuilder("log_");
+          dirName.append(serverName.replace(":", "_"));
+          Path logDir = new Path(master.rootdir, dirName.toString());
+          try {
+            if (master.fs.exists(logDir)) {
+              regionManager.splitLogLock.lock();
+              try {
+                HLog.splitLog(master.rootdir, logDir, master.fs,
+                    master.getConfiguration());
+              } finally {
+                regionManager.splitLogLock.unlock();
+              }
             }
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Split " + logDir.toString());
+            }
+          } catch (IOException e) {
+            LOG.warn("unable to split region server log because: ", e);
+            throw e;
           }
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("Split " + logDir.toString());
-          }
-        } catch (IOException e) {
-          LOG.warn("unable to split region server log because: ", e);
-          throw e;
         }
+        // Now get the region assigned
+        regionManager.setUnassigned(info, true);
       }
-      // Now get the region assigned
-      regionManager.setUnassigned(info);
     }
   }
   

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ChangeTableState.java Tue Dec 23 17:06:54 2008
@@ -23,14 +23,12 @@
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
-import java.util.TreeMap;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.ipc.HRegionInterface;
 import org.apache.hadoop.hbase.io.BatchUpdate;
-import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Writables;
 
 /** Instantiated to enable or disable a table */
@@ -90,15 +88,18 @@
         LOG.debug("Updated columns in row: " + i.getRegionNameAsString());
       }
 
-      if (online) {
-        // Bring offline regions on-line
-        master.regionManager.noLongerClosing(i.getRegionName());
-        if (!master.regionManager.isUnassigned(i)) {
-          master.regionManager.setUnassigned(i);
+      synchronized (master.regionManager) {
+        if (online) {
+          // Bring offline regions on-line
+          if (!master.regionManager.isUnassigned(i) &&
+              !master.regionManager.isAssigned(i.getRegionName()) &&
+              !master.regionManager.isPending(i.getRegionName())) {
+            master.regionManager.setUnassigned(i, false);
+          }
+        } else {
+          // Prevent region from getting assigned.
+          master.regionManager.removeRegion(i);
         }
-      } else {
-        // Prevent region from getting assigned.
-        master.regionManager.noLongerUnassigned(i);
       }
     }
 
@@ -106,37 +107,23 @@
     if (LOG.isDebugEnabled()) {
       LOG.debug("processing regions currently being served");
     }
-    for (Map.Entry<String, HashSet<HRegionInfo>> e: servedRegions.entrySet()) {
-      String serverName = e.getKey();
-      if (online) {
-        LOG.debug("Already online");
-        continue;                             // Already being served
-      }
+    synchronized (master.regionManager) {
+      for (Map.Entry<String, HashSet<HRegionInfo>> e: servedRegions.entrySet()) {
+        String serverName = e.getKey();
+        if (online) {
+          LOG.debug("Already online");
+          continue;                             // Already being served
+        }
 
-      // Cause regions being served to be taken off-line and disabled
+        // Cause regions being served to be taken off-line and disabled
 
-      Map<byte [], HRegionInfo> localKillList =
-        new TreeMap<byte [], HRegionInfo>(Bytes.BYTES_COMPARATOR);
-      for (HRegionInfo i: e.getValue()) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("adding region " + i.getRegionNameAsString() + " to kill list");
-        }
-        // this marks the regions to be closed
-        localKillList.put(i.getRegionName(), i);
-        // this marks the regions to be offlined once they are closed
-        master.regionManager.markRegionForOffline(i.getRegionName());
-      }
-      Map<byte [], HRegionInfo> killedRegions = 
-        master.regionManager.removeMarkedToClose(serverName);
-      if (killedRegions != null) {
-        localKillList.putAll(killedRegions);
-      }
-      if (localKillList.size() > 0) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("inserted local kill list into kill list for server " +
-              serverName);
+        for (HRegionInfo i: e.getValue()) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("adding region " + i.getRegionNameAsString() + " to kill list");
+          }
+          // this marks the regions to be closed
+          master.regionManager.setClosing(serverName, i, true);
         }
-        master.regionManager.markToCloseBulk(serverName, localKillList);
       }
     }
     servedRegions.clear();

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/HMaster.java Tue Dec 23 17:06:54 2008
@@ -93,7 +93,8 @@
   
   static final Log LOG = LogFactory.getLog(HMaster.class.getName());
 
-  public long getProtocolVersion(String protocol, long clientVersion) {
+  public long getProtocolVersion(@SuppressWarnings("unused") String protocol,
+      @SuppressWarnings("unused") long clientVersion) {
     return HBaseRPCProtocolVersion.versionID;
   }
 
@@ -531,8 +532,7 @@
   /*
    * HMasterRegionInterface
    */
-  public MapWritable regionServerStartup(HServerInfo serverInfo)
-  throws IOException {
+  public MapWritable regionServerStartup(HServerInfo serverInfo) {
     // Set the address for now even tho it will not be persisted on
     // the HRS side.
     String rsAddress = HBaseServer.getRemoteAddress();
@@ -833,7 +833,7 @@
       LOG.info("Marking " + hri.getRegionNameAsString() +
         " as closed on " + servername + "; cleaning SERVER + STARTCODE; " +
           "master will tell regionserver to close region on next heartbeat");
-      this.regionManager.markToClose(servername, hri);
+      this.regionManager.setClosing(servername, hri, false);
       MetaRegion meta = this.regionManager.getMetaRegionForRow(regionname);
       HRegionInterface srvr = getMETAServer(meta);
       HRegion.cleanRegionInMETA(srvr, meta.getRegionName(), hri);

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/MetaScanner.java Tue Dec 23 17:06:54 2008
@@ -154,10 +154,12 @@
    */
   synchronized boolean waitForMetaRegionsOrClose() {
     while (!master.closed.get()) {
-      if (regionManager.isInitialRootScanComplete() &&
-          regionManager.numMetaRegions() ==
-            regionManager.numOnlineMetaRegions()) {
-        break;
+      synchronized (master.regionManager) {
+        if (regionManager.isInitialRootScanComplete() &&
+            regionManager.numMetaRegions() ==
+              regionManager.numOnlineMetaRegions()) {
+          break;
+        }
       }
       try {
         wait(master.threadWakeFrequency);

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionClose.java Tue Dec 23 17:06:54 2008
@@ -71,11 +71,11 @@
             // back on the toDoQueue
 
             if (metaRegionAvailable()) {
-              // offline the region in meta and then note that we've offlined
-              // the region. 
+              // offline the region in meta and then remove it from the
+              // set of regions in transition
               HRegion.offlineRegionInMETA(server, metaRegionName,
                   regionInfo);
-              master.regionManager.regionOfflined(regionInfo.getRegionName());
+              master.regionManager.removeRegion(regionInfo);
             }
             return true;
           }
@@ -84,7 +84,7 @@
 
     } else if (reassignRegion) {
       // we are reassigning the region eventually, so set it unassigned
-      master.regionManager.setUnassigned(regionInfo);
+      master.regionManager.setUnassigned(regionInfo, false);
     }
 
     return result == null ? true : result;

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessRegionOpen.java Tue Dec 23 17:06:54 2008
@@ -94,26 +94,28 @@
             this.historian.online(this.master.getConfiguration());
           }
           this.historian.addRegionOpen(regionInfo, serverAddress);
-          if (isMetaTable) {
-            // It's a meta region.
-            MetaRegion m = new MetaRegion(new HServerAddress(serverAddress),
-                regionInfo.getRegionName(), regionInfo.getStartKey());
-            if (!master.regionManager.isInitialMetaScanComplete()) {
-              // Put it on the queue to be scanned for the first time.
-              LOG.debug("Adding " + m.toString() + " to regions to scan");
-              master.regionManager.addMetaRegionToScan(m);
-            } else {
-              // Add it to the online meta regions
-              LOG.debug("Adding to onlineMetaRegions: " + m.toString());
-              master.regionManager.putMetaRegionOnline(m);
-              // Interrupting the Meta Scanner sleep so that it can
-              // process regions right away
-              master.regionManager.metaScannerThread.interrupt();
+          synchronized (master.regionManager) {
+            if (isMetaTable) {
+              // It's a meta region.
+              MetaRegion m = new MetaRegion(new HServerAddress(serverAddress),
+                  regionInfo.getRegionName(), regionInfo.getStartKey());
+              if (!master.regionManager.isInitialMetaScanComplete()) {
+                // Put it on the queue to be scanned for the first time.
+                LOG.debug("Adding " + m.toString() + " to regions to scan");
+                master.regionManager.addMetaRegionToScan(m);
+              } else {
+                // Add it to the online meta regions
+                LOG.debug("Adding to onlineMetaRegions: " + m.toString());
+                master.regionManager.putMetaRegionOnline(m);
+                // Interrupting the Meta Scanner sleep so that it can
+                // process regions right away
+                master.regionManager.metaScannerThread.interrupt();
+              }
             }
+            // If updated successfully, remove from pending list.
+            master.regionManager.removeRegion(regionInfo);
+            return true;
           }
-          // If updated successfully, remove from pending list.
-          master.regionManager.noLongerPending(regionInfo.getRegionName());
-          return true;
         }
     }.doWithRetries();
     return result == null ? true : result;

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ProcessServerShutdown.java Tue Dec 23 17:06:54 2008
@@ -113,7 +113,7 @@
         // region had been on shutdown server (could be null because we
         // missed edits in hlog because hdfs does not do write-append).
         String serverName = Writables.cellToString(values.get(COL_SERVER));
-        if (serverName.length() > 0 &&
+        if (serverName != null && serverName.length() > 0 &&
             deadServerName.compareTo(serverName) != 0) {
           // This isn't the server you're looking for - move along
           continue;
@@ -130,31 +130,33 @@
           continue;
         }
 
-        if (info.isMetaTable()) {
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("removing meta region " +
-              Bytes.toString(info.getRegionName()) +
+        synchronized (master.regionManager) {
+          if (info.isMetaTable()) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("removing meta region " +
+                  Bytes.toString(info.getRegionName()) +
               " from online meta regions");
+            }
+            master.regionManager.offlineMetaRegion(info.getStartKey());
           }
-          master.regionManager.offlineMetaRegion(info.getStartKey());
-        }
 
-        ToDoEntry todo = new ToDoEntry(row, info);
-        toDoList.add(todo);
+          ToDoEntry todo = new ToDoEntry(row, info);
+          toDoList.add(todo);
 
-        if (master.regionManager.isMarkedToClose(deadServerName, info.getRegionName())) {
-          master.regionManager.noLongerMarkedToClose(deadServerName, info.getRegionName());
-          master.regionManager.noLongerUnassigned(info);
-          // Mark region offline
-          todo.regionOffline = true;
-        } else {
-          if (!info.isOffline() && !info.isSplit()) {
-            // Get region reassigned
-            regions.add(info);
+          if (master.regionManager.isOfflined(info.getRegionName()) ||
+              info.isOffline()) {
+            master.regionManager.removeRegion(info);
+            // Mark region offline
+            if (!info.isOffline()) {
+              todo.regionOffline = true;
+            }
+          } else {
+            if (!info.isOffline() && !info.isSplit()) {
+              // Get region reassigned
+              regions.add(info);
+            }
           }
         }
-        // If it was pending, remove.
-        master.regionManager.noLongerPending(info.getRegionName());
       }
     } finally {
       if(scannerId != -1L) {
@@ -184,7 +186,7 @@
 
     // Get regions reassigned
     for (HRegionInfo info: regions) {
-      master.regionManager.setUnassigned(info);
+      master.regionManager.setUnassigned(info, true);
     }
   }
 
@@ -252,11 +254,11 @@
     }
     
     if (this.rootRegionServer && !this.rootRegionReassigned) {
+      // avoid multiple root region reassignment 
+      this.rootRegionReassigned = true;
       // The server that died was serving the root region. Now that the log
       // has been split, get it reassigned.
       master.regionManager.reassignRootRegion();
-      // avoid multiple root region reassignment 
-      this.rootRegionReassigned = true;
       // When we call rootAvailable below, it will put us on the delayed
       // to do queue to allow some time to pass during which the root 
       // region will hopefully get reassigned.
@@ -307,7 +309,6 @@
           Bytes.toString(r.getRegionName()) + " on " + r.getServer());
       }
     }
-    master.regionManager.allRegionsClosed(deadServerName);
     master.serverManager.removeDeadServer(deadServerName);
     return true;
   }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/RegionManager.java Tue Dec 23 17:06:54 2008
@@ -34,8 +34,6 @@
 import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.Collections;
-import java.util.TreeSet;
-import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentSkipListMap;
 
 import org.apache.commons.logging.Log;
@@ -80,45 +78,23 @@
 
   private static final byte[] OVERLOADED = Bytes.toBytes("Overloaded");
 
-  /**
-   * The 'unassignedRegions' table maps from a HRegionInfo to a timestamp that
-   * indicates the last time we *tried* to assign the region to a RegionServer.
-   * If the timestamp is out of date, then we can try to reassign it. 
+  /*
+   * Map of region name to RegionState for regions that are in transition such as
    * 
-   * We fill 'unassignedRecords' by scanning ROOT and META tables, learning the
-   * set of all known valid regions.
+   * unassigned -> assigned -> pending -> open
+   * closing -> closed -> offline
+   * closing -> closed -> unassigned -> assigned -> pending -> open
+   * 
+   * At the end of a transition, removeRegion is used to remove the region from
+   * the map (since it is no longer in transition)
+   * 
+   * Note: Needs to be SortedMap so we can specify a comparator
    * 
-   * <p>Items are removed from this list when a region server reports in that
-   * the region has been deployed.
-   *
-   * TODO: Need to be a sorted map?
-   */
-  private final SortedMap<HRegionInfo, Long> unassignedRegions =
-    Collections.synchronizedSortedMap(new TreeMap<HRegionInfo, Long>());
-
-  /**
-   * Regions that have been assigned, and the server has reported that it has
-   * started serving it, but that we have not yet recorded in the meta table.
-   */
-  private final Set<byte []> pendingRegions =
-    Collections.synchronizedSet(new TreeSet<byte []>(Bytes.BYTES_COMPARATOR));
-
-  /**
-   * List of regions that are going to be closed.
    */
-  private final Map<String, Map<byte [], HRegionInfo>> regionsToClose =
-    new ConcurrentHashMap<String, Map<byte [], HRegionInfo>>();
-
-  /** Regions that are in the process of being closed */
-  private final Set<byte []> closingRegions =
-    Collections.synchronizedSet(new TreeSet<byte []>(Bytes.BYTES_COMPARATOR));
+  private final SortedMap<byte[], RegionState> regionsInTransition =
+    Collections.synchronizedSortedMap(
+        new TreeMap<byte[], RegionState>(Bytes.BYTES_COMPARATOR));
 
-  /**
-   * Set of regions that, once closed, should be marked as offline so that they
-   * are not reassigned.
-   */
-  private final Set<byte []> regionsToOffline = 
-    Collections.synchronizedSet(new TreeSet<byte []>(Bytes.BYTES_COMPARATOR));
   // How many regions to assign a server at a time.
   private final int maxAssignInOneGo;
 
@@ -127,12 +103,12 @@
   private final float slop;
 
   /** Set of regions to split. */
-  private final Map<byte[],Pair<HRegionInfo,HServerAddress>> regionsToSplit = 
+  private final SortedMap<byte[],Pair<HRegionInfo,HServerAddress>> regionsToSplit = 
     Collections.synchronizedSortedMap(
       new TreeMap<byte[],Pair<HRegionInfo,HServerAddress>>
-        (Bytes.BYTES_COMPARATOR));
+      (Bytes.BYTES_COMPARATOR));
   /** Set of regions to compact. */
-  private final Map<byte[],Pair<HRegionInfo,HServerAddress>> regionsToCompact =
+  private final SortedMap<byte[],Pair<HRegionInfo,HServerAddress>> regionsToCompact =
     Collections.synchronizedSortedMap(
       new TreeMap<byte[],Pair<HRegionInfo,HServerAddress>>
       (Bytes.BYTES_COMPARATOR));
@@ -160,21 +136,31 @@
     Threads.setDaemonThreadRunning(metaScannerThread,
       "RegionManager.metaScanner");    
   }
-  
+
   void unsetRootRegion() {
-    rootRegionLocation.set(null);
+    synchronized (regionsInTransition) {
+      rootRegionLocation.set(null);
+      regionsInTransition.remove(HRegionInfo.ROOT_REGIONINFO.getRegionName());
+    }
   }
   
   void reassignRootRegion() {
     unsetRootRegion();
     if (!master.shutdownRequested) {
-      unassignedRegions.put(HRegionInfo.ROOT_REGIONINFO, ZERO_L);
+      synchronized (regionsInTransition) {
+        RegionState s = new RegionState(HRegionInfo.ROOT_REGIONINFO);
+        s.setUnassigned();
+        regionsInTransition.put(HRegionInfo.ROOT_REGIONINFO.getRegionName(), s);
+      }
     }
   }
   
   /*
    * Assigns regions to region servers attempting to balance the load across
    * all region servers
+   *
+   * Note that no synchronization is necessary as the caller 
+   * (ServerManager.processMsgs) already owns the monitor for the RegionManager.
    * 
    * @param info
    * @param serverName
@@ -183,53 +169,51 @@
   void assignRegions(HServerInfo info, String serverName,
     HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) {
     HServerLoad thisServersLoad = info.getLoad();
-    synchronized (unassignedRegions) {
-      // We need to hold a lock on assign attempts while we figure out what to
-      // do so that multiple threads do not execute this method in parallel
-      // resulting in assigning the same region to multiple servers.
-      
-      // figure out what regions need to be assigned and aren't currently being
-      // worked on elsewhere.
-      Set<HRegionInfo> regionsToAssign = regionsAwaitingAssignment();
-      if (regionsToAssign.size() == 0) {
-        // There are no regions waiting to be assigned.
-        if (!inSafeMode()) {
-          // We only do load balancing once all regions are assigned.
-          // This prevents churn while the cluster is starting up.
-          double avgLoad = master.serverManager.getAverageLoad();
-          double avgLoadWithSlop = avgLoad +
-            ((this.slop != 0)? avgLoad * this.slop: avgLoad);
-          if (avgLoad > 2.0 &&
-              thisServersLoad.getNumberOfRegions() > avgLoadWithSlop) {
-            if (LOG.isDebugEnabled()) {
-              LOG.debug("Server " + serverName +
+    // figure out what regions need to be assigned and aren't currently being
+    // worked on elsewhere.
+    Set<RegionState> regionsToAssign = regionsAwaitingAssignment();
+    if (regionsToAssign.size() == 0) {
+      // There are no regions waiting to be assigned.
+      if (!inSafeMode()) {
+        // We only do load balancing once all regions are assigned.
+        // This prevents churn while the cluster is starting up.
+        double avgLoad = master.serverManager.getAverageLoad();
+        double avgLoadWithSlop = avgLoad +
+        ((this.slop != 0)? avgLoad * this.slop: avgLoad);
+        if (avgLoad > 2.0 &&
+            thisServersLoad.getNumberOfRegions() > avgLoadWithSlop) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Server " + serverName +
                 " is overloaded. Server load: " + 
                 thisServersLoad.getNumberOfRegions() + " avg: " + avgLoad +
                 ", slop: " + this.slop);
-            }
-            unassignSomeRegions(thisServersLoad, avgLoad, mostLoadedRegions,
-              returnMsgs);
           }
+          unassignSomeRegions(serverName, thisServersLoad,
+              avgLoad, mostLoadedRegions, returnMsgs);
         }
+      }
+    } else {
+      // if there's only one server, just give it all the regions
+      if (master.serverManager.numServers() == 1) {
+        assignRegionsToOneServer(regionsToAssign, serverName, returnMsgs);
       } else {
-        // if there's only one server, just give it all the regions
-        if (master.serverManager.numServers() == 1) {
-          assignRegionsToOneServer(regionsToAssign, serverName, returnMsgs);
-        } else {
-          // otherwise, give this server a few regions taking into account the 
-          // load of all the other servers.
-         assignRegionsToMultipleServers(thisServersLoad, regionsToAssign, 
-           serverName, returnMsgs);
-        }
+        // otherwise, give this server a few regions taking into account the 
+        // load of all the other servers.
+        assignRegionsToMultipleServers(thisServersLoad, regionsToAssign, 
+            serverName, returnMsgs);
       }
     }
   }
   
-  /**
+  /*
    * Make region assignments taking into account multiple servers' loads.
+   *
+   * Note that no synchronization is needed while we iterate over
+   * regionsInTransition because this method is only called by assignRegions
+   * whose caller owns the monitor for RegionManager
    */ 
   private void assignRegionsToMultipleServers(final HServerLoad thisServersLoad,
-    final Set<HRegionInfo> regionsToAssign, final String serverName, 
+    final Set<RegionState> regionsToAssign, final String serverName, 
     final ArrayList<HMsg> returnMsgs) {
     
     int nRegionsToAssign = regionsToAssign.size();
@@ -274,14 +258,12 @@
         nregions = this.maxAssignInOneGo;
       }
       
-      long now = System.currentTimeMillis();
-      for (HRegionInfo regionInfo: regionsToAssign) {
-        LOG.info("assigning region " +
-          Bytes.toString(regionInfo.getRegionName())+
+      for (RegionState s: regionsToAssign) {
+        LOG.info("assigning region " + Bytes.toString(s.getRegionName())+
           " to server " + serverName);
-        unassignedRegions.put(regionInfo, Long.valueOf(now));
-        this.historian.addRegionAssignment(regionInfo, serverName);
-        returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, regionInfo));
+        s.setAssigned(serverName);
+        this.historian.addRegionAssignment(s.getRegionInfo(), serverName);
+        returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, s.getRegionInfo()));
         if (--nregions <= 0) {
           break;
         }
@@ -326,38 +308,38 @@
     return nRegions;
   }
   
-  /**
+  /*
    * Get the set of regions that should be assignable in this pass.
+   * 
+   * Note that no synchronization on regionsInTransition is needed because the
+   * only caller (assignRegions) whose caller owns the monitor for RegionManager
    */ 
-  private Set<HRegionInfo> regionsAwaitingAssignment() {
-    long now = System.currentTimeMillis();
-    
+  private Set<RegionState> regionsAwaitingAssignment() {
     // set of regions we want to assign to this server
-    Set<HRegionInfo> regionsToAssign = new HashSet<HRegionInfo>();
+    Set<RegionState> regionsToAssign = new HashSet<RegionState>();
     
     // Look over the set of regions that aren't currently assigned to 
     // determine which we should assign to this server.
-    synchronized (unassignedRegions) {          //must synchronize when iterating
-      for (Map.Entry<HRegionInfo, Long> e: unassignedRegions.entrySet()) {
-        HRegionInfo i = e.getKey();
-        if (numberOfMetaRegions.get() != onlineMetaRegions.size() &&
-            !i.isMetaRegion()) {
-          // Can't assign user regions until all meta regions have been assigned
-          // and are on-line
-          continue;
-        }
-        // If the last attempt to open this region was pretty recent, then we 
-        // don't want to try and assign it.
-        long diff = now - e.getValue().longValue();
-        if (diff > master.maxRegionOpenTime) {
-          regionsToAssign.add(e.getKey());
-        }
+    for (RegionState s: regionsInTransition.values()) {
+      HRegionInfo i = s.getRegionInfo();
+      if (i == null) {
+        continue;
+      }
+      if (numberOfMetaRegions.get() != onlineMetaRegions.size() &&
+          !i.isMetaRegion()) {
+        // Can't assign user regions until all meta regions have been assigned
+        // and are on-line
+        continue;
+      }
+      if (!s.isAssigned() && !s.isClosing() && !s.isPending()) {
+        s.setUnassigned();
+        regionsToAssign.add(s);
       }
     }
     return regionsToAssign;
   }
   
-  /**
+  /*
    * Figure out the load that is next highest amongst all regionservers. Also,
    * return how many servers exist at that load. 
    */
@@ -392,31 +374,37 @@
   
   /*
    * Assign all to the only server. An unlikely case but still possible.
+   * 
+   * Note that no synchronization is needed on regionsInTransition while
+   * iterating on it because the only caller is assignRegions whose caller owns 
+   * the monitor for RegionManager
+   * 
    * @param regionsToAssign
    * @param serverName
    * @param returnMsgs
    */
-  private void assignRegionsToOneServer(final Set<HRegionInfo> regionsToAssign,
+  private void assignRegionsToOneServer(final Set<RegionState> regionsToAssign,
       final String serverName, final ArrayList<HMsg> returnMsgs) {
-    long now = System.currentTimeMillis();
-    for (HRegionInfo regionInfo: regionsToAssign) {
-      LOG.info("assigning region " +
-          Bytes.toString(regionInfo.getRegionName()) +
+    for (RegionState s: regionsToAssign) {
+      LOG.info("assigning region " + Bytes.toString(s.getRegionName()) +
           " to the only server " + serverName);
-      unassignedRegions.put(regionInfo, Long.valueOf(now));
-      this.historian.addRegionAssignment(regionInfo, serverName);
-      returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, regionInfo));
+      s.setAssigned(serverName);
+      this.historian.addRegionAssignment(s.getRegionInfo(), serverName);
+      returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_OPEN, s.getRegionInfo()));
     }
   }
   
-  /**
+  /*
    * The server checking in right now is overloaded. We will tell it to close
    * some or all of its most loaded regions, allowing it to reduce its load.
    * The closed regions will then get picked up by other underloaded machines.
+   *
+   * Note that no synchronization is needed because the only caller 
+   * (assignRegions) whose caller owns the monitor for RegionManager
    */
-  private synchronized void unassignSomeRegions(final HServerLoad load, 
-    final double avgLoad, final HRegionInfo[] mostLoadedRegions, 
-    ArrayList<HMsg> returnMsgs) {
+  private void unassignSomeRegions(final String serverName,
+      final HServerLoad load, final double avgLoad,
+      final HRegionInfo[] mostLoadedRegions, ArrayList<HMsg> returnMsgs) {
     
     int numRegionsToClose = load.getNumberOfRegions() - (int)Math.ceil(avgLoad);
     LOG.debug("Choosing to reassign " + numRegionsToClose 
@@ -425,7 +413,7 @@
     
     int regionIdx = 0;
     int regionsClosed = 0;
-    int skippedClosing = 0;
+    int skipped = 0;
     while (regionsClosed < numRegionsToClose &&
         regionIdx < mostLoadedRegions.length) {
       HRegionInfo currentRegion = mostLoadedRegions[regionIdx];
@@ -435,8 +423,10 @@
         continue;
       }
       
-      if (isClosing(currentRegion.getRegionName())) {
-        skippedClosing++;
+      byte[] regionName = currentRegion.getRegionName();
+      if (isClosing(regionName) || isUnassigned(currentRegion) ||
+          isAssigned(regionName) || isPending(regionName)) {
+        skipped++;
         continue;
       }
       
@@ -446,11 +436,11 @@
       returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_CLOSE, currentRegion,
         OVERLOADED));
       // mark the region as closing
-      setClosing(currentRegion.getRegionName());
+      setClosing(serverName, currentRegion, false);
       // increment the count of regions we've marked
       regionsClosed++;
     }
-    LOG.info("Skipped " + skippedClosing + " region(s) as already closing");
+    LOG.info("Skipped " + skipped + " region(s) that are in transition states");
   }
   
   /**
@@ -503,12 +493,10 @@
    * @return true if we found meta regions, false if we're closing.
    */
   public boolean areAllMetaRegionsOnline() {
-    boolean result = false;
-    if (rootRegionLocation.get() != null &&
-        numberOfMetaRegions.get() == onlineMetaRegions.size()) {
-      result = true;
+    synchronized (onlineMetaRegions) {
+      return (rootRegionLocation.get() != null &&
+          numberOfMetaRegions.get() == onlineMetaRegions.size());
     }
-    return result;
   }
   
   /**
@@ -528,7 +516,7 @@
           return onlineMetaRegions.get(newRegion.getRegionName());
         } 
         return onlineMetaRegions.get(onlineMetaRegions.headMap(
-          newRegion.getTableDesc().getName()).lastKey());
+            newRegion.getTableDesc().getName()).lastKey());
       }
     }
   }
@@ -569,6 +557,15 @@
   }
 
   /**
+   * Remove a region from the region state map.
+   * 
+   * @param info
+   */
+  public void removeRegion(HRegionInfo info) {
+    regionsInTransition.remove(info.getRegionName());
+  }
+  
+  /**
    * Get metaregion that would host passed in row.
    * @param row Row need to know all the meta regions for
    * @return set of MetaRegion objects that contain the table
@@ -611,7 +608,7 @@
     region.getLog().closeAndDelete();
 
     // 5. Get it assigned to a server
-    unassignedRegions.put(info, ZERO_L);
+    setUnassigned(info, true);
   }
   
   /** 
@@ -667,7 +664,13 @@
    * it happens to be between states.
    */
   public boolean isUnassigned(HRegionInfo info) {
-    return unassignedRegions.containsKey(info);
+    synchronized (regionsInTransition) {
+      RegionState s = regionsInTransition.get(info.getRegionName());
+      if (s != null) {
+        return s.isUnassigned();
+      }
+    }
+    return false;
   }
   
   /**
@@ -676,89 +679,71 @@
    * @return true if pending, false otherwise
    */
   public boolean isPending(byte [] regionName) {
-    return pendingRegions.contains(regionName);
-  }
-  
-  /** 
-   * Set a region to unassigned 
-   * @param info Region to set unassigned
-   */
-  public void setUnassigned(HRegionInfo info) {
-    synchronized(this.unassignedRegions) {
-      if (!this.unassignedRegions.containsKey(info) &&
-          !this.pendingRegions.contains(info.getRegionName())) {
-        this.unassignedRegions.put(info, ZERO_L);
+    synchronized (regionsInTransition) {
+      RegionState s = regionsInTransition.get(regionName);
+      if (s != null) {
+        return s.isPending();
       }
     }
+    return false;
   }
   
   /**
-   * Set a region to pending assignment 
    * @param regionName
+   * @return true if region has been assigned
    */
-  public void setPending(byte [] regionName) {
-    pendingRegions.add(regionName);
-  }
-  
-  /**
-   * Unset region's pending status 
-   * @param regionName 
-   */
-  public void noLongerPending(byte [] regionName) {
-    pendingRegions.remove(regionName);
+  public boolean isAssigned(byte[] regionName) {
+    synchronized (regionsInTransition) {
+      RegionState s = regionsInTransition.get(regionName);
+      if (s != null) {
+        return s.isAssigned() || s.isPending();
+      }
+    }
+    return false;
   }
-  
+
   /**
-   * Extend the update assignment deadline for a region.
-   * @param info Region whose deadline you want to extend
+   * @param regionName
+   * @return true if region is marked to be offlined.
    */
-  public void updateAssignmentDeadline(HRegionInfo info) {
-    synchronized (unassignedRegions) {
-      // Region server is reporting in that its working on region open
-      // (We can get more than one of these messages if region is replaying
-      // a bunch of edits and taking a while to open).
-      // Extend region open time by max region open time.
-      this.unassignedRegions.put(info,
-        Long.valueOf(System.currentTimeMillis() + this.master.maxRegionOpenTime));
+  public boolean isOfflined(byte[] regionName) {
+    synchronized (regionsInTransition) {
+      RegionState s = regionsInTransition.get(regionName);
+      if (s != null) {
+        return s.isOfflined();
+      }
     }
+    return false;
   }
-  
+
   /** 
-   * Unset a region's unassigned status 
-   * @param info Region you want to take off the unassigned list
-   */
-  public void noLongerUnassigned(HRegionInfo info) {
-    unassignedRegions.remove(info);
-  }
-  
-  /**
-   * Mark a region to be closed. Server manager will inform hosting region server
-   * to close the region at its next opportunity.
-   * @param serverName address info of server
-   * @param info region to close
+   * Set a region to unassigned 
+   * @param info Region to set unassigned
+   * @param force if true mark region unassigned whatever its current state
    */
-  public void markToClose(final String serverName, final HRegionInfo info) {
-    Map<byte [], HRegionInfo> toclose =
-      new TreeMap<byte [], HRegionInfo>(Bytes.BYTES_COMPARATOR);
-    toclose.put(info.getRegionName(), info);
-    markToCloseBulk(serverName, toclose);
+  public void setUnassigned(HRegionInfo info, boolean force) {
+    synchronized(this.regionsInTransition) {
+      RegionState s = regionsInTransition.get(info.getRegionName());
+      if (s == null) {
+        s = new RegionState(info);
+        regionsInTransition.put(info.getRegionName(), s);
+      }
+      if (force || (!s.isAssigned() && !s.isPending())) {
+        s.setUnassigned();
+      }
+    }
   }
   
   /**
-   * Mark a bunch of regions as to close at once for a server 
-   * @param serverName address info of server
-   * @param map map of region names to region infos of regions to close
+   * Set a region to pending assignment 
+   * @param regionName
    */
-  public void markToCloseBulk(final String serverName,
-      final Map<byte [], HRegionInfo> map) {
-    synchronized (regionsToClose) {
-      Map<byte [], HRegionInfo> regions = regionsToClose.get(serverName);
-      if (regions != null) {
-        regions.putAll(map);
-      } else {
-        regions = map;
+  public void setPending(byte [] regionName) {
+    synchronized (regionsInTransition) {
+      RegionState s = regionsInTransition.get(regionName);
+      if (s != null) {
+        s.setPending();
       }
-      regionsToClose.put(serverName, regions);
     }
   }
   
@@ -767,74 +752,71 @@
    * given server
    *  
    * @param serverName
-   * @return map of region names to region infos to close
+   * @return set of infos to close
    */
-  public Map<byte [], HRegionInfo> removeMarkedToClose(String serverName) {
-    return regionsToClose.remove(serverName);
-  }
-  
-  /**
-   * Check if a region is marked as to close
-   * @param serverName address info of server
-   * @param regionName name of the region we might want to close
-   * @return true if the region is marked to close, false otherwise
-   */
-  public boolean isMarkedToClose(String serverName, byte [] regionName) {
-    synchronized (regionsToClose) {
-      Map<byte [], HRegionInfo> serverToClose = regionsToClose.get(serverName);
-      return (serverToClose != null && serverToClose.containsKey(regionName));
-    }
-  }
-  
-  /**
-   * Mark a region as no longer waiting to be closed. Either it was closed or 
-   * we don't want to close it anymore for some reason.
-   * @param serverName address info of server
-   * @param regionName name of the region
-   */
-  public void noLongerMarkedToClose(String serverName, byte [] regionName) {
-    synchronized (regionsToClose) {
-      Map<byte [], HRegionInfo> serverToClose = regionsToClose.get(serverName);
-      if (serverToClose != null) {
-        serverToClose.remove(regionName);
+  public Set<HRegionInfo> getMarkedToClose(String serverName) {
+    Set<HRegionInfo> result = new HashSet<HRegionInfo>();
+    synchronized (regionsInTransition) {
+      for (RegionState s: regionsInTransition.values()) {
+        if (s.isClosing() && !s.isClosed() &&
+            s.getServerName().compareTo(serverName) == 0) {
+          result.add(s.getRegionInfo());
+        }
       }
     }
+    return result;
   }
   
-  /**
-   * Called when all regions for a particular server have been closed
-   * 
-   * @param serverName
-   */
-  public void allRegionsClosed(String serverName) {
-    regionsToClose.remove(serverName);
-  }
-
   /** 
    * Check if a region is closing 
    * @param regionName 
    * @return true if the region is marked as closing, false otherwise
    */
   public boolean isClosing(byte [] regionName) {
-    return closingRegions.contains(regionName);
+    synchronized (regionsInTransition) {
+      RegionState s = regionsInTransition.get(regionName);
+      if (s != null) {
+        return s.isClosing();
+      }
+    }
+    return false;
   }
 
   /** 
-   * Set a region as no longer closing (closed?) 
-   * @param regionName
+   * Mark a region as closing 
+   * @param serverName
+   * @param regionInfo
+   * @param setOffline
    */
-  public void noLongerClosing(byte [] regionName) {
-    closingRegions.remove(regionName);
+  public void setClosing(String serverName, HRegionInfo regionInfo,
+      boolean setOffline) {
+    synchronized (regionsInTransition) {
+      RegionState s = regionsInTransition.get(regionInfo.getRegionName());
+      if (s != null) {
+        if (!s.isClosing()) {
+          throw new IllegalStateException(
+              "Cannot transition to closing from any other state. Region: " +
+              Bytes.toString(regionInfo.getRegionName()));
+        }
+        return;
+      }
+      s = new RegionState(regionInfo);
+      regionsInTransition.put(regionInfo.getRegionName(), s);
+      s.setClosing(serverName, setOffline);
+    }
   }
   
-  /** 
-   * Mark a region as closing 
+  /**
    * @param regionName
    */
-  public void setClosing(byte [] regionName) {
-    closingRegions.add(regionName);
+  public void setClosed(byte[] regionName) {
+    synchronized (regionsInTransition) {
+      RegionState s = regionsInTransition.get(regionName);
+      if (s != null) {
+        s.setClosed();
+      }
+    }
   }
-  
   /**
    * Add a meta region to the scan queue
    * @param m MetaRegion that needs to get scanned
@@ -844,31 +826,6 @@
   }
   
   /** 
-   * Note that a region should be offlined as soon as its closed. 
-   * @param regionName
-   */
-  public void markRegionForOffline(byte [] regionName) {
-    regionsToOffline.add(regionName);
-  }
-  
-  /** 
-   * Check if a region is marked for offline 
-   * @param regionName
-   * @return true if marked for offline, false otherwise
-   */
-  public boolean isMarkedForOffline(byte [] regionName) {
-    return regionsToOffline.contains(regionName);
-  }
-  
-  /** 
-   * Region was offlined as planned, remove it from the list to offline 
-   * @param regionName
-   */
-  public void regionOfflined(byte [] regionName) {
-    regionsToOffline.remove(regionName);
-  }
-  
-  /** 
    * Check if the initial root scan has been completed.
    * @return true if scan completed, false otherwise
    */
@@ -890,8 +847,7 @@
    */
   public boolean inSafeMode() {
     if (safeMode) {
-      if(isInitialMetaScanComplete() && unassignedRegions.size() == 0 &&
-          pendingRegions.size() == 0) {
+      if(isInitialMetaScanComplete() && regionsInTransition.size() == 0) {
         safeMode = false;
         LOG.info("exiting safe mode");
       } else {
@@ -1017,30 +973,174 @@
    */
   public void applyActions(HServerInfo serverInfo, ArrayList<HMsg> returnMsgs) {
     HServerAddress addr = serverInfo.getServerAddress();
-    Iterator<Pair<HRegionInfo,HServerAddress>> i =
+    Iterator<Pair<HRegionInfo, HServerAddress>> i =
       regionsToCompact.values().iterator();
-    while (i.hasNext()) {
-      Pair<HRegionInfo,HServerAddress> pair = i.next();
-      if (addr.equals(pair.getSecond())) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("sending MSG_REGION_COMPACT " + pair.getFirst() + " to " +
-              addr);
+    synchronized (regionsToCompact) {
+      while (i.hasNext()) {
+        Pair<HRegionInfo,HServerAddress> pair = i.next();
+        if (addr.equals(pair.getSecond())) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("sending MSG_REGION_COMPACT " + pair.getFirst() + " to " +
+                addr);
+          }
+          returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_COMPACT, pair.getFirst()));
+          i.remove();
         }
-        returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_COMPACT, pair.getFirst()));
-        i.remove();
       }
     }
     i = regionsToSplit.values().iterator();
-    while (i.hasNext()) {
-      Pair<HRegionInfo,HServerAddress> pair = i.next();
-      if (addr.equals(pair.getSecond())) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("sending MSG_REGION_SPLIT " + pair.getFirst() + " to " +
-              addr);
+    synchronized (regionsToSplit) {
+      while (i.hasNext()) {
+        Pair<HRegionInfo,HServerAddress> pair = i.next();
+        if (addr.equals(pair.getSecond())) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("sending MSG_REGION_SPLIT " + pair.getFirst() + " to " +
+                addr);
+          }
+          returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_SPLIT, pair.getFirst()));
+          i.remove();
         }
-        returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_SPLIT, pair.getFirst()));
-        i.remove();
       }
     }
   }
+  
+  private static class RegionState implements Comparable<RegionState> {
+    private final byte[] regionName;
+    private HRegionInfo regionInfo = null;
+    private boolean unassigned = false;
+    private boolean assigned = false;
+    private boolean pending = false;
+    private boolean closing = false;
+    private boolean closed = false;
+    private boolean offlined = false;
+    private String serverName = null;
+    
+    RegionState(byte[] regionName) {
+      this.regionName = regionName;
+    }
+    
+    RegionState(HRegionInfo info) {
+      this.regionName = info.getRegionName();
+      this.regionInfo = info;
+    }
+    
+    byte[] getRegionName() {
+      return regionName;
+    }
+
+    synchronized HRegionInfo getRegionInfo() {
+      return this.regionInfo;
+    }
+    
+    synchronized String getServerName() {
+      return this.serverName;
+    }
+    
+    synchronized boolean isUnassigned() {
+      return unassigned;
+    }
+
+    /*
+     * Note: callers of this method (reassignRootRegion, 
+     * regionsAwaitingAssignment, setUnassigned) ensure that this method is not
+     * called unless it is safe to do so.
+     */
+    synchronized void setUnassigned() {
+      this.unassigned = true;
+      this.assigned = false;
+      this.pending = false;
+      this.closing = false;
+      this.serverName = null;
+    }
+
+    synchronized boolean isAssigned() {
+      return assigned;
+    }
+
+    synchronized void setAssigned(String serverName) {
+      if (!this.unassigned) {
+        throw new IllegalStateException(
+            "Cannot assign a region that is not currently unassigned. Region: " +
+            Bytes.toString(regionName));
+      }
+      this.unassigned = false;
+      this.assigned = true;
+      this.pending = false;
+      this.closing = false;
+      this.serverName = serverName;
+    }
+
+    synchronized boolean isPending() {
+      return pending;
+    }
+
+    synchronized void setPending() {
+      if (!assigned) {
+        throw new IllegalStateException(
+            "Cannot set a region as pending if it has not been assigned. Region: " +
+            Bytes.toString(regionName));
+      }
+      this.unassigned = false;
+      this.assigned = false;
+      this.pending = true;
+      this.closing = false;
+    }
+
+    synchronized boolean isClosing() {
+      return closing;
+    }
+
+    synchronized void setClosing(String serverName, boolean setOffline) {
+      this.unassigned = false;
+      this.assigned = false;
+      this.pending = false;
+      this.closing = true;
+      this.offlined = setOffline;
+      this.serverName = serverName;
+    }
+    
+    synchronized boolean isClosed() {
+      return this.closed;
+    }
+    
+    synchronized void setClosed() {
+      if (!closing) {
+        throw new IllegalStateException(
+            "Cannot set a region to be closed if it was not already marked as" +
+            " closing. Region: " + Bytes.toString(regionName));
+      }
+      this.closed = true;
+    }
+    
+    synchronized boolean isOfflined() {
+      return this.offlined;
+    }
+
+    @Override
+    public synchronized String toString() {
+      return "region name: " + Bytes.toString(this.regionName) +
+          ", isUnassigned: " + this.unassigned + ", isAssigned: " +
+          this.assigned + ", isPending: " + this.pending + ", isClosing: " +
+          this.closing + ", isClosed: " + this.closed + ", isOfflined: " +
+          this.offlined;
+    }
+    
+    @Override
+    public boolean equals(Object o) {
+      return this.compareTo((RegionState) o) == 0;
+    }
+    
+    @Override
+    public int hashCode() {
+      return Bytes.toString(regionName).hashCode();
+    }
+    
+    @Override
+    public int compareTo(RegionState o) {
+      if (o == null) {
+        return 1;
+      }
+      return Bytes.compareTo(this.regionName, o.getRegionName());
+    }
+  }
 }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=729186&r1=729185&r2=729186&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/master/ServerManager.java Tue Dec 23 17:06:54 2008
@@ -260,14 +260,17 @@
             for (int i = 1; i < msgs.length; i++) {
               LOG.info("Processing " + msgs[i] + " from " + serverName);
               HRegionInfo info = msgs[i].getRegionInfo();
-              if (info.isRootRegion()) {
-                master.regionManager.reassignRootRegion();
-              } else if (info.isMetaTable()) {
-                master.regionManager.offlineMetaRegion(info.getStartKey());
-              }
-              if (!master.regionManager.isMarkedToClose(
-                  serverName, info.getRegionName())) {
-                master.regionManager.setUnassigned(info);
+              synchronized (master.regionManager) {
+                if (info.isRootRegion()) {
+                  master.regionManager.reassignRootRegion();
+                } else if (info.isMetaTable()) {
+                  master.regionManager.offlineMetaRegion(info.getStartKey());
+                }
+                if (!master.regionManager.isOfflined(info.getRegionName())) {
+                  master.regionManager.setUnassigned(info, true);
+                } else {
+                  master.regionManager.removeRegion(info);
+                }
               }
             }
           }
@@ -334,8 +337,6 @@
     HRegionInfo[] mostLoadedRegions, HMsg incomingMsgs[])
   throws IOException { 
     ArrayList<HMsg> returnMsgs = new ArrayList<HMsg>();
-    Map<byte [], HRegionInfo> regionsToKill = 
-      master.regionManager.removeMarkedToClose(serverName);
     if (serverInfo.getServerAddress() == null) {
       throw new NullPointerException("Server address cannot be null; " +
         "hbase-958 debugging");
@@ -346,7 +347,6 @@
       LOG.info("Received " + incomingMsgs[i] + " from " + serverName);
       switch (incomingMsgs[i].getType()) {
         case MSG_REPORT_PROCESS_OPEN:
-          master.regionManager.updateAssignmentDeadline(region);
           break;
         
         case MSG_REPORT_OPEN:
@@ -354,7 +354,7 @@
           break;
 
         case MSG_REPORT_CLOSE:
-          processRegionClose(serverInfo, region);
+          processRegionClose(region);
           break;
 
         case MSG_REPORT_SPLIT:
@@ -369,22 +369,21 @@
       }
     }
 
-    // Tell the region server to close regions that we have marked for closing.
-    if (regionsToKill != null) {
-      for (HRegionInfo i: regionsToKill.values()) {
+    synchronized (master.regionManager) {
+      // Tell the region server to close regions that we have marked for closing.
+      for (HRegionInfo i: master.regionManager.getMarkedToClose(serverName)) {
         returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_CLOSE, i));
         // Transition the region from toClose to closing state
-        master.regionManager.setClosing(i.getRegionName());
+        master.regionManager.setClosed(i.getRegionName());
       }
-    }
-
-    // Figure out what the RegionServer ought to do, and write back.
-    master.regionManager.assignRegions(serverInfo, serverName, 
-      mostLoadedRegions, returnMsgs);
 
-    // Send any pending table actions.
-    master.regionManager.applyActions(serverInfo, returnMsgs);
+      // Figure out what the RegionServer ought to do, and write back.
+      master.regionManager.assignRegions(serverInfo, serverName, 
+          mostLoadedRegions, returnMsgs);
 
+      // Send any pending table actions.
+      master.regionManager.applyActions(serverInfo, returnMsgs);
+    }
     return returnMsgs.toArray(new HMsg[returnMsgs.size()]);
   }
   
@@ -401,21 +400,23 @@
   private void processSplitRegion(String serverName, HServerInfo serverInfo, 
     HRegionInfo region, HMsg splitA, HMsg splitB, ArrayList<HMsg> returnMsgs) {
 
-    // Cancel any actions pending for the affected region.
-    // This prevents the master from sending a SPLIT message if the table
-    // has already split by the region server. 
-    master.regionManager.endActions(region.getRegionName());
-
-    HRegionInfo newRegionA = splitA.getRegionInfo();
-    master.regionManager.setUnassigned(newRegionA);
-
-    HRegionInfo newRegionB = splitB.getRegionInfo();
-    master.regionManager.setUnassigned(newRegionB);
-
-    if (region.isMetaTable()) {
-      // A meta region has split.
-      master.regionManager.offlineMetaRegion(region.getStartKey());
-      master.regionManager.incrementNumMetaRegions();
+    synchronized (master.regionManager) {
+      // Cancel any actions pending for the affected region.
+      // This prevents the master from sending a SPLIT message if the table
+      // has already split by the region server. 
+      master.regionManager.endActions(region.getRegionName());
+
+      HRegionInfo newRegionA = splitA.getRegionInfo();
+      master.regionManager.setUnassigned(newRegionA, false);
+
+      HRegionInfo newRegionB = splitB.getRegionInfo();
+      master.regionManager.setUnassigned(newRegionB, false);
+
+      if (region.isMetaTable()) {
+        // A meta region has split.
+        master.regionManager.offlineMetaRegion(region.getStartKey());
+        master.regionManager.incrementNumMetaRegions();
+      }
     }
   }
 
@@ -424,119 +425,105 @@
     HRegionInfo region, ArrayList<HMsg> returnMsgs) 
   throws IOException {
     boolean duplicateAssignment = false;
-    if (!master.regionManager.isUnassigned(region)) {
-      if (region.isRootRegion()) {
-        // Root region
-        HServerAddress rootServer = master.getRootRegionLocation();
-        if (rootServer != null) {
-          if (rootServer.toString().compareTo(serverName) == 0) {
-            // A duplicate open report from the correct server
+    synchronized (master.regionManager) {
+      if (!master.regionManager.isUnassigned(region) &&
+          !master.regionManager.isAssigned(region.getRegionName())) {
+        if (region.isRootRegion()) {
+          // Root region
+          HServerAddress rootServer = master.getRootRegionLocation();
+          if (rootServer != null) {
+            if (rootServer.toString().compareTo(serverName) == 0) {
+              // A duplicate open report from the correct server
+              return;
+            }
+            // We received an open report on the root region, but it is
+            // assigned to a different server
+            duplicateAssignment = true;
+          }
+        } else {
+          // Not root region. If it is not a pending region, then we are
+          // going to treat it as a duplicate assignment, although we can't 
+          // tell for certain that's the case.
+          if (master.regionManager.isPending(region.getRegionName())) {
+            // A duplicate report from the correct server
             return;
           }
-          // We received an open report on the root region, but it is
-          // assigned to a different server
           duplicateAssignment = true;
         }
-      } else {
-        // Not root region. If it is not a pending region, then we are
-        // going to treat it as a duplicate assignment, although we can't 
-        // tell for certain that's the case.
-        if (master.regionManager.isPending(region.getRegionName())) {
-          // A duplicate report from the correct server
-          return;
-        }
-        duplicateAssignment = true;
       }
-    }
     
-    if (duplicateAssignment) {
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("region server " + serverInfo.getServerAddress().toString()
-          + " should not have opened region " + region.getRegionName());
-      }
-
-      // This Region should not have been opened.
-      // Ask the server to shut it down, but don't report it as closed.  
-      // Otherwise the HMaster will think the Region was closed on purpose, 
-      // and then try to reopen it elsewhere; that's not what we want.
-      returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_CLOSE_WITHOUT_REPORT,
-        region, "Duplicate assignment".getBytes()));
-    } else {
-      // it was assigned, and it's not a duplicate assignment, so take it out 
-      // of the unassigned list.
-      master.regionManager.noLongerUnassigned(region);
-      if (region.isRootRegion()) {
-        // Store the Root Region location (in memory)
-        HServerAddress rootServer = serverInfo.getServerAddress();
-        master.connection.setRootRegionLocation(
-            new HRegionLocation(region, rootServer));
-        master.regionManager.setRootRegionLocation(rootServer);
-      } else {
-        // Note that the table has been assigned and is waiting for the
-        // meta table to be updated.
-        master.regionManager.setPending(region.getRegionName());
-        // Queue up an update to note the region location.
-        try {
-          master.toDoQueue.put(
-            new ProcessRegionOpen(master, serverInfo, region));
-        } catch (InterruptedException e) {
-          throw new RuntimeException(
-            "Putting into toDoQueue was interrupted.", e);
+      if (duplicateAssignment) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("region server " + serverInfo.getServerAddress().toString()
+              + " should not have opened region " + region.getRegionName());
         }
-      } 
+
+        // This Region should not have been opened.
+        // Ask the server to shut it down, but don't report it as closed.  
+        // Otherwise the HMaster will think the Region was closed on purpose, 
+        // and then try to reopen it elsewhere; that's not what we want.
+        returnMsgs.add(new HMsg(HMsg.Type.MSG_REGION_CLOSE_WITHOUT_REPORT,
+            region, "Duplicate assignment".getBytes()));
+      } else {
+        if (region.isRootRegion()) {
+          // it was assigned, and it's not a duplicate assignment, so take it out 
+          // of the unassigned list.
+          master.regionManager.removeRegion(region);
+
+          // Store the Root Region location (in memory)
+          HServerAddress rootServer = serverInfo.getServerAddress();
+          master.connection.setRootRegionLocation(
+              new HRegionLocation(region, rootServer));
+          master.regionManager.setRootRegionLocation(rootServer);
+        } else {
+          // Note that the table has been assigned and is waiting for the
+          // meta table to be updated.
+          master.regionManager.setPending(region.getRegionName());
+          // Queue up an update to note the region location.
+          try {
+            master.toDoQueue.put(
+                new ProcessRegionOpen(master, serverInfo, region));
+          } catch (InterruptedException e) {
+            throw new RuntimeException(
+                "Putting into toDoQueue was interrupted.", e);
+          }
+        } 
+      }
     }
   }
   
-  private void processRegionClose(
-      @SuppressWarnings("unused") HServerInfo serverInfo, HRegionInfo region) {
-    if (region.isRootRegion()) {
-      // Root region
-      if (region.isOffline()) {
-        // Can't proceed without root region. Shutdown.
-        LOG.fatal("root region is marked offline");
-        master.shutdown();
-      }
-      master.connection.setRootRegionLocation(null);
-      master.regionManager.reassignRootRegion();
-
-    } else {
-      boolean reassignRegion = !region.isOffline();
-      boolean offlineRegion = false;
-
-      // either this region is being closed because it was marked to close, or
-      // the region server is going down peacefully. in either case, we should
-      // at least try to remove it from the closing list. 
-      master.regionManager.noLongerClosing(region.getRegionName());
-
-      // if the region is marked to be offlined, we don't want to reassign it.
-      if (master.regionManager.isMarkedForOffline(region.getRegionName())) {
-        reassignRegion = false;
-        offlineRegion = true;
-      }
+  private void processRegionClose(HRegionInfo region) {
+    synchronized (master.regionManager) {
+      if (region.isRootRegion()) {
+        // Root region
+        master.connection.unsetRootRegionLocation();
+        master.regionManager.unsetRootRegion();
+        if (region.isOffline()) {
+          // Can't proceed without root region. Shutdown.
+          LOG.fatal("root region is marked offline");
+          master.shutdown();
+          return;
+        }
 
-      if (region.isMetaTable()) {
+      } else if (region.isMetaTable()) {
         // Region is part of the meta table. Remove it from onlineMetaRegions
         master.regionManager.offlineMetaRegion(region.getStartKey());
       }
 
-      // if the region is already on the unassigned list, let's remove it. this
-      // is safe because if it's going to be reassigned, it'll get added again
-      // shortly. if it's not going to get reassigned, then we need to make 
-      // sure it's not on the unassigned list, because that would contend with
-      // the ProcessRegionClose going on asynchronously.
-      master.regionManager.noLongerUnassigned(region);
-
-      // NOTE: we cannot put the region into unassignedRegions as that
-      //       changes the ordering of the messages we've received. In
-      //       this case, a close could be processed before an open
-      //       resulting in the master not agreeing on the region's
-      //       state.
+      boolean offlineRegion =
+        master.regionManager.isOfflined(region.getRegionName());
+      boolean reassignRegion = !region.isOffline() && !offlineRegion;
+
+      // NOTE: If the region was just being closed and not offlined, we cannot
+      //       mark the region unassignedRegions as that changes the ordering of
+      //       the messages we've received. In this case, a close could be
+      //       processed before an open resulting in the master not agreeing on
+      //       the region's state.
       try {
-        master.toDoQueue.put(new ProcessRegionClose(master, region, 
-            offlineRegion, reassignRegion));
+        master.toDoQueue.put(new ProcessRegionClose(master, region, offlineRegion,
+            reassignRegion));
       } catch (InterruptedException e) {
-        throw new RuntimeException(
-            "Putting into toDoQueue was interrupted.", e);
+        throw new RuntimeException("Putting into toDoQueue was interrupted.", e);
       }
     }
   }
@@ -545,9 +532,9 @@
   private boolean cancelLease(final String serverName) {
     boolean leaseCancelled = false;
     HServerInfo info = serversToServerInfo.remove(serverName);
+    // Only cancel lease and update load information once.
+    // This method can be called a couple of times during shutdown.
     if (info != null) {
-      // Only cancel lease and update load information once.
-      // This method can be called a couple of times during shutdown.
       if (master.getRootRegionLocation() != null &&
         info.getServerAddress().equals(master.getRootRegionLocation())) {
         master.regionManager.reassignRootRegion();