You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ji...@apache.org on 2008/10/16 21:28:24 UTC

svn commit: r705333 - in /hadoop/hbase/trunk: CHANGES.txt src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Author: jimk
Date: Thu Oct 16 12:28:17 2008
New Revision: 705333

URL: http://svn.apache.org/viewvc?rev=705333&view=rev
Log:
HBASE-926   If no master, regionservers should hang out rather than fail on connection and shut themselves down

Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=705333&r1=705332&r2=705333&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Thu Oct 16 12:28:17 2008
@@ -29,6 +29,8 @@
    HBASE-929   Clarify that ttl in HColumnDescriptor is seconds
    HBASE-930   RegionServer stuck: HLog: Could not append. Requesting close of
                log java.io.IOException: Could not get block locations. Aborting...
+   HBASE-926   If no master, regionservers should hang out rather than fail on
+               connection and shut themselves down
 
   IMPROVEMENTS
    HBASE-901   Add a limit to key length, check key and value length on client side

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=705333&r1=705332&r2=705333&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Thu Oct 16 12:28:17 2008
@@ -294,11 +294,8 @@
         long now = System.currentTimeMillis();
         if (lastMsg != 0 && (now - lastMsg) >= serverLeaseTimeout) {
           // It has been way too long since we last reported to the master.
-          // Commit suicide.
-          LOG.fatal("unable to report to master for " + (now - lastMsg) +
-            " milliseconds - aborting server");
-          abort();
-          break;
+          LOG.warn("unable to report to master for " + (now - lastMsg) +
+            " milliseconds - retrying");
         }
         if ((now - lastMsg) >= msgInterval) {
           HMsg outboundArray[] = null;
@@ -403,12 +400,8 @@
               LOG.warn("Processing message (Retry: " + tries + ")", e);
               tries++;
             } else {
-              LOG.fatal("Exceeded max retries: " + this.numRetries, e);
-              if (!checkFileSystem()) {
-                continue;
-              }
-              // Something seriously wrong. Shutdown.
-              stop();
+              LOG.error("Exceeded max retries: " + this.numRetries, e);
+              checkFileSystem();
             }
           }
         }
@@ -701,17 +694,26 @@
    * Let the master know we're here
    * Run initialization using parameters passed us by the master.
    */
-  private MapWritable reportForDuty(final Sleeper sleeper)
-  throws IOException {
+  private MapWritable reportForDuty(final Sleeper sleeper) {
     if (LOG.isDebugEnabled()) {
       LOG.debug("Telling master at " +
         conf.get(MASTER_ADDRESS) + " that we are up");
     }
-    // Do initial RPC setup.  The final argument indicates that the RPC should retry indefinitely.
-    this.hbaseMaster = (HMasterRegionInterface)HbaseRPC.waitForProxy(
-      HMasterRegionInterface.class, HMasterRegionInterface.versionID,
-      new HServerAddress(conf.get(MASTER_ADDRESS)).getInetSocketAddress(),
-      this.conf, -1);
+    HMasterRegionInterface master = null;
+    while (!stopRequested.get() && master == null) {
+      try {
+        // Do initial RPC setup.  The final argument indicates that the RPC
+        // should retry indefinitely.
+        master = (HMasterRegionInterface)HbaseRPC.waitForProxy(
+            HMasterRegionInterface.class, HMasterRegionInterface.versionID,
+            new HServerAddress(conf.get(MASTER_ADDRESS)).getInetSocketAddress(),
+            this.conf, -1);
+      } catch (IOException e) {
+        LOG.warn("Unable to connect to master. Retrying. Error was:", e);
+        sleeper.sleep();
+      }
+    }
+    this.hbaseMaster = master;
     MapWritable result = null;
     long lastMsg = 0;
     while(!stopRequested.get()) {