You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2011/12/31 01:18:15 UTC

svn commit: r1225996 - in /hbase/branches/0.92: CHANGES.txt src/main/java/org/apache/hadoop/hbase/master/HMaster.java src/test/java/org/apache/hadoop/hbase/master/TestMasterZKSessionRecovery.java

Author: tedyu
Date: Sat Dec 31 00:18:14 2011
New Revision: 1225996

URL: http://svn.apache.org/viewvc?rev=1225996&view=rev
Log:
HBASE-5099 revert due to continuous 0.92 build failures

Removed:
    hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestMasterZKSessionRecovery.java
Modified:
    hbase/branches/0.92/CHANGES.txt
    hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Modified: hbase/branches/0.92/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/CHANGES.txt?rev=1225996&r1=1225995&r2=1225996&view=diff
==============================================================================
--- hbase/branches/0.92/CHANGES.txt (original)
+++ hbase/branches/0.92/CHANGES.txt Sat Dec 31 00:18:14 2011
@@ -501,8 +501,6 @@ Release 0.92.0 - Unreleased
    HBASE-5077  SplitLogWorker fails to let go of a task, kills the RS
    HBASE-5096  Replication does not handle deletes correctly. (Lars H)
    HBASE-5103  Fix improper master znode deserialization (Jonathan Hsieh)
-   HBASE-5099  ZK event thread waiting for root region assignment may block server
-               shutdown handler for the region server the root region was on (Jimmy)
 
   TESTS
    HBASE-4492  TestRollingRestart fails intermittently

Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1225996&r1=1225995&r2=1225996&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Sat Dec 31 00:18:14 2011
@@ -27,13 +27,8 @@ import java.net.InetSocketAddress;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -61,6 +56,7 @@ import org.apache.hadoop.hbase.client.Re
 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
 import org.apache.hadoop.hbase.executor.ExecutorService;
 import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorType;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.ipc.HBaseRPC;
 import org.apache.hadoop.hbase.ipc.HBaseServer;
 import org.apache.hadoop.hbase.ipc.HMasterInterface;
@@ -76,6 +72,7 @@ import org.apache.hadoop.hbase.master.ha
 import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler;
 import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler;
 import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
+import org.apache.hadoop.hbase.master.RegionPlan;
 import org.apache.hadoop.hbase.monitoring.MemoryBoundedLogMessageBuffer;
 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
@@ -91,8 +88,8 @@ import org.apache.hadoop.hbase.util.Thre
 import org.apache.hadoop.hbase.util.VersionInfo;
 import org.apache.hadoop.hbase.zookeeper.ClusterId;
 import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
-import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
 import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
+import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.hadoop.io.MapWritable;
 import org.apache.hadoop.io.Text;
@@ -1238,9 +1235,7 @@ implements HMasterInterface, HMasterRegi
   }
 
   /**
-   * We do the following in a different thread.  If it is not completed
-   * in time, we will time it out and assume it is not easy to recover.
-   *
+   * We do the following.
    * 1. Create a new ZK session. (since our current one is expired)
    * 2. Try to become a primary master again
    * 3. Initialize all ZK based system trackers.
@@ -1251,53 +1246,29 @@ implements HMasterInterface, HMasterRegi
    * @return True if we could successfully recover from ZK session expiry.
    * @throws InterruptedException
    * @throws IOException
-   * @throws KeeperException
-   * @throws ExecutionException
    */
   private boolean tryRecoveringExpiredZKSession() throws InterruptedException,
-      IOException, KeeperException, ExecutionException {
-
+      IOException, KeeperException {
     this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":"
-      + this.serverName.getPort(), this, true);
+        + this.serverName.getPort(), this, true);
 
-    Callable<Boolean> callable = new Callable<Boolean> () {
-      public Boolean call() throws InterruptedException,
-          IOException, KeeperException {
-        MonitoredTask status =
-          TaskMonitor.get().createStatus("Recovering expired ZK session");
-        try {
-          if (!becomeActiveMaster(status)) {
-            return Boolean.FALSE;
-          }
-          initializeZKBasedSystemTrackers();
-          // Update in-memory structures to reflect our earlier Root/Meta assignment.
-          assignRootAndMeta(status);
-          // process RIT if any
-          // TODO: Why does this not call AssignmentManager.joinCluster?  Otherwise
-          // we are not processing dead servers if any.
-          assignmentManager.processDeadServersAndRegionsInTransition();
-          return Boolean.TRUE;
-        } finally {
-          status.cleanup();
-        }
-      }
-    };
-
-    long timeout =
-      conf.getLong("hbase.master.zksession.recover.timeout", 300000);
-    java.util.concurrent.ExecutorService executor =
-      Executors.newSingleThreadExecutor();
-    Future<Boolean> result = executor.submit(callable);
-    executor.shutdown();
-    if (executor.awaitTermination(timeout, TimeUnit.MILLISECONDS)
-        && result.isDone()) {
-      Boolean recovered = result.get();
-      if (recovered != null) {
-        return recovered.booleanValue();
+    MonitoredTask status = 
+      TaskMonitor.get().createStatus("Recovering expired ZK session");
+    try {
+      if (!becomeActiveMaster(status)) {
+        return false;
       }
+      initializeZKBasedSystemTrackers();
+      // Update in-memory structures to reflect our earlier Root/Meta assignment.
+      assignRootAndMeta(status);
+      // process RIT if any
+      // TODO: Why does this not call AssignmentManager.joinCluster?  Otherwise
+      // we are not processing dead servers if any.
+      this.assignmentManager.processDeadServersAndRegionsInTransition();
+      return true;
+    } finally {
+      status.cleanup();
     }
-    executor.shutdownNow();
-    return false;
   }
 
   /**