You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jd...@apache.org on 2012/07/20 00:53:24 UTC

svn commit: r1363573 - /hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java

Author: jdcryans
Date: Thu Jul 19 22:53:24 2012
New Revision: 1363573

URL: http://svn.apache.org/viewvc?rev=1363573&view=rev
Log:
HBASE-6325  [replication] Race in ReplicationSourceManager.init can initiate a failover even if the node is alive

Modified:
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java?rev=1363573&r1=1363572&r2=1363573&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java Thu Jul 19 22:53:24 2012
@@ -81,7 +81,7 @@ public class ReplicationSourceManager {
   // The path to the latest log we saw, for new coming sources
   private Path latestPath;
   // List of all the other region servers in this cluster
-  private final List<String> otherRegionServers;
+  private final List<String> otherRegionServers = new ArrayList<String>();
   // Path to the hlogs directories
   private final Path logDir;
   // Path to the hlog archive
@@ -122,12 +122,9 @@ public class ReplicationSourceManager {
     this.sleepBeforeFailover = conf.getLong("replication.sleep.before.failover", 2000);
     this.zkHelper.registerRegionServerListener(
         new OtherRegionServerWatcher(this.zkHelper.getZookeeperWatcher()));
-    List<String> otherRSs =
-        this.zkHelper.getRegisteredRegionServers();
     this.zkHelper.registerRegionServerListener(
         new PeersWatcher(this.zkHelper.getZookeeperWatcher()));
     this.zkHelper.listPeersIdsAndWatch();
-    this.otherRegionServers = otherRSs == null ? new ArrayList<String>() : otherRSs;
     // It's preferable to failover 1 RS at a time, but with good zk servers
     // more could be processed at the same time.
     int nbWorkers = conf.getInt("replication.executor.workers", 1);
@@ -182,6 +179,7 @@ public class ReplicationSourceManager {
       return;
     }
     synchronized (otherRegionServers) {
+      refreshOtherRegionServersList();
       LOG.info("Current list of replicators: " + currentReplicators
           + " other RSs: " + otherRegionServers);
     }
@@ -399,6 +397,27 @@ public class ReplicationSourceManager {
   }
 
   /**
+   * Reads the list of region servers from ZK and atomically clears our
+   * local view of it and replaces it with the updated list.
+   * 
+   * @return true if the local list of the other region servers was updated
+   * with the ZK data (even if it was empty),
+   * false if the data was missing in ZK
+   */
+  private boolean refreshOtherRegionServersList() {
+    List<String> newRsList = zkHelper.getRegisteredRegionServers();
+    if (newRsList == null) {
+      return false;
+    } else {
+      synchronized (otherRegionServers) {
+        otherRegionServers.clear();
+        otherRegionServers.addAll(newRsList);
+      }
+    }
+    return true;
+  }
+
+  /**
    * Watcher used to be notified of the other region server's death
    * in the local cluster. It initiates the process to transfer the queues
    * if it is able to grab the lock.
@@ -417,7 +436,7 @@ public class ReplicationSourceManager {
      * @param path full path of the new node
      */
     public void nodeCreated(String path) {
-      refreshRegionServersList(path);
+      refreshListIfRightPath(path);
     }
 
     /**
@@ -428,7 +447,7 @@ public class ReplicationSourceManager {
       if (stopper.isStopped()) {
         return;
       }
-      boolean cont = refreshRegionServersList(path);
+      boolean cont = refreshListIfRightPath(path);
       if (!cont) {
         return;
       }
@@ -444,23 +463,14 @@ public class ReplicationSourceManager {
       if (stopper.isStopped()) {
         return;
       }
-      refreshRegionServersList(path);
+      refreshListIfRightPath(path);
     }
 
-    private boolean refreshRegionServersList(String path) {
+    private boolean refreshListIfRightPath(String path) {
       if (!path.startsWith(zkHelper.getZookeeperWatcher().rsZNode)) {
         return false;
       }
-      List<String> newRsList = (zkHelper.getRegisteredRegionServers());
-      if (newRsList == null) {
-        return false;
-      } else {
-        synchronized (otherRegionServers) {
-          otherRegionServers.clear();
-          otherRegionServers.addAll(newRsList);
-        }
-      }
-      return true;
+      return refreshOtherRegionServersList();
     }
   }