You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jd...@apache.org on 2012/07/20 00:53:24 UTC
svn commit: r1363573 -
/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
Author: jdcryans
Date: Thu Jul 19 22:53:24 2012
New Revision: 1363573
URL: http://svn.apache.org/viewvc?rev=1363573&view=rev
Log:
HBASE-6325 [replication] Race in ReplicationSourceManager.init can initiate a failover even if the node is alive
Modified:
hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java?rev=1363573&r1=1363572&r2=1363573&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java Thu Jul 19 22:53:24 2012
@@ -81,7 +81,7 @@ public class ReplicationSourceManager {
// The path to the latest log we saw, for new coming sources
private Path latestPath;
// List of all the other region servers in this cluster
- private final List<String> otherRegionServers;
+ private final List<String> otherRegionServers = new ArrayList<String>();
// Path to the hlogs directories
private final Path logDir;
// Path to the hlog archive
@@ -122,12 +122,9 @@ public class ReplicationSourceManager {
this.sleepBeforeFailover = conf.getLong("replication.sleep.before.failover", 2000);
this.zkHelper.registerRegionServerListener(
new OtherRegionServerWatcher(this.zkHelper.getZookeeperWatcher()));
- List<String> otherRSs =
- this.zkHelper.getRegisteredRegionServers();
this.zkHelper.registerRegionServerListener(
new PeersWatcher(this.zkHelper.getZookeeperWatcher()));
this.zkHelper.listPeersIdsAndWatch();
- this.otherRegionServers = otherRSs == null ? new ArrayList<String>() : otherRSs;
// It's preferable to failover 1 RS at a time, but with good zk servers
// more could be processed at the same time.
int nbWorkers = conf.getInt("replication.executor.workers", 1);
@@ -182,6 +179,7 @@ public class ReplicationSourceManager {
return;
}
synchronized (otherRegionServers) {
+ refreshOtherRegionServersList();
LOG.info("Current list of replicators: " + currentReplicators
+ " other RSs: " + otherRegionServers);
}
@@ -399,6 +397,27 @@ public class ReplicationSourceManager {
}
/**
+ * Reads the list of region servers from ZK and atomically clears our
+ * local view of it and replaces it with the updated list.
+ *
+ * @return true if the local list of the other region servers was updated
+ * with the ZK data (even if it was empty),
+ * false if the data was missing in ZK
+ */
+ private boolean refreshOtherRegionServersList() {
+ List<String> newRsList = zkHelper.getRegisteredRegionServers();
+ if (newRsList == null) {
+ return false;
+ } else {
+ synchronized (otherRegionServers) {
+ otherRegionServers.clear();
+ otherRegionServers.addAll(newRsList);
+ }
+ }
+ return true;
+ }
+
+ /**
* Watcher used to be notified of the other region server's death
* in the local cluster. It initiates the process to transfer the queues
* if it is able to grab the lock.
@@ -417,7 +436,7 @@ public class ReplicationSourceManager {
* @param path full path of the new node
*/
public void nodeCreated(String path) {
- refreshRegionServersList(path);
+ refreshListIfRightPath(path);
}
/**
@@ -428,7 +447,7 @@ public class ReplicationSourceManager {
if (stopper.isStopped()) {
return;
}
- boolean cont = refreshRegionServersList(path);
+ boolean cont = refreshListIfRightPath(path);
if (!cont) {
return;
}
@@ -444,23 +463,14 @@ public class ReplicationSourceManager {
if (stopper.isStopped()) {
return;
}
- refreshRegionServersList(path);
+ refreshListIfRightPath(path);
}
- private boolean refreshRegionServersList(String path) {
+ private boolean refreshListIfRightPath(String path) {
if (!path.startsWith(zkHelper.getZookeeperWatcher().rsZNode)) {
return false;
}
- List<String> newRsList = (zkHelper.getRegisteredRegionServers());
- if (newRsList == null) {
- return false;
- } else {
- synchronized (otherRegionServers) {
- otherRegionServers.clear();
- otherRegionServers.addAll(newRsList);
- }
- }
- return true;
+ return refreshOtherRegionServersList();
}
}