You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by jd...@apache.org on 2012/07/20 00:23:27 UTC
svn commit: r1363571 - in /hbase/branches/0.92: CHANGES.txt
src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
Author: jdcryans
Date: Thu Jul 19 22:23:26 2012
New Revision: 1363571
URL: http://svn.apache.org/viewvc?rev=1363571&view=rev
Log:
HBASE-6319 ReplicationSource can call terminate on itself and deadlock
HBASE-6325 [replication] Race in ReplicationSourceManager.init can initiate a failover even if the node is alive
Modified:
hbase/branches/0.92/CHANGES.txt
hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
Modified: hbase/branches/0.92/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/CHANGES.txt?rev=1363571&r1=1363570&r2=1363571&view=diff
==============================================================================
--- hbase/branches/0.92/CHANGES.txt (original)
+++ hbase/branches/0.92/CHANGES.txt Thu Jul 19 22:23:26 2012
@@ -92,6 +92,9 @@ Release 0.92.2 - Unreleased
HBASE-6357 Failed distributed log splitting stuck on master web UI
HBASE-6382 Upgrade Jersey to 1.8 to match Hadoop 1 and 2
HBASE-6392 UnknownRegionException blocks hbck from sideline big overlap regions
+ HBASE-6319 ReplicationSource can call terminate on itself and deadlock
+ HBASE-6325 [replication] Race in ReplicationSourceManager.init can initiate a failover even if the node is alive
+
IMPROVEMENTS
HBASE-5592 Make it easier to get a table from shell (Ben West)
Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java?rev=1363571&r1=1363570&r2=1363571&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java Thu Jul 19 22:23:26 2012
@@ -705,7 +705,10 @@ public class ReplicationSource extends T
+ " because an error occurred: " + reason, cause);
}
this.running = false;
- Threads.shutdown(this, this.sleepForRetries);
+ // Only wait for the thread to die if it's not us
+ if (!Thread.currentThread().equals(this)) {
+ Threads.shutdown(this, this.sleepForRetries);
+ }
}
/**
Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java?rev=1363571&r1=1363570&r2=1363571&view=diff
==============================================================================
--- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java (original)
+++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java Thu Jul 19 22:23:26 2012
@@ -78,7 +78,7 @@ public class ReplicationSourceManager {
// The path to the latest log we saw, for new coming sources
private Path latestPath;
// List of all the other region servers in this cluster
- private final List<String> otherRegionServers;
+ private final List<String> otherRegionServers = new ArrayList<String>();
// Path to the hlogs directories
private final Path logDir;
// Path to the hlog archive
@@ -119,12 +119,9 @@ public class ReplicationSourceManager {
this.sleepBeforeFailover = conf.getLong("replication.sleep.before.failover", 2000);
this.zkHelper.registerRegionServerListener(
new OtherRegionServerWatcher(this.zkHelper.getZookeeperWatcher()));
- List<String> otherRSs =
- this.zkHelper.getRegisteredRegionServers();
this.zkHelper.registerRegionServerListener(
new PeersWatcher(this.zkHelper.getZookeeperWatcher()));
this.zkHelper.listPeersIdsAndWatch();
- this.otherRegionServers = otherRSs == null ? new ArrayList<String>() : otherRSs;
// It's preferable to failover 1 RS at a time, but with good zk servers
// more could be processed at the same time.
int nbWorkers = conf.getInt("replication.executor.workers", 1);
@@ -179,6 +176,7 @@ public class ReplicationSourceManager {
return;
}
synchronized (otherRegionServers) {
+ refreshOtherRegionServersList();
LOG.info("Current list of replicators: " + currentReplicators
+ " other RSs: " + otherRegionServers);
}
@@ -398,6 +396,27 @@ public class ReplicationSourceManager {
}
/**
+ * Reads the list of region servers from ZK and atomically clears our
+ * local view of it and replaces it with the updated list.
+ *
+ * @return true if the local list of the other region servers was updated
+ * with the ZK data (even if it was empty),
+ * false if the data was missing in ZK
+ */
+ private boolean refreshOtherRegionServersList() {
+ List<String> newRsList = zkHelper.getRegisteredRegionServers();
+ if (newRsList == null) {
+ return false;
+ } else {
+ synchronized (otherRegionServers) {
+ otherRegionServers.clear();
+ otherRegionServers.addAll(newRsList);
+ }
+ }
+ return true;
+ }
+
+ /**
* Watcher used to be notified of the other region server's death
* in the local cluster. It initiates the process to transfer the queues
* if it is able to grab the lock.
@@ -416,7 +435,7 @@ public class ReplicationSourceManager {
* @param path full path of the new node
*/
public void nodeCreated(String path) {
- refreshRegionServersList(path);
+ refreshListIfRightPath(path);
}
/**
@@ -427,7 +446,7 @@ public class ReplicationSourceManager {
if (stopper.isStopped()) {
return;
}
- boolean cont = refreshRegionServersList(path);
+ boolean cont = refreshListIfRightPath(path);
if (!cont) {
return;
}
@@ -443,23 +462,14 @@ public class ReplicationSourceManager {
if (stopper.isStopped()) {
return;
}
- refreshRegionServersList(path);
+ refreshListIfRightPath(path);
}
- private boolean refreshRegionServersList(String path) {
+ private boolean refreshListIfRightPath(String path) {
if (!path.startsWith(zkHelper.getZookeeperWatcher().rsZNode)) {
return false;
}
- List<String> newRsList = (zkHelper.getRegisteredRegionServers());
- if (newRsList == null) {
- return false;
- } else {
- synchronized (otherRegionServers) {
- otherRegionServers.clear();
- otherRegionServers.addAll(newRsList);
- }
- }
- return true;
+ return refreshOtherRegionServersList();
}
}