You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2018/10/16 05:27:30 UTC
hbase git commit: HBASE-21266 Not running balancer because processing
dead regionservers, but empty dead rs list
Repository: hbase
Updated Branches:
refs/heads/branch-2.0 df40de044 -> a96cf8ee2
HBASE-21266 Not running balancer because processing dead regionservers, but empty dead rs list
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/a96cf8ee
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/a96cf8ee
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/a96cf8ee
Branch: refs/heads/branch-2.0
Commit: a96cf8ee25687a71db1b05a149ecf067e4be63e3
Parents: df40de0
Author: Andrew Purtell <ap...@apache.org>
Authored: Thu Oct 11 15:28:36 2018 -0700
Committer: Michael Stack <st...@apache.org>
Committed: Mon Oct 15 22:27:20 2018 -0700
----------------------------------------------------------------------
.../apache/hadoop/hbase/master/DeadServer.java | 81 +++++++++++++++-----
.../hadoop/hbase/master/TestDeadServer.java | 4 +-
.../TestEndToEndSplitTransaction.java | 6 +-
3 files changed, 68 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/a96cf8ee/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
index 116d24e..4183201 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
@@ -25,6 +25,8 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
+import com.google.common.base.Preconditions;
+
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
@@ -36,6 +38,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+
/**
* Class to hold dead servers list and utility querying dead server list.
* On znode expiration, servers are added here.
@@ -54,14 +57,9 @@ public class DeadServer {
private final Map<ServerName, Long> deadServers = new HashMap<>();
/**
- * Number of dead servers currently being processed
- */
- private int numProcessing = 0;
-
- /**
- * Whether a dead server is being processed currently.
+ * Set of dead servers currently being processed
*/
- private volatile boolean processing = false;
+ private final Set<ServerName> processingServers = new HashSet<ServerName>();
/**
* A dead server that comes back alive has a different start code. The new start code should be
@@ -76,7 +74,13 @@ public class DeadServer {
while (it.hasNext()) {
ServerName sn = it.next();
if (ServerName.isSameAddress(sn, newServerName)) {
+ // remove from deadServers
it.remove();
+ // remove from processingServers
+ boolean removed = processingServers.remove(sn);
+ if (removed) {
+ LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
+ }
return true;
}
}
@@ -93,13 +97,23 @@ public class DeadServer {
}
/**
+ * @param serverName server name.
+ * @return true if this server is on the processing servers list, false otherwise
+ */
+ public synchronized boolean isProcessingServer(final ServerName serverName) {
+ return processingServers.contains(serverName);
+ }
+
+ /**
* Checks if there are currently any dead servers being processed by the
* master. Returns true if at least one region server is currently being
* processed as dead.
*
* @return true if any RS are being processed as dead
*/
- public synchronized boolean areDeadServersInProgress() { return processing; }
+ public synchronized boolean areDeadServersInProgress() {
+ return !processingServers.isEmpty();
+ }
public synchronized Set<ServerName> copyServerNames() {
Set<ServerName> clone = new HashSet<>(deadServers.size());
@@ -112,10 +126,13 @@ public class DeadServer {
* @param sn the server name
*/
public synchronized void add(ServerName sn) {
- processing = true;
if (!deadServers.containsKey(sn)){
deadServers.put(sn, EnvironmentEdgeManager.currentTime());
}
+ boolean added = processingServers.add(sn);
+ if (LOG.isDebugEnabled() && added) {
+ LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size());
+ }
}
/**
@@ -123,18 +140,27 @@ public class DeadServer {
* @param sn ServerName for the dead server.
*/
public synchronized void notifyServer(ServerName sn) {
- if (LOG.isTraceEnabled()) { LOG.trace("Started processing " + sn); }
- processing = true;
- numProcessing++;
+ boolean added = processingServers.add(sn);
+ if (LOG.isDebugEnabled()) {
+ if (added) {
+ LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size());
+ }
+ LOG.debug("Started processing " + sn + "; numProcessing=" + processingServers.size());
+ }
}
+ /**
+ * Complete processing for this dead server.
+ * @param sn ServerName for the dead server.
+ */
public synchronized void finish(ServerName sn) {
- numProcessing--;
- if (LOG.isTraceEnabled()) LOG.trace("Finished " + sn + "; numProcessing=" + numProcessing);
-
- assert numProcessing >= 0: "Number of dead servers in processing should always be non-negative";
-
- if (numProcessing == 0) { processing = false; }
+ boolean removed = processingServers.remove(sn);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Finished processing " + sn + "; numProcessing=" + processingServers.size());
+ if (removed) {
+ LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
+ }
+ }
}
public synchronized int size() {
@@ -150,19 +176,33 @@ public class DeadServer {
while (it.hasNext()) {
ServerName sn = it.next();
if (ServerName.isSameAddress(sn, newServerName)) {
+ // remove from deadServers
it.remove();
+ // remove from processingServers
+ boolean removed = processingServers.remove(sn);
+ if (removed) {
+ LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
+ }
}
}
}
@Override
public synchronized String toString() {
+ // Display unified set of servers from the deadServers map and the processingServers set
+ Set<ServerName> servers = new HashSet<ServerName>();
+ servers.addAll(deadServers.keySet());
+ servers.addAll(processingServers);
StringBuilder sb = new StringBuilder();
- for (ServerName sn : deadServers.keySet()) {
+ for (ServerName sn : servers) {
if (sb.length() > 0) {
sb.append(", ");
}
sb.append(sn.toString());
+ // Star entries that are being processed
+ if (processingServers.contains(sn)) {
+ sb.append("*");
+ }
}
return sb.toString();
}
@@ -211,6 +251,9 @@ public class DeadServer {
*/
public synchronized boolean removeDeadServer(final ServerName deadServerName) {
+ Preconditions.checkState(!processingServers.contains(deadServerName),
+ "Asked to remove server still in processingServers set " + deadServerName +
+ " (numProcessing=" + processingServers.size() + ")");
if (deadServers.remove(deadServerName) == null) {
return false;
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/a96cf8ee/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
index 4e852f8..73ff789 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
@@ -123,7 +123,6 @@ public class TestDeadServer {
DeadServer d = new DeadServer();
-
d.add(hostname123);
mee.incValue(1);
d.add(hostname1234);
@@ -164,14 +163,17 @@ public class TestDeadServer {
d.add(hostname1234);
Assert.assertEquals(2, d.size());
+ d.finish(hostname123);
d.removeDeadServer(hostname123);
Assert.assertEquals(1, d.size());
+ d.finish(hostname1234);
d.removeDeadServer(hostname1234);
Assert.assertTrue(d.isEmpty());
d.add(hostname1234);
Assert.assertFalse(d.removeDeadServer(hostname123_2));
Assert.assertEquals(1, d.size());
+ d.finish(hostname1234);
Assert.assertTrue(d.removeDeadServer(hostname1234));
Assert.assertTrue(d.isEmpty());
}
http://git-wip-us.apache.org/repos/asf/hbase/blob/a96cf8ee/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
index 85e9d30..1418d6e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
@@ -296,7 +296,7 @@ public class TestEndToEndSplitTransaction {
Throwable ex;
RegionChecker(Configuration conf, Stoppable stopper, TableName tableName) throws IOException {
- super("RegionChecker", stopper, 10);
+ super("RegionChecker", stopper, 100);
this.conf = conf;
this.tableName = tableName;
@@ -509,7 +509,7 @@ public class TestEndToEndSplitTransaction {
log("found region in META: " + hri.getRegionNameAsString());
break;
}
- Threads.sleep(10);
+ Threads.sleep(100);
}
}
@@ -532,7 +532,7 @@ public class TestEndToEndSplitTransaction {
} catch (IOException ex) {
// wait some more
}
- Threads.sleep(10);
+ Threads.sleep(100);
}
}
}