Posted to commits@hbase.apache.org by ap...@apache.org on 2018/10/15 22:24:56 UTC

[1/3] hbase git commit: HBASE-21266 Not running balancer because processing dead regionservers, but empty dead rs list

Repository: hbase
Updated Branches:
  refs/heads/branch-1 ebad3ab8e -> ea9084626
  refs/heads/branch-1.3 0adef4e61 -> 743f9a4ed
  refs/heads/branch-1.4 137423d70 -> c6654b1d8


HBASE-21266 Not running balancer because processing dead regionservers, but empty dead rs list

Signed-off-by: Michael Stack <st...@apache.org>

Conflicts:
	hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
	hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/743f9a4e
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/743f9a4e
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/743f9a4e

Branch: refs/heads/branch-1.3
Commit: 743f9a4ed0d94a34bde78ff801b1f3d9d2229aa2
Parents: 0adef4e
Author: Andrew Purtell <ap...@apache.org>
Authored: Thu Oct 11 15:28:36 2018 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Mon Oct 15 14:50:42 2018 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hbase/master/DeadServer.java  | 92 ++++++++++++++------
 .../hadoop/hbase/master/TestDeadServer.java     |  2 -
 .../TestEndToEndSplitTransaction.java           |  6 +-
 .../TestSplitTransactionOnCluster.java          | 23 +++--
 4 files changed, 84 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
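
The root cause is visible in the DeadServer diff below: the old class tracked in-flight dead-server handling with a boolean `processing` plus an int `numProcessing`. `add()` set the flag unconditionally, only the `notifyServer()`/`finish()` pair maintained the counter and cleared the flag, and `removeDeadServer()`/`cleanPreviousInstance()` touched neither, so the flag could stay latched after the dead-server list was already empty and the master would keep skipping the balancer (the symptom in the subject line). The patch replaces both fields with a single `Set<ServerName> processingServers`, so `areDeadServersInProgress()` becomes a membership check, and `toString()` now stars entries still being processed and appends the processing count. A minimal standalone sketch of the stuck state under the old bookkeeping (plain Java, not the actual HBase class; names and the ServerName strings are illustrative):

  import java.util.HashMap;
  import java.util.Map;

  public class OldDeadServerSketch {
    private final Map<String, Long> deadServers = new HashMap<String, Long>();
    private int numProcessing = 0;      // maintained only by notifyServer()/finish()
    private boolean processing = false; // ...but add() also sets it

    synchronized void add(String sn) {
      processing = true;                // latched here
      if (!deadServers.containsKey(sn)) {
        deadServers.put(sn, System.currentTimeMillis());
      }
    }

    synchronized void notifyServer(String sn) { processing = true; numProcessing++; }

    synchronized void finish(String sn) {
      numProcessing--;
      if (numProcessing == 0) { processing = false; } // only cleared when the counter reaches zero
    }

    synchronized boolean removeDeadServer(String sn) {
      return deadServers.remove(sn) != null;          // never clears the flag
    }

    synchronized boolean areDeadServersInProgress() { return processing; }
    synchronized boolean isEmpty() { return deadServers.isEmpty(); }

    public static void main(String[] args) {
      OldDeadServerSketch ds = new OldDeadServerSketch();
      ds.add("rs1.example.org,16020,1539294516000");
      ds.removeDeadServer("rs1.example.org,16020,1539294516000");
      // The dead-server list is empty, yet the flag is still set, so a
      // balancer gate on areDeadServersInProgress() would never reopen:
      System.out.println(ds.isEmpty() + " " + ds.areDeadServersInProgress()); // prints: true true
    }
  }

With a set there is no separate flag to fall out of sync: a server is either in processingServers or it is not, and the branch-1.4/branch-1 variants of the patch (2/3 and 3/3 below) additionally guard removeDeadServer() with a Preconditions check.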


http://git-wip-us.apache.org/repos/asf/hbase/blob/743f9a4e/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
index 81accd2..ea8ca6a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
@@ -18,12 +18,7 @@
  */
 package org.apache.hadoop.hbase.master;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
-import org.apache.hadoop.hbase.util.Pair;
+import com.google.common.base.Preconditions;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -36,6 +31,13 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.Pair;
+
 /**
  * Class to hold dead servers list and utility querying dead server list.
  * On znode expiration, servers are added here.
@@ -54,14 +56,9 @@ public class DeadServer {
   private final Map<ServerName, Long> deadServers = new HashMap<ServerName, Long>();
 
   /**
-   * Number of dead servers currently being processed
+   * Set of dead servers currently being processed
    */
-  private int numProcessing = 0;
-
-  /**
-   * Whether a dead server is being processed currently.
-   */
-  private boolean processing = false;
+  private final Set<ServerName> processingServers = new HashSet<ServerName>();
 
   /**
    * A dead server that comes back alive has a different start code. The new start code should be
@@ -76,7 +73,13 @@ public class DeadServer {
     while (it.hasNext()) {
       ServerName sn = it.next();
       if (ServerName.isSameHostnameAndPort(sn, newServerName)) {
+        // remove from deadServers
         it.remove();
+        // remove from processingServers
+        boolean removed = processingServers.remove(sn);
+        if (removed) {
+          LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
+        }
         return true;
       }
     }
@@ -93,13 +96,23 @@ public class DeadServer {
   }
 
   /**
+   * @param serverName server name.
+   * @return true if this server is on the processing servers list false otherwise
+   */
+  public synchronized boolean isProcessingServer(final ServerName serverName) {
+    return processingServers.contains(serverName);
+  }
+
+  /**
    * Checks if there are currently any dead servers being processed by the
    * master.  Returns true if at least one region server is currently being
    * processed as dead.
    *
    * @return true if any RS are being processed as dead
    */
-  public synchronized boolean areDeadServersInProgress() { return processing; }
+  public synchronized boolean areDeadServersInProgress() {
+    return !processingServers.isEmpty();
+  }
 
   public synchronized Set<ServerName> copyServerNames() {
     Set<ServerName> clone = new HashSet<ServerName>(deadServers.size());
@@ -112,10 +125,13 @@ public class DeadServer {
    * @param sn the server name
    */
   public synchronized void add(ServerName sn) {
-    processing = true;
     if (!deadServers.containsKey(sn)){
       deadServers.put(sn, EnvironmentEdgeManager.currentTime());
     }
+    boolean added = processingServers.add(sn);
+    if (LOG.isDebugEnabled() && added) {
+      LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size());
+    }
   }
 
   /**
@@ -123,18 +139,27 @@ public class DeadServer {
    * @param sn ServerName for the dead server.
    */
   public synchronized void notifyServer(ServerName sn) {
-    if (LOG.isDebugEnabled()) { LOG.debug("Started processing " + sn); }
-    processing = true;
-    numProcessing++;
+    boolean added = processingServers.add(sn);
+    if (LOG.isDebugEnabled()) {
+      if (added) {
+        LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size());
+      }
+      LOG.debug("Started processing " + sn + "; numProcessing=" + processingServers.size());
+    }
   }
 
+  /**
+   * Complete processing for this dead server.
+   * @param sn ServerName for the dead server.
+   */
   public synchronized void finish(ServerName sn) {
-    numProcessing--;
-    if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + numProcessing);
-
-    assert numProcessing >= 0: "Number of dead servers in processing should always be non-negative";
-
-    if (numProcessing == 0) { processing = false; }
+    boolean removed = processingServers.remove(sn);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Finished processing " + sn + "; numProcessing=" + processingServers.size());
+      if (removed) {
+        LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
+      }
+    }
   }
 
   public synchronized int size() {
@@ -150,19 +175,36 @@ public class DeadServer {
     while (it.hasNext()) {
       ServerName sn = it.next();
       if (ServerName.isSameHostnameAndPort(sn, newServerName)) {
+        // remove from deadServers
         it.remove();
+        // remove from processingServers
+        boolean removed = processingServers.remove(sn);
+        if (removed) {
+          LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
+        }
       }
     }
   }
 
   public synchronized String toString() {
+    // Display unified set of servers from both maps
+    Set<ServerName> servers = new HashSet<ServerName>();
+    servers.addAll(deadServers.keySet());
+    servers.addAll(processingServers);
     StringBuilder sb = new StringBuilder();
-    for (ServerName sn : deadServers.keySet()) {
+    for (ServerName sn : servers) {
       if (sb.length() > 0) {
         sb.append(", ");
       }
       sb.append(sn.toString());
+      // Star entries that are being processed
+      if (processingServers.contains(sn)) {
+        sb.append("*");
+      }
     }
+    sb.append(" (numProcessing=");
+    sb.append(processingServers.size());
+    sb.append(')');
     return sb.toString();
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/743f9a4e/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
index ee8e173..23bdc49 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
@@ -116,7 +116,6 @@ public class TestDeadServer {
 
     DeadServer d = new DeadServer();
 
-
     d.add(hostname123);
     mee.incValue(1);
     d.add(hostname1234);
@@ -149,6 +148,5 @@ public class TestDeadServer {
     d.cleanPreviousInstance(hostname123_2);
     Assert.assertTrue(d.isEmpty());
   }
-
 }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/743f9a4e/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
index 1561f1b..78a06ad 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
@@ -459,7 +459,7 @@ public class TestEndToEndSplitTransaction {
     Throwable ex;
 
     RegionChecker(Configuration conf, Stoppable stopper, TableName tableName) throws IOException {
-      super("RegionChecker", stopper, 10);
+      super("RegionChecker", stopper, 100);
       this.conf = conf;
       this.tableName = tableName;
 
@@ -669,7 +669,7 @@ public class TestEndToEndSplitTransaction {
         log("found region in META: " + hri.getRegionNameAsString());
         break;
       }
-      Threads.sleep(10);
+      Threads.sleep(100);
     }
   }
 
@@ -690,7 +690,7 @@ public class TestEndToEndSplitTransaction {
         } catch (IOException ex) {
           // wait some more
         }
-        Threads.sleep(10);
+        Threads.sleep(100);
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/743f9a4e/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index 4d33988..2c17d0f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -1385,15 +1385,20 @@ public class TestSplitTransactionOnCluster {
       regionServer.kill();
       cluster.getRegionServerThreads().get(serverWith).join();
       // Wait until finish processing of shutdown
-      while (cluster.getMaster().getServerManager().areDeadServersInProgress()) {
-        Thread.sleep(10);
-      }
-      AssignmentManager am = cluster.getMaster().getAssignmentManager();
-      while(am.getRegionStates().isRegionsInTransition()) {
-        Thread.sleep(10);
-      }
-      assertEquals(am.getRegionStates().getRegionsInTransition().toString(), 0, am
-          .getRegionStates().getRegionsInTransition().size());
+      TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
+        @Override
+        public boolean evaluate() throws Exception {
+          return !cluster.getMaster().getServerManager().areDeadServersInProgress();
+        }
+      });
+      // Wait until there are no more regions in transition
+      TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
+        @Override
+        public boolean evaluate() throws Exception {
+          return !cluster.getMaster().getAssignmentManager().
+              getRegionStates().isRegionsInTransition();
+        }
+      });
       regionDirs =
           FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
       assertEquals(1,regionDirs.size());


[2/3] hbase git commit: HBASE-21266 Not running balancer because processing dead regionservers, but empty dead rs list

Posted by ap...@apache.org.
HBASE-21266 Not running balancer because processing dead regionservers, but empty dead rs list

Signed-off-by: Michael Stack <st...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/c6654b1d
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/c6654b1d
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/c6654b1d

Branch: refs/heads/branch-1.4
Commit: c6654b1d8eb0d58769540205f0d8c2c6fd256942
Parents: 137423d
Author: Andrew Purtell <ap...@apache.org>
Authored: Thu Oct 11 15:28:36 2018 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Mon Oct 15 14:51:15 2018 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hbase/master/DeadServer.java  | 95 ++++++++++++++------
 .../hadoop/hbase/master/TestDeadServer.java     |  4 +-
 .../TestEndToEndSplitTransaction.java           |  6 +-
 .../TestSplitTransactionOnCluster.java          | 23 +++--
 4 files changed, 90 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/c6654b1d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
index c1b5180..e7846b7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
@@ -18,12 +18,7 @@
  */
 package org.apache.hadoop.hbase.master;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
-import org.apache.hadoop.hbase.util.Pair;
+import com.google.common.base.Preconditions;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -36,6 +31,13 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.Pair;
+
 /**
  * Class to hold dead servers list and utility querying dead server list.
  * On znode expiration, servers are added here.
@@ -54,14 +56,9 @@ public class DeadServer {
   private final Map<ServerName, Long> deadServers = new HashMap<ServerName, Long>();
 
   /**
-   * Number of dead servers currently being processed
+   * Set of dead servers currently being processed
    */
-  private int numProcessing = 0;
-
-  /**
-   * Whether a dead server is being processed currently.
-   */
-  private boolean processing = false;
+  private final Set<ServerName> processingServers = new HashSet<ServerName>();
 
   /**
    * A dead server that comes back alive has a different start code. The new start code should be
@@ -76,7 +73,13 @@ public class DeadServer {
     while (it.hasNext()) {
       ServerName sn = it.next();
       if (ServerName.isSameHostnameAndPort(sn, newServerName)) {
+        // remove from deadServers
         it.remove();
+        // remove from processingServers
+        boolean removed = processingServers.remove(sn);
+        if (removed) {
+          LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
+        }
         return true;
       }
     }
@@ -93,13 +96,23 @@ public class DeadServer {
   }
 
   /**
+   * @param serverName server name.
+   * @return true if this server is on the processing servers list false otherwise
+   */
+  public synchronized boolean isProcessingServer(final ServerName serverName) {
+    return processingServers.contains(serverName);
+  }
+
+  /**
    * Checks if there are currently any dead servers being processed by the
    * master.  Returns true if at least one region server is currently being
    * processed as dead.
    *
    * @return true if any RS are being processed as dead
    */
-  public synchronized boolean areDeadServersInProgress() { return processing; }
+  public synchronized boolean areDeadServersInProgress() {
+    return !processingServers.isEmpty();
+  }
 
   public synchronized Set<ServerName> copyServerNames() {
     Set<ServerName> clone = new HashSet<ServerName>(deadServers.size());
@@ -112,10 +125,13 @@ public class DeadServer {
    * @param sn the server name
    */
   public synchronized void add(ServerName sn) {
-    processing = true;
     if (!deadServers.containsKey(sn)){
       deadServers.put(sn, EnvironmentEdgeManager.currentTime());
     }
+    boolean added = processingServers.add(sn);
+    if (LOG.isDebugEnabled() && added) {
+      LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size());
+    }
   }
 
   /**
@@ -123,18 +139,27 @@ public class DeadServer {
    * @param sn ServerName for the dead server.
    */
   public synchronized void notifyServer(ServerName sn) {
-    if (LOG.isDebugEnabled()) { LOG.debug("Started processing " + sn); }
-    processing = true;
-    numProcessing++;
+    boolean added = processingServers.add(sn);
+    if (LOG.isDebugEnabled()) {
+      if (added) {
+        LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size());
+      }
+      LOG.debug("Started processing " + sn + "; numProcessing=" + processingServers.size());
+    }
   }
 
+  /**
+   * Complete processing for this dead server.
+   * @param sn ServerName for the dead server.
+   */
   public synchronized void finish(ServerName sn) {
-    numProcessing--;
-    if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + numProcessing);
-
-    assert numProcessing >= 0: "Number of dead servers in processing should always be non-negative";
-
-    if (numProcessing == 0) { processing = false; }
+    boolean removed = processingServers.remove(sn);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Finished processing " + sn + "; numProcessing=" + processingServers.size());
+      if (removed) {
+        LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
+      }
+    }
   }
 
   public synchronized int size() {
@@ -150,20 +175,37 @@ public class DeadServer {
     while (it.hasNext()) {
       ServerName sn = it.next();
       if (ServerName.isSameHostnameAndPort(sn, newServerName)) {
+        // remove from deadServers
         it.remove();
+        // remove from processingServers
+        boolean removed = processingServers.remove(sn);
+        if (removed) {
+          LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
+        }
       }
     }
   }
 
   @Override
   public synchronized String toString() {
+    // Display unified set of servers from both maps
+    Set<ServerName> servers = new HashSet<ServerName>();
+    servers.addAll(deadServers.keySet());
+    servers.addAll(processingServers);
     StringBuilder sb = new StringBuilder();
-    for (ServerName sn : deadServers.keySet()) {
+    for (ServerName sn : servers) {
       if (sb.length() > 0) {
         sb.append(", ");
       }
       sb.append(sn.toString());
+      // Star entries that are being processed
+      if (processingServers.contains(sn)) {
+        sb.append("*");
+      }
     }
+    sb.append(" (numProcessing=");
+    sb.append(processingServers.size());
+    sb.append(')');
     return sb.toString();
   }
 
@@ -210,6 +252,9 @@ public class DeadServer {
    * @return true if this server was removed
    */
   public synchronized boolean removeDeadServer(final ServerName deadServerName) {
+    Preconditions.checkState(!processingServers.contains(deadServerName),
+      "Asked to remove server still in processingServers set " + deadServerName +
+          " (numProcessing=" + processingServers.size() + ")");
     if (deadServers.remove(deadServerName) == null) {
       return false;
     }
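
On branch-1.4 and branch-1 the patch additionally guards removeDeadServer() with the Preconditions check above, so a server still present in processingServers cannot be dropped from the dead-server list; callers must mark processing complete first. A sketch of the required call order (sn being the ServerName of a server whose shutdown handling is done):

  deadServer.finish(sn);            // take sn out of processingServers first
  deadServer.removeDeadServer(sn);  // otherwise checkState throws IllegalStateException

The TestDeadServer changes below add exactly these finish() calls ahead of each removeDeadServer().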

http://git-wip-us.apache.org/repos/asf/hbase/blob/c6654b1d/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
index f1a01c5..8876e70 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
@@ -116,7 +116,6 @@ public class TestDeadServer {
 
     DeadServer d = new DeadServer();
 
-
     d.add(hostname123);
     mee.incValue(1);
     d.add(hostname1234);
@@ -157,14 +156,17 @@ public class TestDeadServer {
     d.add(hostname1234);
     Assert.assertEquals(2, d.size());
 
+    d.finish(hostname123);
     d.removeDeadServer(hostname123);
     Assert.assertEquals(1, d.size());
+    d.finish(hostname1234);
     d.removeDeadServer(hostname1234);
     Assert.assertTrue(d.isEmpty());
 
     d.add(hostname1234);
     Assert.assertFalse(d.removeDeadServer(hostname123_2));
     Assert.assertEquals(1, d.size());
+    d.finish(hostname1234);
     Assert.assertTrue(d.removeDeadServer(hostname1234));
     Assert.assertTrue(d.isEmpty());
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/c6654b1d/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
index 86662a4..1892811 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
@@ -459,7 +459,7 @@ public class TestEndToEndSplitTransaction {
     Throwable ex;
 
     RegionChecker(Configuration conf, Stoppable stopper, TableName tableName) throws IOException {
-      super("RegionChecker", stopper, 10);
+      super("RegionChecker", stopper, 100);
       this.conf = conf;
       this.tableName = tableName;
 
@@ -669,7 +669,7 @@ public class TestEndToEndSplitTransaction {
         log("found region in META: " + hri.getRegionNameAsString());
         break;
       }
-      Threads.sleep(10);
+      Threads.sleep(100);
     }
   }
 
@@ -690,7 +690,7 @@ public class TestEndToEndSplitTransaction {
         } catch (IOException ex) {
           // wait some more
         }
-        Threads.sleep(10);
+        Threads.sleep(100);
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/c6654b1d/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index 32700af..fd1527b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -1380,15 +1380,20 @@ public class TestSplitTransactionOnCluster {
       regionServer.kill();
       cluster.getRegionServerThreads().get(serverWith).join();
       // Wait until finish processing of shutdown
-      while (cluster.getMaster().getServerManager().areDeadServersInProgress()) {
-        Thread.sleep(10);
-      }
-      AssignmentManager am = cluster.getMaster().getAssignmentManager();
-      while(am.getRegionStates().isRegionsInTransition()) {
-        Thread.sleep(10);
-      }
-      assertEquals(am.getRegionStates().getRegionsInTransition().toString(), 0, am
-          .getRegionStates().getRegionsInTransition().size());
+      TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
+        @Override
+        public boolean evaluate() throws Exception {
+          return !cluster.getMaster().getServerManager().areDeadServersInProgress();
+        }
+      });
+      // Wait until there are no more regions in transition
+      TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
+        @Override
+        public boolean evaluate() throws Exception {
+          return !cluster.getMaster().getAssignmentManager().
+              getRegionStates().isRegionsInTransition();
+        }
+      });
       regionDirs =
           FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
       assertEquals(1,regionDirs.size());


[3/3] hbase git commit: HBASE-21266 Not running balancer because processing dead regionservers, but empty dead rs list

Posted by ap...@apache.org.
HBASE-21266 Not running balancer because processing dead regionservers, but empty dead rs list

Signed-off-by: Michael Stack <st...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/ea908462
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/ea908462
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/ea908462

Branch: refs/heads/branch-1
Commit: ea90846268aacbd323b8fb513a6b9a058a545635
Parents: ebad3ab
Author: Andrew Purtell <ap...@apache.org>
Authored: Thu Oct 11 15:28:36 2018 -0700
Committer: Andrew Purtell <ap...@apache.org>
Committed: Mon Oct 15 14:51:24 2018 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hbase/master/DeadServer.java  | 95 ++++++++++++++------
 .../hadoop/hbase/master/TestDeadServer.java     |  4 +-
 .../TestEndToEndSplitTransaction.java           |  6 +-
 .../TestSplitTransactionOnCluster.java          | 23 +++--
 4 files changed, 90 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/ea908462/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
index c1b5180..e7846b7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java
@@ -18,12 +18,7 @@
  */
 package org.apache.hadoop.hbase.master;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
-import org.apache.hadoop.hbase.util.Pair;
+import com.google.common.base.Preconditions;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -36,6 +31,13 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.Pair;
+
 /**
  * Class to hold dead servers list and utility querying dead server list.
  * On znode expiration, servers are added here.
@@ -54,14 +56,9 @@ public class DeadServer {
   private final Map<ServerName, Long> deadServers = new HashMap<ServerName, Long>();
 
   /**
-   * Number of dead servers currently being processed
+   * Set of dead servers currently being processed
    */
-  private int numProcessing = 0;
-
-  /**
-   * Whether a dead server is being processed currently.
-   */
-  private boolean processing = false;
+  private final Set<ServerName> processingServers = new HashSet<ServerName>();
 
   /**
    * A dead server that comes back alive has a different start code. The new start code should be
@@ -76,7 +73,13 @@ public class DeadServer {
     while (it.hasNext()) {
       ServerName sn = it.next();
       if (ServerName.isSameHostnameAndPort(sn, newServerName)) {
+        // remove from deadServers
         it.remove();
+        // remove from processingServers
+        boolean removed = processingServers.remove(sn);
+        if (removed) {
+          LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
+        }
         return true;
       }
     }
@@ -93,13 +96,23 @@ public class DeadServer {
   }
 
   /**
+   * @param serverName server name.
+   * @return true if this server is on the processing servers list false otherwise
+   */
+  public synchronized boolean isProcessingServer(final ServerName serverName) {
+    return processingServers.contains(serverName);
+  }
+
+  /**
    * Checks if there are currently any dead servers being processed by the
    * master.  Returns true if at least one region server is currently being
    * processed as dead.
    *
    * @return true if any RS are being processed as dead
    */
-  public synchronized boolean areDeadServersInProgress() { return processing; }
+  public synchronized boolean areDeadServersInProgress() {
+    return !processingServers.isEmpty();
+  }
 
   public synchronized Set<ServerName> copyServerNames() {
     Set<ServerName> clone = new HashSet<ServerName>(deadServers.size());
@@ -112,10 +125,13 @@ public class DeadServer {
    * @param sn the server name
    */
   public synchronized void add(ServerName sn) {
-    processing = true;
     if (!deadServers.containsKey(sn)){
       deadServers.put(sn, EnvironmentEdgeManager.currentTime());
     }
+    boolean added = processingServers.add(sn);
+    if (LOG.isDebugEnabled() && added) {
+      LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size());
+    }
   }
 
   /**
@@ -123,18 +139,27 @@ public class DeadServer {
    * @param sn ServerName for the dead server.
    */
   public synchronized void notifyServer(ServerName sn) {
-    if (LOG.isDebugEnabled()) { LOG.debug("Started processing " + sn); }
-    processing = true;
-    numProcessing++;
+    boolean added = processingServers.add(sn);
+    if (LOG.isDebugEnabled()) {
+      if (added) {
+        LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size());
+      }
+      LOG.debug("Started processing " + sn + "; numProcessing=" + processingServers.size());
+    }
   }
 
+  /**
+   * Complete processing for this dead server.
+   * @param sn ServerName for the dead server.
+   */
   public synchronized void finish(ServerName sn) {
-    numProcessing--;
-    if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + numProcessing);
-
-    assert numProcessing >= 0: "Number of dead servers in processing should always be non-negative";
-
-    if (numProcessing == 0) { processing = false; }
+    boolean removed = processingServers.remove(sn);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Finished processing " + sn + "; numProcessing=" + processingServers.size());
+      if (removed) {
+        LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
+      }
+    }
   }
 
   public synchronized int size() {
@@ -150,20 +175,37 @@ public class DeadServer {
     while (it.hasNext()) {
       ServerName sn = it.next();
       if (ServerName.isSameHostnameAndPort(sn, newServerName)) {
+        // remove from deadServers
         it.remove();
+        // remove from processingServers
+        boolean removed = processingServers.remove(sn);
+        if (removed) {
+          LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
+        }
       }
     }
   }
 
   @Override
   public synchronized String toString() {
+    // Display unified set of servers from both maps
+    Set<ServerName> servers = new HashSet<ServerName>();
+    servers.addAll(deadServers.keySet());
+    servers.addAll(processingServers);
     StringBuilder sb = new StringBuilder();
-    for (ServerName sn : deadServers.keySet()) {
+    for (ServerName sn : servers) {
       if (sb.length() > 0) {
         sb.append(", ");
       }
       sb.append(sn.toString());
+      // Star entries that are being processed
+      if (processingServers.contains(sn)) {
+        sb.append("*");
+      }
     }
+    sb.append(" (numProcessing=");
+    sb.append(processingServers.size());
+    sb.append(')');
     return sb.toString();
   }
 
@@ -210,6 +252,9 @@ public class DeadServer {
    * @return true if this server was removed
    */
   public synchronized boolean removeDeadServer(final ServerName deadServerName) {
+    Preconditions.checkState(!processingServers.contains(deadServerName),
+      "Asked to remove server still in processingServers set " + deadServerName +
+          " (numProcessing=" + processingServers.size() + ")");
     if (deadServers.remove(deadServerName) == null) {
       return false;
     }

http://git-wip-us.apache.org/repos/asf/hbase/blob/ea908462/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
index f1a01c5..8876e70 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDeadServer.java
@@ -116,7 +116,6 @@ public class TestDeadServer {
 
     DeadServer d = new DeadServer();
 
-
     d.add(hostname123);
     mee.incValue(1);
     d.add(hostname1234);
@@ -157,14 +156,17 @@ public class TestDeadServer {
     d.add(hostname1234);
     Assert.assertEquals(2, d.size());
 
+    d.finish(hostname123);
     d.removeDeadServer(hostname123);
     Assert.assertEquals(1, d.size());
+    d.finish(hostname1234);
     d.removeDeadServer(hostname1234);
     Assert.assertTrue(d.isEmpty());
 
     d.add(hostname1234);
     Assert.assertFalse(d.removeDeadServer(hostname123_2));
     Assert.assertEquals(1, d.size());
+    d.finish(hostname1234);
     Assert.assertTrue(d.removeDeadServer(hostname1234));
     Assert.assertTrue(d.isEmpty());
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/ea908462/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
index 86662a4..1892811 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
@@ -459,7 +459,7 @@ public class TestEndToEndSplitTransaction {
     Throwable ex;
 
     RegionChecker(Configuration conf, Stoppable stopper, TableName tableName) throws IOException {
-      super("RegionChecker", stopper, 10);
+      super("RegionChecker", stopper, 100);
       this.conf = conf;
       this.tableName = tableName;
 
@@ -669,7 +669,7 @@ public class TestEndToEndSplitTransaction {
         log("found region in META: " + hri.getRegionNameAsString());
         break;
       }
-      Threads.sleep(10);
+      Threads.sleep(100);
     }
   }
 
@@ -690,7 +690,7 @@ public class TestEndToEndSplitTransaction {
         } catch (IOException ex) {
           // wait some more
         }
-        Threads.sleep(10);
+        Threads.sleep(100);
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/ea908462/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
index 32700af..fd1527b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java
@@ -1380,15 +1380,20 @@ public class TestSplitTransactionOnCluster {
       regionServer.kill();
       cluster.getRegionServerThreads().get(serverWith).join();
       // Wait until finish processing of shutdown
-      while (cluster.getMaster().getServerManager().areDeadServersInProgress()) {
-        Thread.sleep(10);
-      }
-      AssignmentManager am = cluster.getMaster().getAssignmentManager();
-      while(am.getRegionStates().isRegionsInTransition()) {
-        Thread.sleep(10);
-      }
-      assertEquals(am.getRegionStates().getRegionsInTransition().toString(), 0, am
-          .getRegionStates().getRegionsInTransition().size());
+      TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
+        @Override
+        public boolean evaluate() throws Exception {
+          return !cluster.getMaster().getServerManager().areDeadServersInProgress();
+        }
+      });
+      // Wait until there are no more regions in transition
+      TESTING_UTIL.waitFor(60000, 1000, new Waiter.Predicate<Exception>() {
+        @Override
+        public boolean evaluate() throws Exception {
+          return !cluster.getMaster().getAssignmentManager().
+              getRegionStates().isRegionsInTransition();
+        }
+      });
       regionDirs =
           FSUtils.getRegionDirs(tableDir.getFileSystem(cluster.getConfiguration()), tableDir);
       assertEquals(1,regionDirs.size());