You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@accumulo.apache.org by GitBox <gi...@apache.org> on 2018/11/30 19:32:15 UTC

[GitHub] asfgit closed pull request #781: fixes ACCUMULO-4410: Master didn't not resume balancing after adminis…

asfgit closed pull request #781: fixes ACCUMULO-4410: Master didn't not resume balancing after adminis…
URL: https://github.com/apache/accumulo/pull/781
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/server/base/src/main/java/org/apache/accumulo/server/master/LiveTServerSet.java b/server/base/src/main/java/org/apache/accumulo/server/master/LiveTServerSet.java
index a4b39df2fc..163ed5ff6d 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/master/LiveTServerSet.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/master/LiveTServerSet.java
@@ -283,8 +283,7 @@ public synchronized void scanServers() {
       }
 
       // log.debug("Current: " + current.keySet());
-      if (!doomed.isEmpty() || !updates.isEmpty())
-        this.cback.update(this, doomed, updates);
+      this.cback.update(this, doomed, updates);
     } catch (Exception ex) {
       log.error("{}", ex.getMessage(), ex);
     }
@@ -372,8 +371,7 @@ public void process(WatchedEvent event) {
 
           try {
             checkServer(updates, doomed, path, server);
-            if (!doomed.isEmpty() || !updates.isEmpty())
-              this.cback.update(this, doomed, updates);
+            this.cback.update(this, doomed, updates);
           } catch (Exception ex) {
             log.error("Exception", ex);
           }
diff --git a/server/master/src/main/java/org/apache/accumulo/master/Master.java b/server/master/src/main/java/org/apache/accumulo/master/Master.java
index 2f124e350a..7255410684 100644
--- a/server/master/src/main/java/org/apache/accumulo/master/Master.java
+++ b/server/master/src/main/java/org/apache/accumulo/master/Master.java
@@ -1554,49 +1554,57 @@ public static void main(String[] args) throws Exception {
   @Override
   public void update(LiveTServerSet current, Set<TServerInstance> deleted,
       Set<TServerInstance> added) {
-    DeadServerList obit = new DeadServerList(
-        ZooUtil.getRoot(getInstance()) + Constants.ZDEADTSERVERS);
-    if (added.size() > 0) {
-      log.info("New servers: " + added);
-      for (TServerInstance up : added)
-        obit.delete(up.hostPort());
-    }
-    for (TServerInstance dead : deleted) {
-      String cause = "unexpected failure";
-      if (serversToShutdown.contains(dead))
-        cause = "clean shutdown"; // maybe an incorrect assumption
-      if (!getMasterGoalState().equals(MasterGoalState.CLEAN_STOP))
-        obit.post(dead.hostPort(), cause);
-    }
-
-    Set<TServerInstance> unexpected = new HashSet<>(deleted);
-    unexpected.removeAll(this.serversToShutdown);
-    if (unexpected.size() > 0) {
-      if (stillMaster() && !getMasterGoalState().equals(MasterGoalState.CLEAN_STOP)) {
-        log.warn("Lost servers " + unexpected);
+    // if we have deleted or added tservers, then adjust our dead server list
+    if (!deleted.isEmpty() || !added.isEmpty()) {
+      DeadServerList obit = new DeadServerList(
+          ZooUtil.getRoot(getInstance()) + Constants.ZDEADTSERVERS);
+      if (added.size() > 0) {
+        log.info("New servers: " + added);
+        for (TServerInstance up : added)
+          obit.delete(up.hostPort());
+      }
+      for (TServerInstance dead : deleted) {
+        String cause = "unexpected failure";
+        if (serversToShutdown.contains(dead))
+          cause = "clean shutdown"; // maybe an incorrect assumption
+        if (!getMasterGoalState().equals(MasterGoalState.CLEAN_STOP))
+          obit.post(dead.hostPort(), cause);
       }
-    }
-    serversToShutdown.removeAll(deleted);
-    badServers.keySet().removeAll(deleted);
-    // clear out any bad server with the same host/port as a new server
-    synchronized (badServers) {
-      cleanListByHostAndPort(badServers.keySet(), deleted, added);
-    }
-    synchronized (serversToShutdown) {
-      cleanListByHostAndPort(serversToShutdown, deleted, added);
-    }
 
-    synchronized (migrations) {
-      Iterator<Entry<KeyExtent,TServerInstance>> iter = migrations.entrySet().iterator();
-      while (iter.hasNext()) {
-        Entry<KeyExtent,TServerInstance> entry = iter.next();
-        if (deleted.contains(entry.getValue())) {
-          log.info("Canceling migration of " + entry.getKey() + " to " + entry.getValue());
-          iter.remove();
+      Set<TServerInstance> unexpected = new HashSet<>(deleted);
+      unexpected.removeAll(this.serversToShutdown);
+      if (unexpected.size() > 0) {
+        if (stillMaster() && !getMasterGoalState().equals(MasterGoalState.CLEAN_STOP)) {
+          log.warn("Lost servers " + unexpected);
+        }
+      }
+      serversToShutdown.removeAll(deleted);
+      badServers.keySet().removeAll(deleted);
+      // clear out any bad server with the same host/port as a new server
+      synchronized (badServers) {
+        cleanListByHostAndPort(badServers.keySet(), deleted, added);
+      }
+      synchronized (serversToShutdown) {
+        cleanListByHostAndPort(serversToShutdown, deleted, added);
+      }
+
+      synchronized (migrations) {
+        Iterator<Entry<KeyExtent,TServerInstance>> iter = migrations.entrySet().iterator();
+        while (iter.hasNext()) {
+          Entry<KeyExtent,TServerInstance> entry = iter.next();
+          if (deleted.contains(entry.getValue())) {
+            log.info("Canceling migration of " + entry.getKey() + " to " + entry.getValue());
+            iter.remove();
+          }
         }
       }
+      nextEvent.event("There are now %d tablet servers", current.size());
     }
-    nextEvent.event("There are now %d tablet servers", current.size());
+
+    // clear out any servers that are no longer current
+    // this is needed when we are using a fate operation to shutdown a tserver as it
+    // will continue to add the server to the serversToShutdown (ACCUMULO-4410)
+    serversToShutdown.retainAll(current.getCurrentServers());
   }
 
   private static void cleanListByHostAndPort(Collection<TServerInstance> badServers,
diff --git a/server/master/src/main/java/org/apache/accumulo/master/tserverOps/ShutdownTServer.java b/server/master/src/main/java/org/apache/accumulo/master/tserverOps/ShutdownTServer.java
index 183adca15f..a6693dd1a0 100644
--- a/server/master/src/main/java/org/apache/accumulo/master/tserverOps/ShutdownTServer.java
+++ b/server/master/src/main/java/org/apache/accumulo/master/tserverOps/ShutdownTServer.java
@@ -39,7 +39,7 @@
   private static final long serialVersionUID = 1L;
   private static final Logger log = LoggerFactory.getLogger(ShutdownTServer.class);
   private TServerInstance server;
-  private boolean force, requestedShutdown;
+  private boolean force;
 
   public ShutdownTServer(TServerInstance server, boolean force) {
     this.server = server;
@@ -54,12 +54,7 @@ public long isReady(long tid, Master master) throws Exception {
     }
 
     // Inform the master that we want this server to shutdown
-    // We don't want to spam the master with shutdown requests, so
-    // only send this request once
-    if (!requestedShutdown) {
-      master.shutdownTServer(server);
-      requestedShutdown = true;
-    }
+    master.shutdownTServer(server);
 
     if (master.onlineTabletServers().contains(server)) {
       TServerConnection connection = master.getConnection(server);
diff --git a/server/master/src/test/java/org/apache/accumulo/master/tableOps/ShutdownTServerTest.java b/server/master/src/test/java/org/apache/accumulo/master/tableOps/ShutdownTServerTest.java
index 2fc51b82d8..bd72d83c77 100644
--- a/server/master/src/test/java/org/apache/accumulo/master/tableOps/ShutdownTServerTest.java
+++ b/server/master/src/test/java/org/apache/accumulo/master/tableOps/ShutdownTServerTest.java
@@ -16,6 +16,7 @@
  */
 package org.apache.accumulo.master.tableOps;
 
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
 import java.util.Collections;
@@ -23,6 +24,7 @@
 
 import org.apache.accumulo.core.master.thrift.TableInfo;
 import org.apache.accumulo.core.master.thrift.TabletServerStatus;
+import org.apache.accumulo.fate.Repo;
 import org.apache.accumulo.master.Master;
 import org.apache.accumulo.master.tserverOps.ShutdownTServer;
 import org.apache.accumulo.server.master.LiveTServerSet.TServerConnection;
@@ -65,16 +67,25 @@ public void testSingleShutdown() throws Exception {
     // Reset the mocks
     EasyMock.reset(tserver, tserverCnxn, master);
 
-    // The same as above, but should not expect call shutdownTServer on master again
+    // reset the table map to the empty set to simulate all tablets unloaded
+    status.tableMap = new HashMap<>();
+    master.shutdownTServer(tserver);
+    EasyMock.expectLastCall().once();
     EasyMock.expect(master.onlineTabletServers()).andReturn(Collections.singleton(tserver));
     EasyMock.expect(master.getConnection(tserver)).andReturn(tserverCnxn);
     EasyMock.expect(tserverCnxn.getTableMap(false)).andReturn(status);
+    EasyMock.expect(master.getMasterLock()).andReturn(null);
+    tserverCnxn.halt(null);
+    EasyMock.expectLastCall().once();
 
     EasyMock.replay(tserver, tserverCnxn, master);
 
     // FATE op is not ready
     wait = op.isReady(tid, master);
-    assertTrue("Expected wait to be greater than 0", wait > 0);
+    assertTrue("Expected wait to be 0", wait == 0);
+
+    Repo<Master> op2 = op.call(tid, master);
+    assertNull("Expected no follow on step", op2);
 
     EasyMock.verify(tserver, tserverCnxn, master);
   }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services