You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by na...@apache.org on 2023/04/25 14:26:58 UTC

[ozone] branch master updated: HDDS-8452. Scm server side RPC support for Scm Decommissioning. (#4595)

This is an automated email from the ASF dual-hosted git repository.

nanda pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new c422d99d81 HDDS-8452. Scm server side RPC support for Scm Decommissioning. (#4595)
c422d99d81 is described below

commit c422d99d81e9242999380c013a7ebb79df36732b
Author: Neil Joshi <ne...@gmail.com>
AuthorDate: Tue Apr 25 08:26:52 2023 -0600

    HDDS-8452. Scm server side RPC support for Scm Decommissioning. (#4595)
---
 .../hdds/scm/server/StorageContainerManager.java   | 35 ++++++++++++---
 .../hadoop/hdds/scm/ha/TestSCMHAManagerImpl.java   | 50 ++++++++++++++++++++++
 2 files changed, 79 insertions(+), 6 deletions(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index b1c83e390e..3f9f49ac0b 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -159,6 +159,7 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
 import org.apache.hadoop.security.authentication.client.AuthenticationException;
 import org.apache.hadoop.util.JvmPauseMonitor;
+import org.apache.ratis.protocol.RaftPeerId;
 import org.apache.ratis.util.ExitUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -2148,24 +2149,46 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
 
   /**
    * This will remove the given SCM node from HA Ring by removing it from
-   * Ratis Ring and deleting the related certificates from certificate store.
+   * Ratis Ring.
    *
    * @return true if remove was successful, else false.
    */
-  public boolean removePeerFromHARing(RemoveSCMRequest request)
+  public boolean removePeerFromHARing(String scmId)
       throws IOException {
+    // A null Ratis server is how this method detects a non-HA cluster.
+    if (getScmHAManager().getRatisServer() == null) {
+      throw new IOException("Cannot remove SCM " +
+          scmId + " in a non-HA cluster");
+    }
+
     // We cannot remove a node if it's currently leader.
-    if (scmContext.isLeader() && request.getScmId().equals(getScmId())) {
+    if (scmContext.isLeader() && scmId.equals(getScmId())) {
       throw new IOException("Cannot remove current leader.");
     }
 
     // Currently we don't support removal of primordial node.
-    if (request.getScmId().equals(primaryScmNodeId)) {
+    if (scmId.equals(primaryScmNodeId)) {
       throw new IOException("Removal of primordial node is not supported.");
     }
 
-    // TODO: Remove the certificate from certificate store.
-    return scmHAManager.removeSCM(request);
+    Preconditions.checkNotNull(getScmHAManager().getRatisServer()
+        .getDivision().getGroup());
+
+    // Reject an scmId that is not a peer of the current Ratis group.
+    if (getScmHAManager().getRatisServer().getDivision()
+        .getGroup().getPeer(RaftPeerId.valueOf(scmId)) == null) {
+      throw new IOException("ScmId " + scmId +
+          " supplied for scm removal not in Ratis Peer list");
+    }
+    // NOTE(review): the peer is looked up a second time below -- reuse it?
+    // Build the removal request from the peer's address in the Ratis group.
+    RemoveSCMRequest request = new RemoveSCMRequest(
+        getClusterId(), scmId,
+        getScmHAManager().getRatisServer().getDivision()
+            .getGroup().getPeer(RaftPeerId.valueOf(scmId))
+            .getAddress());
+
+    return getScmHAManager().removeSCM(request);
 
   }
 
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAManagerImpl.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAManagerImpl.java
index cc7aa0ec42..10d15c9170 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAManagerImpl.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAManagerImpl.java
@@ -21,6 +21,7 @@ import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.hdds.HddsConfigKeys;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.scm.AddSCMRequest;
+import org.apache.hadoop.hdds.scm.HddsTestUtils;
 import org.apache.hadoop.hdds.scm.RemoveSCMRequest;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.hdds.scm.block.BlockManager;
@@ -29,6 +30,7 @@ import org.apache.hadoop.hdds.scm.block.DeletedBlockLogImpl;
 import org.apache.hadoop.hdds.scm.metadata.SCMMetadataStore;
 import org.apache.hadoop.hdds.scm.node.NodeDecommissionManager;
 import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager;
+import org.apache.hadoop.hdds.scm.server.SCMConfigurator;
 import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer;
 import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
 import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationManager;
@@ -38,6 +40,7 @@ import org.apache.hadoop.hdds.utils.TransactionInfo;
 import org.apache.hadoop.hdds.utils.db.BatchOperation;
 import org.apache.hadoop.hdds.utils.db.DBStore;
 import org.apache.hadoop.hdds.utils.db.Table;
+import org.apache.hadoop.security.authentication.client.AuthenticationException;
 import org.apache.ozone.test.GenericTestUtils;
 import org.apache.ratis.server.DivisionInfo;
 import org.junit.jupiter.api.AfterEach;
@@ -50,6 +53,8 @@ import java.io.IOException;
 import java.util.UUID;
 import java.util.concurrent.TimeoutException;
 
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
@@ -128,6 +133,51 @@ class TestSCMHAManagerImpl {
     }
   }
 
+  private StorageContainerManager testsetup() throws Exception {
+    OzoneConfiguration config = new OzoneConfiguration();
+    config.set(ScmConfigKeys.OZONE_SCM_PRIMORDIAL_NODE_ID_KEY, "scm1");
+    File dir = GenericTestUtils.getRandomizedTestDir();
+    config.set(HddsConfigKeys.OZONE_METADATA_DIRS, dir.toString());
+    SCMConfigurator configurator = new SCMConfigurator();
+    configurator.setSCMHAManager(SCMHAManagerStub.getInstance(true));
+    configurator.setScmContext(SCMContext.emptyContext());
+    configurator.setSCMHAManager(primarySCMHAManager);
+    StorageContainerManager scm = HddsTestUtils.getScm(config, configurator);
+    // NOTE(review): second setSCMHAManager call presumably replaces the stub.
+    return scm;
+  }
+  @Test
+  public void testHARingRemovalErrors() throws IOException,
+      AuthenticationException {
+    OzoneConfiguration config = new OzoneConfiguration();
+    config.set(ScmConfigKeys.OZONE_SCM_PRIMORDIAL_NODE_ID_KEY, "scm1");
+    File dir = GenericTestUtils.getRandomizedTestDir();
+    config.set(HddsConfigKeys.OZONE_METADATA_DIRS, dir.toString());
+    SCMConfigurator configurator = new SCMConfigurator();
+    configurator.setSCMHAManager(SCMHAManagerStub.getInstance(true));
+    configurator.setScmContext(SCMContext.emptyContext());
+    configurator.setSCMHAManager(primarySCMHAManager);
+    final StorageContainerManager scm2 = HddsTestUtils
+        .getScm(config, configurator);
+    // NOTE(review): second setSCMHAManager call presumably replaces the stub.
+    try {
+      // An scmId that is not in the Ratis peer list must be rejected.
+      String randomScmId = UUID.randomUUID().toString();
+      IOException ex;
+      ex = assertThrows(IOException.class, () ->
+          scm2.removePeerFromHARing(randomScmId));
+      assertTrue(ex.getMessage().contains("Peer"));
+
+      // TODO: try removing primordial node from ratis ring
+
+      // This SCM's own id is expected to fail the leader check.
+      ex = assertThrows(IOException.class, () ->
+          scm2.removePeerFromHARing(scm2.getScmId()));
+      assertTrue(ex.getMessage().contains("leader"));
+    } finally {
+      scm2.getScmHAManager().getRatisServer().stop();
+    }
+  }
   @Test
   public void testRemoveSCM() throws IOException, InterruptedException {
     Assertions.assertEquals(1, primarySCMHAManager.getRatisServer()


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org