You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by na...@apache.org on 2023/04/25 14:26:58 UTC
[ozone] branch master updated: HDDS-8452. Scm server side RPC support for Scm Decommissioning. (#4595)
This is an automated email from the ASF dual-hosted git repository.
nanda pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new c422d99d81 HDDS-8452. Scm server side RPC support for Scm Decommissioning. (#4595)
c422d99d81 is described below
commit c422d99d81e9242999380c013a7ebb79df36732b
Author: Neil Joshi <ne...@gmail.com>
AuthorDate: Tue Apr 25 08:26:52 2023 -0600
HDDS-8452. Scm server side RPC support for Scm Decommissioning. (#4595)
---
.../hdds/scm/server/StorageContainerManager.java | 35 ++++++++++++---
.../hadoop/hdds/scm/ha/TestSCMHAManagerImpl.java | 50 ++++++++++++++++++++++
2 files changed, 79 insertions(+), 6 deletions(-)
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index b1c83e390e..3f9f49ac0b 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -159,6 +159,7 @@ import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.util.JvmPauseMonitor;
+import org.apache.ratis.protocol.RaftPeerId;
import org.apache.ratis.util.ExitUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -2148,24 +2149,46 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
/**
* This will remove the given SCM node from HA Ring by removing it from
- * Ratis Ring and deleting the related certificates from certificate store.
+ * Ratis Ring.
*
* @return true if remove was successful, else false.
*/
- public boolean removePeerFromHARing(RemoveSCMRequest request)
+ public boolean removePeerFromHARing(String scmId)
throws IOException {
+
+ if (getScmHAManager().getRatisServer() == null) {
+ throw new IOException("Cannot remove SCM " +
+ scmId + " in a non-HA cluster");
+ }
+
// We cannot remove a node if it's currently leader.
- if (scmContext.isLeader() && request.getScmId().equals(getScmId())) {
+ if (scmContext.isLeader() && scmId.equals(getScmId())) {
throw new IOException("Cannot remove current leader.");
}
// Currently we don't support removal of primordial node.
- if (request.getScmId().equals(primaryScmNodeId)) {
+ if (scmId.equals(primaryScmNodeId)) {
throw new IOException("Removal of primordial node is not supported.");
}
- // TODO: Remove the certificate from certificate store.
- return scmHAManager.removeSCM(request);
+ Preconditions.checkNotNull(getScmHAManager().getRatisServer()
+ .getDivision().getGroup());
+
+ // Verify that the given SCM ID belongs to the current Ratis peer list.
+ if (getScmHAManager().getRatisServer().getDivision()
+ .getGroup().getPeer(RaftPeerId.valueOf(scmId)) == null) {
+ throw new IOException("ScmId " + scmId +
+ " supplied for scm removal not in Ratis Peer list");
+ }
+
+ // Build the RemoveSCMRequest using the peer's Ratis address.
+ RemoveSCMRequest request = new RemoveSCMRequest(
+ getClusterId(), scmId,
+ getScmHAManager().getRatisServer().getDivision()
+ .getGroup().getPeer(RaftPeerId.valueOf(scmId))
+ .getAddress());
+
+ return getScmHAManager().removeSCM(request);
}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAManagerImpl.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAManagerImpl.java
index cc7aa0ec42..10d15c9170 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAManagerImpl.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMHAManagerImpl.java
@@ -21,6 +21,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.scm.AddSCMRequest;
+import org.apache.hadoop.hdds.scm.HddsTestUtils;
import org.apache.hadoop.hdds.scm.RemoveSCMRequest;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.block.BlockManager;
@@ -29,6 +30,7 @@ import org.apache.hadoop.hdds.scm.block.DeletedBlockLogImpl;
import org.apache.hadoop.hdds.scm.metadata.SCMMetadataStore;
import org.apache.hadoop.hdds.scm.node.NodeDecommissionManager;
import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager;
+import org.apache.hadoop.hdds.scm.server.SCMConfigurator;
import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationManager;
@@ -38,6 +40,7 @@ import org.apache.hadoop.hdds.utils.TransactionInfo;
import org.apache.hadoop.hdds.utils.db.BatchOperation;
import org.apache.hadoop.hdds.utils.db.DBStore;
import org.apache.hadoop.hdds.utils.db.Table;
+import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.ozone.test.GenericTestUtils;
import org.apache.ratis.server.DivisionInfo;
import org.junit.jupiter.api.AfterEach;
@@ -50,6 +53,8 @@ import java.io.IOException;
import java.util.UUID;
import java.util.concurrent.TimeoutException;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -128,6 +133,51 @@ class TestSCMHAManagerImpl {
}
}
+ private StorageContainerManager testsetup() throws Exception {
+ OzoneConfiguration config = new OzoneConfiguration();
+ config.set(ScmConfigKeys.OZONE_SCM_PRIMORDIAL_NODE_ID_KEY, "scm1");
+ File dir = GenericTestUtils.getRandomizedTestDir();
+ config.set(HddsConfigKeys.OZONE_METADATA_DIRS, dir.toString());
+ SCMConfigurator configurator = new SCMConfigurator();
+ configurator.setSCMHAManager(SCMHAManagerStub.getInstance(true));
+ configurator.setScmContext(SCMContext.emptyContext());
+ configurator.setSCMHAManager(primarySCMHAManager);
+ StorageContainerManager scm = HddsTestUtils.getScm(config, configurator);
+
+ return scm;
+ }
+ @Test
+ public void testHARingRemovalErrors() throws IOException,
+ AuthenticationException {
+ OzoneConfiguration config = new OzoneConfiguration();
+ config.set(ScmConfigKeys.OZONE_SCM_PRIMORDIAL_NODE_ID_KEY, "scm1");
+ File dir = GenericTestUtils.getRandomizedTestDir();
+ config.set(HddsConfigKeys.OZONE_METADATA_DIRS, dir.toString());
+ SCMConfigurator configurator = new SCMConfigurator();
+ configurator.setSCMHAManager(SCMHAManagerStub.getInstance(true));
+ configurator.setScmContext(SCMContext.emptyContext());
+ configurator.setSCMHAManager(primarySCMHAManager);
+ final StorageContainerManager scm2 = HddsTestUtils
+ .getScm(config, configurator);
+
+ try {
+ // Removing an SCM ID that is not in the Ratis peer list must fail.
+ String randomScmId = UUID.randomUUID().toString();
+ IOException ex;
+ ex = assertThrows(IOException.class, () ->
+ scm2.removePeerFromHARing(randomScmId));
+ assertTrue(ex.getMessage().contains("Peer"));
+
+ // TODO: try removing primordial node from ratis ring
+
+ // Removing the current leader SCM from the Ratis ring must fail.
+ ex = assertThrows(IOException.class, () ->
+ scm2.removePeerFromHARing(scm2.getScmId()));
+ assertTrue(ex.getMessage().contains("leader"));
+ } finally {
+ scm2.getScmHAManager().getRatisServer().stop();
+ }
+ }
@Test
public void testRemoveSCM() throws IOException, InterruptedException {
Assertions.assertEquals(1, primarySCMHAManager.getRatisServer()
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org