You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by sh...@apache.org on 2021/04/07 08:59:12 UTC

[ozone] branch master updated: HDDS-5062. Add a config to bypass clusterId validation for bootstrapping SCM. (#2114)

This is an automated email from the ASF dual-hosted git repository.

shashikant pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 33ddcb3  HDDS-5062. Add a config to bypass clusterId validation for bootstrapping SCM. (#2114)
33ddcb3 is described below

commit 33ddcb3320a254b9aecc3d8ec0cfb7ef84b049a3
Author: bshashikant <sh...@apache.org>
AuthorDate: Wed Apr 7 14:28:39 2021 +0530

    HDDS-5062. Add a config to bypass clusterId validation for bootstrapping SCM. (#2114)
---
 .../org/apache/hadoop/hdds/scm/ScmConfigKeys.java  | 17 +++++++++++++++
 .../common/src/main/resources/ozone-default.xml    |  9 ++++++++
 .../hdds/scm/server/StorageContainerManager.java   | 25 ++++++++++++++--------
 .../ozone/scm/TestStorageContainerManagerHA.java   | 19 ++++++++++++++++
 4 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
index 8f47756..723712c 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
@@ -313,6 +313,23 @@ public final class ScmConfigKeys {
    */
   public static final String OZONE_SCM_PRIMORDIAL_NODE_ID_KEY =
       "ozone.scm.primordial.node.id";
+
+  /**
+   * The config when set to true skips the clusterId validation from leader
+   * scm during bootstrap. In SCM HA, the primary node starts up the ratis
+   * server while other bootstrapping nodes will get added to the ratis group.
+   * Now, if all the bootstrapping SCM get stopped post the group formation,
+   * the primary node will now step down from leadership as it will lose
+   * majority. If the bootstrapping nodes are now bootstrapped again,
+   * the bootstrapping node will try to first validate the cluster id from the
+   * leader SCM with the persisted cluster id, but as there is no leader
+   * existing, bootstrapping will keep on failing and retrying until
+   * it shuts down.
+   */
+  public static final String OZONE_SCM_SKIP_BOOTSTRAP_VALIDATION_KEY =
+      "ozone.scm.skip.bootstrap.validation";
+  public static final boolean OZONE_SCM_SKIP_BOOTSTRAP_VALIDATION_DEFAULT =
+      false;
   // The path where datanode ID is to be written to.
   // if this value is not set then container startup will fail.
   public static final String OZONE_SCM_DATANODE_ID_DIR =
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 8a5ebb5..52e6241 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -1957,6 +1957,15 @@
     </description>
   </property>
   <property>
+    <name>ozone.scm.skip.bootstrap.validation</name>
+    <value>false</value>
+    <tag>OZONE, SCM, HA</tag>
+    <description>
+      Optional config. When set to true, the clusterId validation
+      against the leader SCM is skipped during bootstrap.
+    </description>
+  </property>
+  <property>
     <name>ozone.scm.ratis.enable</name>
     <value>false</value>
     <tag>OZONE, SCM, HA, RATIS</tag>
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index 37ecf77..76c87b3 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -817,13 +817,8 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
       LOG.error("Bootstrap is not supported without SCM HA.");
       return false;
     }
-    SCMHANodeDetails scmhaNodeDetails = SCMHANodeDetails.loadSCMHAConfig(conf);
-
-    loginAsSCMUserIfSecurityEnabled(scmhaNodeDetails, conf);
-    // The node here will try to fetch the cluster id from any of existing
-    // running SCM instances.
-
     String primordialSCM = SCMHAUtils.getPrimordialSCM(conf);
+    SCMHANodeDetails scmhaNodeDetails = SCMHANodeDetails.loadSCMHAConfig(conf);
     String selfNodeId = scmhaNodeDetails.getLocalNodeDetails().getNodeId();
     if (primordialSCM != null && SCMHAUtils.isPrimordialSCM(conf, selfNodeId)) {
       LOG.info(
@@ -832,15 +827,27 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
               + "Ignoring it.", primordialSCM, selfNodeId);
       return true;
     }
+    SCMStorageConfig scmStorageConfig = new SCMStorageConfig(conf);
+    final String persistedClusterId = scmStorageConfig.getClusterID();
+    StorageState state = scmStorageConfig.getState();
+    if (state == StorageState.INITIALIZED && conf
+        .getBoolean(ScmConfigKeys.OZONE_SCM_SKIP_BOOTSTRAP_VALIDATION_KEY,
+            ScmConfigKeys.OZONE_SCM_SKIP_BOOTSTRAP_VALIDATION_DEFAULT)) {
+      LOG.info("Skipping clusterId validation during bootstrap command.  "
+              + "ClusterId id {}, SCM id {}", persistedClusterId,
+          scmStorageConfig.getScmId());
+      return true;
+    }
+
+    loginAsSCMUserIfSecurityEnabled(scmhaNodeDetails, conf);
+    // The node here will try to fetch the cluster id from any of existing
+    // running SCM instances.
     OzoneConfiguration config =
         SCMHAUtils.removeSelfId(conf,
             scmhaNodeDetails.getLocalNodeDetails().getNodeId());
     final ScmInfo scmInfo = HAUtils.getScmInfo(config);
-    SCMStorageConfig scmStorageConfig = new SCMStorageConfig(conf);
-    final String persistedClusterId = scmStorageConfig.getClusterID();
     final String fetchedId = scmInfo.getClusterId();
     Preconditions.checkNotNull(fetchedId);
-    StorageState state = scmStorageConfig.getState();
     if (state == StorageState.INITIALIZED) {
       Preconditions.checkNotNull(scmStorageConfig.getScmId());
       if (!fetchedId.equals(persistedClusterId)) {
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestStorageContainerManagerHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestStorageContainerManagerHA.java
index 591be84..7bfc8ff 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestStorageContainerManagerHA.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestStorageContainerManagerHA.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl;
+import org.apache.hadoop.hdds.scm.server.SCMStorageConfig;
 import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
 import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
 import org.apache.hadoop.ozone.client.ObjectStore;
@@ -228,4 +229,22 @@ public class TestStorageContainerManagerHA {
     Assert.assertTrue(
         StorageContainerManager.scmInit(conf2, scm2.getClusterId()));
   }
+
+  @Test
+  public void testBootStrapSCM() throws Exception {
+    StorageContainerManager scm2 = cluster.getStorageContainerManagers().get(1);
+    OzoneConfiguration conf2 = scm2.getConfiguration();
+    boolean isDeleted = scm2.getScmStorageConfig().getVersionFile().delete();
+    Assert.assertTrue(isDeleted);
+    final SCMStorageConfig scmStorageConfig = new SCMStorageConfig(conf2);
+    scmStorageConfig.setClusterId(UUID.randomUUID().toString());
+    scmStorageConfig.getCurrentDir().delete();
+    scmStorageConfig.initialize();
+    conf2.setBoolean(ScmConfigKeys.OZONE_SCM_SKIP_BOOTSTRAP_VALIDATION_KEY,
+        false);
+    Assert.assertFalse(StorageContainerManager.scmBootstrap(conf2));
+    conf2.setBoolean(ScmConfigKeys.OZONE_SCM_SKIP_BOOTSTRAP_VALIDATION_KEY,
+        true);
+    Assert.assertTrue(StorageContainerManager.scmBootstrap(conf2));
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org