You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by sh...@apache.org on 2021/06/11 07:24:28 UTC

[ozone] branch master updated: HDDS-5263. SCM may stay in safe mode forever after a unclean shutdown of SCM. (#2294)

This is an automated email from the ASF dual-hosted git repository.

shashikant pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new ac7166b  HDDS-5263. SCM may stay in safe mode forever after a unclean shutdown of SCM. (#2294)
ac7166b is described below

commit ac7166b88234f89881fa91ecd459e4563b758f92
Author: Bharat Viswanadham <bh...@apache.org>
AuthorDate: Fri Jun 11 12:54:06 2021 +0530

    HDDS-5263. SCM may stay in safe mode forever after a unclean shutdown of SCM. (#2294)
---
 .../hdds/scm/block/SCMBlockDeletingService.java    |  2 +-
 .../hdds/scm/container/ReplicationManager.java     |  4 +-
 .../org/apache/hadoop/hdds/scm/ha/SCMContext.java  | 60 +++++++++++++++
 .../apache/hadoop/hdds/scm/ha/SCMStateMachine.java | 70 ++++++++++++++----
 .../scm/pipeline/BackgroundPipelineCreatorV2.java  |  2 +-
 .../hdds/scm/safemode/ContainerSafeModeRule.java   | 56 ++++++++++++--
 .../hdds/scm/safemode/DataNodeSafeModeRule.java    |  8 ++
 .../scm/safemode/HealthyPipelineSafeModeRule.java  | 85 +++++++++++++++-------
 .../safemode/OneReplicaPipelineSafeModeRule.java   | 70 ++++++++++++------
 .../hdds/scm/safemode/SCMSafeModeManager.java      | 41 ++++++++++-
 .../hadoop/hdds/scm/safemode/SafeModeExitRule.java |  9 +++
 .../hdds/scm/server/StorageContainerManager.java   |  9 ++-
 .../hadoop/hdds/scm/block/TestBlockManager.java    |  2 +-
 .../apache/hadoop/hdds/scm/ha/TestSCMContext.java  |  3 +
 .../hdds/scm/pipeline/TestPipelineManagerImpl.java |  5 +-
 .../hdds/scm/pipeline/TestSCMPipelineManager.java  |  6 +-
 .../safemode/TestHealthyPipelineSafeModeRule.java  |  6 +-
 .../TestOneReplicaPipelineSafeModeRule.java        |  2 +-
 .../hdds/scm/safemode/TestSCMSafeModeManager.java  | 33 ++++++---
 .../apache/hadoop/ozone/MiniOzoneClusterImpl.java  |  6 ++
 .../ozone/scm/TestSCMInstallSnapshotWithHA.java    |  1 +
 21 files changed, 377 insertions(+), 103 deletions(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
index edf6ca8..46ddc4a 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
@@ -224,7 +224,7 @@ public class SCMBlockDeletingService extends BackgroundService
   public void notifyStatusChanged() {
     serviceLock.lock();
     try {
-      if (scmContext.isLeader()) {
+      if (scmContext.isLeaderReady()) {
         serviceStatus = ServiceStatus.RUNNING;
       } else {
         serviceStatus = ServiceStatus.PAUSING;
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
index 7bc345a..8accae6 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
@@ -1283,9 +1283,9 @@ public class ReplicationManager implements MetricsSource, SCMService {
   public void notifyStatusChanged() {
     serviceLock.lock();
     try {
-      // 1) SCMContext#isLeader returns true.
+      // 1) SCMContext#isLeaderReady returns true.
       // 2) not in safe mode.
-      if (scmContext.isLeader() && !scmContext.isInSafeMode()) {
+      if (scmContext.isLeaderReady() && !scmContext.isInSafeMode()) {
         // transition from PAUSING to RUNNING
         if (serviceStatus != ServiceStatus.RUNNING) {
           LOG.info("Service {} transitions to RUNNING.", getServiceName());
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java
index 9321e5a..b6e933b 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java
@@ -61,6 +61,7 @@ public final class SCMContext {
    * Raft related info.
    */
   private boolean isLeader;
+  private boolean isLeaderReady;
   private long term;
 
   /**
@@ -77,6 +78,7 @@ public final class SCMContext {
     this.term = term;
     this.safeModeStatus = safeModeStatus;
     this.scm = scm;
+    this.isLeaderReady = false;
   }
 
   /**
@@ -90,6 +92,12 @@ public final class SCMContext {
           isLeader, term, leader, newTerm);
 
       isLeader = leader;
+      // If it is not leader, set isLeaderReady to false.
+      if (!isLeader) {
+        isLeaderReady = false;
+        LOG.info("update <isLeaderReady> from <{}> to <{}>", isLeaderReady,
+            false);
+      }
       term = newTerm;
     } finally {
       lock.writeLock().unlock();
@@ -97,8 +105,32 @@ public final class SCMContext {
   }
 
   /**
+   * Set the isLeaderReady flag to true, indicating that the leader is ready
+   * to accept transactions.
+   *
+   * On the leader SCM, once all transactions from previous leader terms are
+   * applied, this is called to set isLeaderReady to true.
+   *
+   */
+  public void setLeaderReady() {
+    lock.writeLock().lock();
+    try {
+      LOG.info("update <isLeaderReady> from <{}> to <{}>",
+          isLeaderReady, true);
+
+      isLeaderReady = true;
+    } finally {
+      lock.writeLock().unlock();
+    }
+  }
+
+  /**
    * Check whether current SCM is leader or not.
    *
+   * Use this API to know whether SCM can send a command to DN once it has
+   * been elected as leader.
+   * Returns true if it is the leader, false otherwise.
+   *
    * @return isLeader
    */
   public boolean isLeader() {
@@ -114,6 +146,34 @@ public final class SCMContext {
     }
   }
 
+
+  /**
+   * Check whether the current SCM is the leader and ready.
+   *
+   * Use this API to know when all transactions from previous leader terms
+   * have been applied and the SCM DB/in-memory state is up to date; only
+   * then should SCM take the particular command/action.
+   *
+   * In general, all background services should use this API to decide when
+   * to start their service.
+   *
+   * Returns true if it is the leader and ready, false otherwise.
+   *
+   * @return isLeaderReady
+   */
+  public boolean isLeaderReady() {
+    lock.readLock().lock();
+    try {
+      if (term == INVALID_TERM) {
+        return true;
+      }
+
+      return isLeaderReady;
+    } finally {
+      lock.readLock().unlock();
+    }
+  }
+
   /**
    * Get term of current leader SCM.
    *
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
index 6fdbc5d..f4e13fb 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMStateMachine.java
@@ -26,6 +26,8 @@ import java.util.Optional;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicLong;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
@@ -80,6 +82,9 @@ public class SCMStateMachine extends BaseStateMachine {
   // and reinitialize().
   private DBCheckpoint installingDBCheckpoint = null;
 
+  private AtomicLong currentLeaderTerm = new AtomicLong(-1L);
+  private AtomicBoolean refreshedAfterLeaderReady = new AtomicBoolean(false);
+
   public SCMStateMachine(final StorageContainerManager scm,
       SCMHADBTransactionBuffer buffer) {
     this.scm = scm;
@@ -132,6 +137,12 @@ public class SCMStateMachine extends BaseStateMachine {
       final SCMRatisRequest request = SCMRatisRequest.decode(
           Message.valueOf(trx.getStateMachineLogEntry().getLogData()));
       applyTransactionFuture.complete(process(request));
+
+      // Once previous term transactions are applied, if still in safe mode,
+      // perform refreshAndValidate to update the safemode rule state.
+      if (scm.isInSafeMode() && refreshedAfterLeaderReady.get()) {
+        scm.getScmSafeModeManager().refreshAndValidate();
+      }
       transactionBuffer.updateLatestTrxInfo(TransactionInfo.builder()
           .setCurrentTerm(trx.getLogEntry().getTerm())
           .setTransactionIndex(trx.getLogEntry().getIndex())
@@ -229,21 +240,19 @@ public class SCMStateMachine extends BaseStateMachine {
     if (!isInitialized) {
       return;
     }
+
+    currentLeaderTerm.set(scm.getScmHAManager().getRatisServer().getDivision()
+        .getInfo().getCurrentTerm());
+
     if (!groupMemberId.getPeerId().equals(newLeaderId)) {
       LOG.info("leader changed, yet current SCM is still follower.");
       return;
     }
 
-    long term = scm.getScmHAManager()
-        .getRatisServer()
-        .getDivision()
-        .getInfo()
-        .getCurrentTerm();
-
-    LOG.info("current SCM becomes leader of term {}.", term);
+    LOG.info("current SCM becomes leader of term {}.", currentLeaderTerm);
 
-    scm.getScmContext().updateLeaderAndTerm(true, term);
-    scm.getSCMServiceManager().notifyStatusChanged();
+    scm.getScmContext().updateLeaderAndTerm(true,
+        currentLeaderTerm.get());
     scm.getSequenceIdGen().invalidateBatch();
 
     DeletedBlockLog deletedBlockLog = scm.getScmBlockManager()
@@ -251,7 +260,6 @@ public class SCMStateMachine extends BaseStateMachine {
     Preconditions.checkArgument(
         deletedBlockLog instanceof DeletedBlockLogImplV2);
     ((DeletedBlockLogImplV2) deletedBlockLog).onBecomeLeader();
-
     scm.getScmDecommissionManager().onBecomeLeader();
   }
 
@@ -286,17 +294,49 @@ public class SCMStateMachine extends BaseStateMachine {
 
   @Override
   public void notifyTermIndexUpdated(long term, long index) {
-    if (transactionBuffer != null) {
-      transactionBuffer.updateLatestTrxInfo(
-          TransactionInfo.builder().setCurrentTerm(term)
-              .setTransactionIndex(index).build());
-    }
+
     // We need to call updateLastApplied here because now in ratis when a
     // node becomes leader, it is checking stateMachineIndex >=
     // placeHolderIndex (when a node becomes leader, it writes a conf entry
     // with some information like its peers and termIndex). So, calling
     // updateLastApplied updates lastAppliedTermIndex.
     updateLastAppliedTermIndex(term, index);
+
+    // Skip below part if state machine is not initialized.
+
+    if (!isInitialized) {
+      return;
+    }
+
+    if (transactionBuffer != null) {
+      transactionBuffer.updateLatestTrxInfo(
+          TransactionInfo.builder().setCurrentTerm(term)
+              .setTransactionIndex(index).build());
+    }
+
+    if (currentLeaderTerm.get() == term) {
+      // On the leader SCM, once it is ready, set leader ready in SCMContext
+      // and notify SCM services.
+      if (scm.getScmHAManager().getRatisServer().getDivision().getInfo()
+          .isLeaderReady()) {
+        scm.getScmContext().setLeaderReady();
+        scm.getSCMServiceManager().notifyStatusChanged();
+      }
+
+      // Means all transactions before this term have been applied.
+      // This means after a restart, all pending transactions have been applied.
+      // Perform
+      // 1. Refresh Safemode rules state.
+      // 2. Start DN Rpc server.
+      if (!refreshedAfterLeaderReady.get()) {
+        scm.getScmSafeModeManager().refresh();
+        LOG.info("bharat starting from sm");
+        scm.getDatanodeProtocolServer().start();
+
+        refreshedAfterLeaderReady.set(true);
+      }
+      currentLeaderTerm.set(-1L);
+    }
   }
 
   @Override
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreatorV2.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreatorV2.java
index 2ab0b31..a88f422 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreatorV2.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreatorV2.java
@@ -254,7 +254,7 @@ public class BackgroundPipelineCreatorV2 implements SCMService {
     try {
       // 1) SCMContext#isLeader returns true.
       // 2) not in safe mode or createPipelineInSafeMode is true
-      if (scmContext.isLeader() &&
+      if (scmContext.isLeaderReady() &&
           (!scmContext.isInSafeMode() || createPipelineInSafeMode)) {
         // transition from PAUSING to RUNNING
         if (serviceStatus != ServiceStatus.RUNNING) {
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/ContainerSafeModeRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/ContainerSafeModeRule.java
index ffa0209..f2d7c02 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/ContainerSafeModeRule.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/ContainerSafeModeRule.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hdds.HddsConfigKeys;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerManagerV2;
 import org.apache.hadoop.hdds.scm.events.SCMEvents;
 import org.apache.hadoop.hdds.scm.server.SCMDatanodeProtocolServer.NodeRegistrationContainerReport;
 import org.apache.hadoop.hdds.server.events.EventQueue;
@@ -34,6 +35,8 @@ import org.apache.hadoop.hdds.server.events.TypedEvent;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Class defining Safe mode exit criteria for Containers.
@@ -41,6 +44,8 @@ import com.google.common.base.Preconditions;
 public class ContainerSafeModeRule extends
     SafeModeExitRule<NodeRegistrationContainerReport>{
 
+  public static final Logger LOG =
+      LoggerFactory.getLogger(ContainerSafeModeRule.class);
   // Required cutoff % for containers with at least 1 reported replica.
   private double safeModeCutoff;
   // Containers read from scm db (excluding containers in ALLOCATED state).
@@ -48,11 +53,14 @@ public class ContainerSafeModeRule extends
   private double maxContainer;
 
   private AtomicLong containerWithMinReplicas = new AtomicLong(0);
+  private final ContainerManagerV2 containerManager;
 
   public ContainerSafeModeRule(String ruleName, EventQueue eventQueue,
       ConfigurationSource conf,
-      List<ContainerInfo> containers, SCMSafeModeManager manager) {
+      List<ContainerInfo> containers,
+      ContainerManagerV2 containerManager, SCMSafeModeManager manager) {
     super(manager, ruleName, eventQueue);
+    this.containerManager = containerManager;
     safeModeCutoff = conf.getDouble(
         HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT,
         HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT_DEFAULT);
@@ -77,6 +85,8 @@ public class ContainerSafeModeRule extends
     maxContainer = containerMap.size();
     long cutOff = (long) Math.ceil(maxContainer * safeModeCutoff);
     getSafeModeMetrics().setNumContainerWithOneReplicaReportedThreshold(cutOff);
+
+    LOG.info("containers with one replica threshold count {}", cutOff);
   }
 
 
@@ -87,12 +97,12 @@ public class ContainerSafeModeRule extends
 
 
   @Override
-  protected boolean validate() {
+  protected synchronized boolean validate() {
     return getCurrentContainerThreshold() >= safeModeCutoff;
   }
 
   @VisibleForTesting
-  public double getCurrentContainerThreshold() {
+  public synchronized double getCurrentContainerThreshold() {
     if (maxContainer == 0) {
       return 1;
     }
@@ -100,7 +110,8 @@ public class ContainerSafeModeRule extends
   }
 
   @Override
-  protected void process(NodeRegistrationContainerReport reportsProto) {
+  protected synchronized void process(
+      NodeRegistrationContainerReport reportsProto) {
 
     reportsProto.getReport().getReportsList().forEach(c -> {
       if (containerMap.containsKey(c.getContainerID())) {
@@ -121,7 +132,7 @@ public class ContainerSafeModeRule extends
   }
 
   @Override
-  protected void cleanup() {
+  protected synchronized void cleanup() {
     containerMap.clear();
   }
 
@@ -134,4 +145,39 @@ public class ContainerSafeModeRule extends
             getCurrentContainerThreshold(), this.safeModeCutoff);
   }
 
+
+  @Override
+  public synchronized void refresh(boolean forceRefresh) {
+    if (forceRefresh) {
+      reInitializeRule();
+    } else {
+      if (!validate()) {
+        reInitializeRule();
+      }
+    }
+  }
+
+  private void reInitializeRule() {
+    containerMap.clear();
+    containerManager.getContainers().forEach(container -> {
+      // There can be containers in OPEN/CLOSING state which were never
+      // created by the client. We are not considering these containers for
+      // now. These containers can be handled by tracking pipelines.
+
+      Optional.ofNullable(container.getState())
+          .filter(state -> (state == HddsProtos.LifeCycleState.QUASI_CLOSED ||
+              state == HddsProtos.LifeCycleState.CLOSED))
+          .ifPresent(s -> containerMap.put(container.getContainerID(),
+              container));
+    });
+
+    maxContainer = containerMap.size();
+    long cutOff = (long) Math.ceil(maxContainer * safeModeCutoff);
+
+    LOG.info("Refreshed one replica container threshold {}, " +
+        "currentThreshold {}", cutOff, containerWithMinReplicas.get());
+    getSafeModeMetrics()
+        .setNumContainerWithOneReplicaReportedThreshold(cutOff);
+  }
+
 }
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/DataNodeSafeModeRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/DataNodeSafeModeRule.java
index ea5a78f..0c4ce84 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/DataNodeSafeModeRule.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/DataNodeSafeModeRule.java
@@ -86,4 +86,12 @@ public class DataNodeSafeModeRule extends
         .format("registered datanodes (=%d) >= required datanodes (=%d)",
             this.registeredDns, this.requiredDns);
   }
+
+
+  @Override
+  public void refresh(boolean forceRefresh) {
+    // Do nothing.
+    // This rule does not read anything from SCM DB state, so there is
+    // nothing to refresh or re-validate.
+  }
 }
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java
index 14bc58b..3ecf550 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/HealthyPipelineSafeModeRule.java
@@ -51,24 +51,22 @@ public class HealthyPipelineSafeModeRule extends SafeModeExitRule<Pipeline> {
   private int currentHealthyPipelineCount = 0;
   private final double healthyPipelinesPercent;
   private final Set<PipelineID> processedPipelineIDs = new HashSet<>();
+  private final PipelineManager pipelineManager;
+  private final int minHealthyPipelines;
 
   HealthyPipelineSafeModeRule(String ruleName, EventQueue eventQueue,
       PipelineManager pipelineManager,
       SCMSafeModeManager manager, ConfigurationSource configuration) {
     super(manager, ruleName, eventQueue);
+    this.pipelineManager = pipelineManager;
     healthyPipelinesPercent =
         configuration.getDouble(HddsConfigKeys.
                 HDDS_SCM_SAFEMODE_HEALTHY_PIPELINE_THRESHOLD_PCT,
             HddsConfigKeys.
                 HDDS_SCM_SAFEMODE_HEALTHY_PIPELINE_THRESHOLD_PCT_DEFAULT);
 
-    int minDatanodes = configuration.getInt(
-        HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE,
-        HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE_DEFAULT);
-
     // We only care about THREE replica pipeline
-    int minHealthyPipelines = minDatanodes /
-        HddsProtos.ReplicationFactor.THREE_VALUE;
+    minHealthyPipelines = getMinHealthyPipelines(configuration);
 
     Preconditions.checkArgument(
         (healthyPipelinesPercent >= 0.0 && healthyPipelinesPercent <= 1.0),
@@ -76,26 +74,22 @@ public class HealthyPipelineSafeModeRule extends SafeModeExitRule<Pipeline> {
             HDDS_SCM_SAFEMODE_HEALTHY_PIPELINE_THRESHOLD_PCT
             + " value should be >= 0.0 and <= 1.0");
 
-    // We want to wait for RATIS THREE factor write pipelines
-    int pipelineCount = pipelineManager.getPipelines(
-        new RatisReplicationConfig(HddsProtos.ReplicationFactor.THREE),
-        Pipeline.PipelineState.OPEN).size();
+    initializeRule(false);
+  }
 
-    // This value will be zero when pipeline count is 0.
-    // On a fresh installed cluster, there will be zero pipelines in the SCM
-    // pipeline DB.
-    healthyPipelineThresholdCount = Math.max(minHealthyPipelines,
-        (int) Math.ceil(healthyPipelinesPercent * pipelineCount));
+  private int getMinHealthyPipelines(ConfigurationSource config) {
+    int minDatanodes = config.getInt(
+        HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE,
+        HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE_DEFAULT);
 
-    LOG.info("Total pipeline count is {}, healthy pipeline " +
-        "threshold count is {}", pipelineCount, healthyPipelineThresholdCount);
+    // We only care about THREE replica pipeline
+    return minDatanodes / HddsProtos.ReplicationFactor.THREE_VALUE;
 
-    getSafeModeMetrics().setNumHealthyPipelinesThreshold(
-        healthyPipelineThresholdCount);
   }
 
   @VisibleForTesting
-  public void setHealthyPipelineThresholdCount(int actualPipelineCount) {
+  public synchronized void setHealthyPipelineThresholdCount(
+      int actualPipelineCount) {
     healthyPipelineThresholdCount =
         (int) Math.ceil(healthyPipelinesPercent * actualPipelineCount);
   }
@@ -106,12 +100,12 @@ public class HealthyPipelineSafeModeRule extends SafeModeExitRule<Pipeline> {
   }
 
   @Override
-  protected boolean validate() {
+  protected synchronized boolean validate() {
     return currentHealthyPipelineCount >= healthyPipelineThresholdCount;
   }
 
   @Override
-  protected void process(Pipeline pipeline) {
+  protected synchronized void process(Pipeline pipeline) {
 
     // When SCM is in safe mode for long time, already registered
     // datanode can send pipeline report again, or SCMPipelineManager will
@@ -130,22 +124,57 @@ public class HealthyPipelineSafeModeRule extends SafeModeExitRule<Pipeline> {
       SCMSafeModeManager.getLogger().info(
           "SCM in safe mode. Healthy pipelines reported count is {}, " +
               "required healthy pipeline reported count is {}",
-          currentHealthyPipelineCount, healthyPipelineThresholdCount);
+          currentHealthyPipelineCount, getHealthyPipelineThresholdCount());
+
     }
   }
 
+
+  public synchronized void refresh(boolean forceRefresh) {
+    if (forceRefresh) {
+      initializeRule(true);
+    } else {
+      if (!validate()) {
+        initializeRule(true);
+      }
+    }
+  }
+
+  private synchronized void initializeRule(boolean refresh) {
+    int pipelineCount = pipelineManager.getPipelines(
+        new RatisReplicationConfig(HddsProtos.ReplicationFactor.THREE),
+        Pipeline.PipelineState.OPEN).size();
+
+    healthyPipelineThresholdCount = Math.max(minHealthyPipelines,
+        (int) Math.ceil(healthyPipelinesPercent * pipelineCount));
+
+    if (refresh) {
+      LOG.info("Refreshed total pipeline count is {}, healthy pipeline " +
+          "threshold count is {}", pipelineCount,
+          healthyPipelineThresholdCount);
+    } else {
+      LOG.info("Total pipeline count is {}, healthy pipeline " +
+          "threshold count is {}", pipelineCount,
+          healthyPipelineThresholdCount);
+    }
+
+    getSafeModeMetrics().setNumHealthyPipelinesThreshold(
+        healthyPipelineThresholdCount);
+  }
+
+
   @Override
-  protected void cleanup() {
+  protected synchronized void cleanup() {
     processedPipelineIDs.clear();
   }
 
   @VisibleForTesting
-  public int getCurrentHealthyPipelineCount() {
+  public synchronized int getCurrentHealthyPipelineCount() {
     return currentHealthyPipelineCount;
   }
 
   @VisibleForTesting
-  public int getHealthyPipelineThresholdCount() {
+  public synchronized int getHealthyPipelineThresholdCount() {
     return healthyPipelineThresholdCount;
   }
 
@@ -153,7 +182,7 @@ public class HealthyPipelineSafeModeRule extends SafeModeExitRule<Pipeline> {
   public String getStatusText() {
     return String.format("healthy Ratis/THREE pipelines (=%d) >= "
             + "healthyPipelineThresholdCount (=%d)",
-        this.currentHealthyPipelineCount,
-        this.healthyPipelineThresholdCount);
+        getCurrentHealthyPipelineCount(),
+        getHealthyPipelineThresholdCount());
   }
 }
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/OneReplicaPipelineSafeModeRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/OneReplicaPipelineSafeModeRule.java
index 3f5bcd6..0303e46 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/OneReplicaPipelineSafeModeRule.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/OneReplicaPipelineSafeModeRule.java
@@ -56,6 +56,7 @@ public class OneReplicaPipelineSafeModeRule extends
   private Set<PipelineID> oldPipelineIDSet;
   private int currentReportedPipelineCount = 0;
   private PipelineManager pipelineManager;
+  private final double pipelinePercent;
 
 
   public OneReplicaPipelineSafeModeRule(String ruleName, EventQueue eventQueue,
@@ -63,32 +64,21 @@ public class OneReplicaPipelineSafeModeRule extends
       SCMSafeModeManager safeModeManager, ConfigurationSource configuration) {
     super(safeModeManager, ruleName, eventQueue);
 
-    double percent =
+    pipelinePercent =
         configuration.getDouble(
             HddsConfigKeys.HDDS_SCM_SAFEMODE_ONE_NODE_REPORTED_PIPELINE_PCT,
             HddsConfigKeys.
                 HDDS_SCM_SAFEMODE_ONE_NODE_REPORTED_PIPELINE_PCT_DEFAULT);
 
-    Preconditions.checkArgument((percent >= 0.0 && percent <= 1.0),
+    Preconditions.checkArgument((pipelinePercent >= 0.0
+            && pipelinePercent <= 1.0),
         HddsConfigKeys.
             HDDS_SCM_SAFEMODE_ONE_NODE_REPORTED_PIPELINE_PCT  +
             " value should be >= 0.0 and <= 1.0");
 
     this.pipelineManager = pipelineManager;
-    oldPipelineIDSet = pipelineManager.getPipelines(
-        new RatisReplicationConfig(ReplicationFactor.THREE),
-        Pipeline.PipelineState.OPEN)
-        .stream().map(p -> p.getId()).collect(Collectors.toSet());
-    int totalPipelineCount = oldPipelineIDSet.size();
-
-    thresholdCount = (int) Math.ceil(percent * totalPipelineCount);
-
-    LOG.info("Total pipeline count is {}, pipeline's with at least one " +
-        "datanode reported threshold count is {}", totalPipelineCount,
-        thresholdCount);
+    initializeRule(false);
 
-    getSafeModeMetrics().setNumPipelinesWithAtleastOneReplicaReportedThreshold(
-        thresholdCount);
   }
 
   @Override
@@ -97,12 +87,12 @@ public class OneReplicaPipelineSafeModeRule extends
   }
 
   @Override
-  protected boolean validate() {
+  protected synchronized boolean validate() {
     return currentReportedPipelineCount >= thresholdCount;
   }
 
   @Override
-  protected void process(PipelineReportFromDatanode report) {
+  protected synchronized void process(PipelineReportFromDatanode report) {
     Preconditions.checkNotNull(report);
     for (PipelineReport report1 : report.getReport().getPipelineReportList()) {
       Pipeline pipeline;
@@ -136,17 +126,17 @@ public class OneReplicaPipelineSafeModeRule extends
   }
 
   @Override
-  protected void cleanup() {
+  protected synchronized void cleanup() {
     reportedPipelineIDSet.clear();
   }
 
   @VisibleForTesting
-  public int getThresholdCount() {
+  public synchronized int getThresholdCount() {
     return thresholdCount;
   }
 
   @VisibleForTesting
-  public int getCurrentReportedPipelineCount() {
+  public synchronized int getCurrentReportedPipelineCount() {
     return currentReportedPipelineCount;
   }
 
@@ -156,7 +146,43 @@ public class OneReplicaPipelineSafeModeRule extends
         .format(
             "reported Ratis/THREE pipelines with at least one datanode (=%d) "
                 + ">= threshold (=%d)",
-            this.currentReportedPipelineCount,
-            this.thresholdCount);
+            getCurrentReportedPipelineCount(),
+            getThresholdCount());
+  }
+
+  @Override
+  public synchronized void refresh(boolean forceRefresh) {
+    if (forceRefresh) {
+      initializeRule(true);
+    } else {
+      if (!validate()) {
+        initializeRule(true);
+      }
+    }
+  }
+
+  private void initializeRule(boolean refresh) {
+
+    oldPipelineIDSet = pipelineManager.getPipelines(
+        new RatisReplicationConfig(ReplicationFactor.THREE),
+        Pipeline.PipelineState.OPEN)
+        .stream().map(p -> p.getId()).collect(Collectors.toSet());
+
+    int totalPipelineCount = oldPipelineIDSet.size();
+
+    thresholdCount = (int) Math.ceil(pipelinePercent * totalPipelineCount);
+
+    if (refresh) {
+      LOG.info("Refreshed Total pipeline count is {}, pipeline's with at " +
+              "least one datanode reported threshold count is {}",
+          totalPipelineCount, thresholdCount);
+    } else {
+      LOG.info("Total pipeline count is {}, pipeline's with at " +
+              "least one datanode reported threshold count is {}",
+          totalPipelineCount, thresholdCount);
+    }
+
+    getSafeModeMetrics().setNumPipelinesWithAtleastOneReplicaReportedThreshold(
+        thresholdCount);
   }
 }
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java
index 206e40b..60dba33 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java
@@ -28,6 +28,7 @@ import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.hdds.HddsConfigKeys;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerManagerV2;
 import org.apache.hadoop.hdds.scm.events.SCMEvents;
 import org.apache.hadoop.hdds.scm.ha.SCMContext;
 import org.apache.hadoop.hdds.scm.ha.SCMService.Event;
@@ -109,7 +110,8 @@ public class SCMSafeModeManager implements SafeModeManager {
   private final SafeModeMetrics safeModeMetrics;
 
   public SCMSafeModeManager(ConfigurationSource conf,
-      List<ContainerInfo> allContainers, PipelineManager pipelineManager,
+      List<ContainerInfo> allContainers,
+      ContainerManagerV2 containerManager, PipelineManager pipelineManager,
       EventQueue eventQueue, SCMServiceManager serviceManager,
       SCMContext scmContext) {
     this.config = conf;
@@ -125,7 +127,7 @@ public class SCMSafeModeManager implements SafeModeManager {
       this.safeModeMetrics = SafeModeMetrics.create();
       ContainerSafeModeRule containerSafeModeRule =
           new ContainerSafeModeRule(CONT_EXIT_RULE, eventQueue, config,
-              allContainers, this);
+              allContainers,  containerManager, this);
       DataNodeSafeModeRule dataNodeSafeModeRule =
           new DataNodeSafeModeRule(DN_EXIT_RULE, eventQueue, config, this);
       exitRules.put(CONT_EXIT_RULE, containerSafeModeRule);
@@ -194,11 +196,13 @@ public class SCMSafeModeManager implements SafeModeManager {
       EventPublisher eventQueue) {
 
     if (exitRules.get(ruleName) != null) {
-      validatedRules.add(ruleName);
+      boolean added = validatedRules.add(ruleName);
       if (preCheckRules.contains(ruleName)) {
         validatedPreCheckRules.add(ruleName);
       }
-      LOG.info("{} rule is successfully validated", ruleName);
+      if (added) {
+        LOG.info("{} rule is successfully validated", ruleName);
+      }
     } else {
       // This should never happen
       LOG.error("No Such Exit rule {}", ruleName);
@@ -253,6 +257,35 @@ public class SCMSafeModeManager implements SafeModeManager {
     emitSafeModeStatus();
   }
 
+  /**
+   * Refresh Rule state.
+   */
+  public void refresh() {
+    if (inSafeMode.get()) {
+      exitRules.values().forEach(rule -> {
+        // Force a refresh regardless of validate(): at this point validate()
+        // would be evaluated against stale state, so its result does not
+        // reflect the current state.
+        rule.refresh(true);
+      });
+    }
+  }
+
+  /**
+   * Refresh Rule state and validate rules.
+   */
+  public void refreshAndValidate() {
+    if (inSafeMode.get()) {
+      exitRules.values().forEach(rule -> {
+        rule.refresh(false);
+        if (rule.validate() && inSafeMode.get()) {
+          validateSafeModeExitRules(rule.getRuleName(), eventPublisher);
+          rule.cleanup();
+        }
+      });
+    }
+  }
+
   @Override
   public boolean getInSafeMode() {
     if (!isSafeModeEnabled) {
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeExitRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeExitRule.java
index 5c553b8..0d68cf6 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeExitRule.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeExitRule.java
@@ -116,4 +116,13 @@ public abstract class SafeModeExitRule<T> implements EventHandler<T> {
    * @return status text.
    */
   abstract String getStatusText();
+
+  /**
+   * Refresh the rule state from the current state of SCM.
+   *
+   * @param forceRefresh - refresh the rule regardless of whether validate()
+   * returns true or false.
+   *
+   */
+  protected abstract void refresh(boolean forceRefresh);
 }
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index 388dd8e..097de16 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -579,7 +579,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
       scmSafeModeManager = configurator.getScmSafeModeManager();
     } else {
       scmSafeModeManager = new SCMSafeModeManager(conf,
-          containerManager.getContainers(),
+          containerManager.getContainers(), containerManager,
           pipelineManager, eventQueue, serviceManager, scmContext);
     }
     scmDecommissionManager = new NodeDecommissionManager(conf, scmNodeManager,
@@ -1229,7 +1229,11 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
       LOG.info(buildRpcServerStartMessage("ScmDatanodeProtocl RPC " +
           "server", getDatanodeProtocolServer().getDatanodeRpcAddress()));
     }
-    getDatanodeProtocolServer().start();
+
+    // If HA is enabled, start datanode protocol server once leader is ready.
+    if (!scmStorageConfig.isSCMHAEnabled()) {
+      getDatanodeProtocolServer().start();
+    }
     if (getSecurityProtocolServer() != null) {
       getSecurityProtocolServer().start();
       persistSCMCertificates();
@@ -1516,7 +1520,6 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
   public boolean checkLeader() {
     // For NON-HA setup, the node will always be the leader
     if (!SCMHAUtils.isSCMHAEnabled(configuration)) {
-      Preconditions.checkArgument(scmContext.isLeader());
       return true;
     } else {
       // FOR HA setup, the node has to be the leader and ready to serve
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java
index 16c06cb..c033831 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java
@@ -157,7 +157,7 @@ public class TestBlockManager {
             pipelineManager,
             scmMetadataStore.getContainerTable());
     SCMSafeModeManager safeModeManager = new SCMSafeModeManager(conf,
-        containerManager.getContainers(),
+        containerManager.getContainers(), containerManager,
         pipelineManager, eventQueue, serviceManager, scmContext) {
       @Override
       public void emitSafeModeStatus() {
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMContext.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMContext.java
index 9fe2da1..f60c2eb 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMContext.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMContext.java
@@ -41,7 +41,9 @@ public class TestSCMContext {
 
     // become leader
     scmContext.updateLeaderAndTerm(true, 10);
+    scmContext.setLeaderReady();
     assertTrue(scmContext.isLeader());
+    assertTrue(scmContext.isLeaderReady());
     try {
       assertEquals(scmContext.getTermOfLeader(), 10);
     } catch (NotLeaderException e) {
@@ -51,6 +53,7 @@ public class TestSCMContext {
     // step down
     scmContext.updateLeaderAndTerm(false, 0);
     assertFalse(scmContext.isLeader());
+    assertFalse(scmContext.isLeaderReady());
   }
 
   @Test
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java
index a32f222..a0bb44a 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java
@@ -348,7 +348,7 @@ public class TestPipelineManagerImpl {
   public void testPipelineReport() throws Exception {
     PipelineManagerV2Impl pipelineManager = createPipelineManager(true);
     SCMSafeModeManager scmSafeModeManager =
-        new SCMSafeModeManager(conf, new ArrayList<>(), pipelineManager,
+        new SCMSafeModeManager(conf, new ArrayList<>(), null, pipelineManager,
             new EventQueue(), serviceManager, scmContext);
     Pipeline pipeline = pipelineManager
         .createPipeline(new RatisReplicationConfig(ReplicationFactor.THREE));
@@ -465,7 +465,8 @@ public class TestPipelineManagerImpl {
 
     SCMSafeModeManager scmSafeModeManager =
         new SCMSafeModeManager(new OzoneConfiguration(), new ArrayList<>(),
-            pipelineManager, new EventQueue(), serviceManager, scmContext);
+            null, pipelineManager, new EventQueue(),
+            serviceManager, scmContext);
     PipelineReportHandler pipelineReportHandler =
         new PipelineReportHandler(scmSafeModeManager, pipelineManager,
             SCMContext.emptyContext(), conf);
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java
index f67779f..daf171d 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java
@@ -222,8 +222,8 @@ public class TestSCMPipelineManager {
         mockRatisProvider);
 
     SCMSafeModeManager scmSafeModeManager =
-        new SCMSafeModeManager(conf, new ArrayList<>(), pipelineManager,
-            eventQueue, new SCMServiceManager(),
+        new SCMSafeModeManager(conf, new ArrayList<>(), null,
+            pipelineManager, eventQueue, new SCMServiceManager(),
             SCMContext.emptyContext());
 
     // create a pipeline in allocated state with no dns yet reported
@@ -494,7 +494,7 @@ public class TestSCMPipelineManager {
 
     SCMSafeModeManager scmSafeModeManager =
         new SCMSafeModeManager(new OzoneConfiguration(), new ArrayList<>(),
-            pipelineManager, eventQueue,
+            null, pipelineManager, eventQueue,
             new SCMServiceManager(),
             SCMContext.emptyContext());
     PipelineReportHandler pipelineReportHandler =
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java
index 317b879..14b839d 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java
@@ -92,7 +92,7 @@ public class TestHealthyPipelineSafeModeRule {
       pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS,
           mockRatisProvider);
       SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
-          config, containers, pipelineManager, eventQueue,
+          config, containers, null, pipelineManager, eventQueue,
           serviceManager, scmContext);
 
       HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
@@ -172,7 +172,7 @@ public class TestHealthyPipelineSafeModeRule {
       MockRatisPipelineProvider.markPipelineHealthy(pipeline3);
 
       SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
-          config, containers, pipelineManager, eventQueue,
+          config, containers, null, pipelineManager, eventQueue,
           serviceManager, scmContext);
 
       HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
@@ -270,7 +270,7 @@ public class TestHealthyPipelineSafeModeRule {
       MockRatisPipelineProvider.markPipelineHealthy(pipeline3);
 
       SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
-          config, containers, pipelineManager, eventQueue,
+          config, containers, null, pipelineManager, eventQueue,
           serviceManager, scmContext);
 
       HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestOneReplicaPipelineSafeModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestOneReplicaPipelineSafeModeRule.java
index 11042fa..e91a505 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestOneReplicaPipelineSafeModeRule.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestOneReplicaPipelineSafeModeRule.java
@@ -111,7 +111,7 @@ public class TestOneReplicaPipelineSafeModeRule {
         HddsProtos.ReplicationFactor.ONE);
 
     SCMSafeModeManager scmSafeModeManager =
-        new SCMSafeModeManager(ozoneConfiguration, containers,
+        new SCMSafeModeManager(ozoneConfiguration, containers, null,
             pipelineManager, eventQueue, serviceManager, scmContext);
 
     rule = scmSafeModeManager.getOneReplicaPipelineSafeModeRule();
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java
index 0f71ea0..8ac3fcd 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java
@@ -123,7 +123,7 @@ public class TestSCMSafeModeManager {
   @Test
   public void testSafeModeStateWithNullContainers() {
     new SCMSafeModeManager(config, Collections.emptyList(),
-        null, queue, serviceManager, scmContext);
+        null, null,  queue, serviceManager, scmContext);
   }
 
   private void testSafeMode(int numContainers) throws Exception {
@@ -135,7 +135,8 @@ public class TestSCMSafeModeManager {
       container.setState(HddsProtos.LifeCycleState.CLOSED);
     }
     scmSafeModeManager = new SCMSafeModeManager(
-        config, containers, null, queue, serviceManager, scmContext);
+        config, containers, null, null, queue,
+        serviceManager, scmContext);
 
     assertTrue(scmSafeModeManager.getInSafeMode());
     queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
@@ -168,7 +169,8 @@ public class TestSCMSafeModeManager {
       container.setState(HddsProtos.LifeCycleState.CLOSED);
     }
     scmSafeModeManager = new SCMSafeModeManager(
-        config, containers, null, queue, serviceManager, scmContext);
+        config, containers, null, null, queue,
+        serviceManager, scmContext);
 
     long cutOff = (long) Math.ceil(numContainers * config.getDouble(
         HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT,
@@ -264,7 +266,8 @@ public class TestSCMSafeModeManager {
               scmContext,
               serviceManager);
       scmSafeModeManager = new SCMSafeModeManager(
-          conf, containers, pipelineManager, queue, serviceManager, scmContext);
+          conf, containers, null, pipelineManager, queue, serviceManager,
+          scmContext);
       fail("testFailWithIncorrectValueForHealthyPipelinePercent");
     } catch (IllegalArgumentException ex) {
       GenericTestUtils.assertExceptionContains("value should be >= 0.0 and <=" +
@@ -289,7 +292,8 @@ public class TestSCMSafeModeManager {
               scmContext,
               serviceManager);
       scmSafeModeManager = new SCMSafeModeManager(
-          conf, containers, pipelineManager, queue, serviceManager, scmContext);
+          conf, containers, null, pipelineManager, queue, serviceManager,
+          scmContext);
       fail("testFailWithIncorrectValueForOneReplicaPipelinePercent");
     } catch (IllegalArgumentException ex) {
       GenericTestUtils.assertExceptionContains("value should be >= 0.0 and <=" +
@@ -313,7 +317,8 @@ public class TestSCMSafeModeManager {
               scmContext,
               serviceManager);
       scmSafeModeManager = new SCMSafeModeManager(
-          conf, containers, pipelineManager, queue, serviceManager, scmContext);
+          conf, containers, null, pipelineManager, queue, serviceManager,
+          scmContext);
       fail("testFailWithIncorrectValueForSafeModePercent");
     } catch (IllegalArgumentException ex) {
       GenericTestUtils.assertExceptionContains("value should be >= 0.0 and <=" +
@@ -367,7 +372,8 @@ public class TestSCMSafeModeManager {
     }
 
     scmSafeModeManager = new SCMSafeModeManager(
-        conf, containers, pipelineManager, queue, serviceManager, scmContext);
+        conf, containers, null, pipelineManager, queue, serviceManager,
+        scmContext);
 
     assertTrue(scmSafeModeManager.getInSafeMode());
     testContainerThreshold(containers, 1.0);
@@ -478,7 +484,8 @@ public class TestSCMSafeModeManager {
     conf.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED, false);
     PipelineManager pipelineManager = Mockito.mock(PipelineManager.class);
     scmSafeModeManager = new SCMSafeModeManager(
-        conf, containers, pipelineManager, queue, serviceManager, scmContext);
+        conf, containers, null, pipelineManager, queue, serviceManager,
+        scmContext);
     assertFalse(scmSafeModeManager.getInSafeMode());
   }
 
@@ -510,7 +517,7 @@ public class TestSCMSafeModeManager {
     }
 
     scmSafeModeManager = new SCMSafeModeManager(
-        config, containers, null, queue, serviceManager, scmContext);
+        config, containers, null, null, queue, serviceManager, scmContext);
 
     assertTrue(scmSafeModeManager.getInSafeMode());
 
@@ -534,7 +541,8 @@ public class TestSCMSafeModeManager {
     OzoneConfiguration conf = new OzoneConfiguration(config);
     conf.setInt(HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE, numOfDns);
     scmSafeModeManager = new SCMSafeModeManager(
-        conf, containers, null, queue, serviceManager, scmContext);
+        conf, containers, null, null, queue,
+        serviceManager, scmContext);
 
     // Assert SCM is in Safe mode.
     assertTrue(scmSafeModeManager.getInSafeMode());
@@ -609,7 +617,7 @@ public class TestSCMSafeModeManager {
       MockRatisPipelineProvider.markPipelineHealthy(pipeline);
 
       scmSafeModeManager = new SCMSafeModeManager(
-          config, containers, pipelineManager, queue, serviceManager,
+          config, containers, null, pipelineManager, queue, serviceManager,
           scmContext);
 
       queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
@@ -669,7 +677,8 @@ public class TestSCMSafeModeManager {
     SafeModeEventHandler smHandler = new SafeModeEventHandler();
     queue.addHandler(SCMEvents.SAFE_MODE_STATUS, smHandler);
     scmSafeModeManager = new SCMSafeModeManager(
-        config, containers, pipelineManager, queue, serviceManager, scmContext);
+        config, containers, null, pipelineManager, queue, serviceManager,
+        scmContext);
 
     // Assert SCM is in Safe mode.
     assertTrue(scmSafeModeManager.getInSafeMode());
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
index b09859f..e9d6d6a 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
@@ -682,6 +682,10 @@ public class MiniOzoneClusterImpl implements MiniOzoneCluster {
       // default max retry timeout set to 30s
       scmClientConfig.setMaxRetryTimeout(30 * 1000);
       conf.setFromObject(scmClientConfig);
+      // This way, safe mode exit happens only when we have at least one
+      // pipeline.
+      conf.setInt(HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE,
+          numOfDatanodes >=3 ? 3 : 1);
       configureTrace();
     }
 
@@ -723,6 +727,8 @@ public class MiniOzoneClusterImpl implements MiniOzoneCluster {
       scmStore.setScmId(scmId.get());
       scmStore.initialize();
       if (SCMHAUtils.isSCMHAEnabled(conf)) {
+        scmStore.setSCMHAFlag(true);
+        scmStore.persistCurrentState();
         SCMRatisServerImpl.initialize(clusterId, scmId.get(),
             SCMHANodeDetails.loadSCMHAConfig(conf).getLocalNodeDetails(), conf);
       }
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestSCMInstallSnapshotWithHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestSCMInstallSnapshotWithHA.java
index 18f9391..4223ae9 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestSCMInstallSnapshotWithHA.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestSCMInstallSnapshotWithHA.java
@@ -93,6 +93,7 @@ public class TestSCMInstallSnapshotWithHA {
     scmhaConfiguration.setRatisSnapshotThreshold(SNAPSHOT_THRESHOLD);
     conf.setFromObject(scmhaConfiguration);
 
+
     cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf)
         .setClusterId(clusterId)
         .setScmId(scmId)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org