You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by av...@apache.org on 2021/04/19 22:11:57 UTC
[ozone] branch HDDS-3698-nonrolling-upgrade updated: HDDS-4992. SCM
should not use pipelines with HEALTHY_READONLY datanodes (#2142)
This is an automated email from the ASF dual-hosted git repository.
avijayan pushed a commit to branch HDDS-3698-nonrolling-upgrade
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/HDDS-3698-nonrolling-upgrade by this push:
new d2da5a4 HDDS-4992. SCM should not use pipelines with HEALTHY_READONLY datanodes (#2142)
d2da5a4 is described below
commit d2da5a48e6e890e78157ce1bb38716dcd180acdb
Author: Ethan Rose <33...@users.noreply.github.com>
AuthorDate: Mon Apr 19 18:11:42 2021 -0400
HDDS-4992. SCM should not use pipelines with HEALTHY_READONLY datanodes (#2142)
---
.../apache/hadoop/hdds/scm/events/SCMEvents.java | 13 +-
...andler.java => HealthyReadOnlyNodeHandler.java} | 27 +--
.../hadoop/hdds/scm/node/NodeStateManager.java | 17 +-
.../hadoop/hdds/scm/node/SCMNodeManager.java | 2 +-
.../hdds/scm/server/StorageContainerManager.java | 14 +-
.../hadoop/hdds/scm/node/TestNodeStateManager.java | 6 +-
.../hadoop/hdds/scm/node/TestSCMNodeManager.java | 194 ++++++++++++++++++---
.../hadoop/ozone/recon/scm/ReconNodeManager.java | 7 +-
8 files changed, 219 insertions(+), 61 deletions(-)
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java
index 90cca46..7e914ec 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java
@@ -172,18 +172,17 @@ public final class SCMEvents {
* state to healthy state.
*/
public static final TypedEvent<DatanodeDetails>
- READ_ONLY_HEALTHY_TO_HEALTHY_NODE =
+ HEALTHY_READONLY_TO_HEALTHY_NODE =
new TypedEvent<>(DatanodeDetails.class,
- "READ_ONLY_HEALTHY_TO_HEALTHY_NODE");
+ "HEALTHY_READONLY_TO_HEALTHY_NODE");
/**
- * This event will be triggered whenever a datanode is moved from non-healthy
- * state to readonly-healthy state.
+ * This event will be triggered whenever a datanode is moved to a
+ * healthy-readonly state.
*/
public static final TypedEvent<DatanodeDetails>
- NON_HEALTHY_TO_READONLY_HEALTHY_NODE =
- new TypedEvent<>(DatanodeDetails.class,
- "NON_HEALTHY_TO_READONLY_HEALTHY_NODE");
+ HEALTHY_READONLY_NODE =
+ new TypedEvent<>(DatanodeDetails.class, "HEALTHY_READONLY_NODE");
/**
* This event will be triggered by CommandStatusReportHandler whenever a
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NonHealthyToReadOnlyHealthyNodeHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/HealthyReadOnlyNodeHandler.java
similarity index 72%
rename from hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NonHealthyToReadOnlyHealthyNodeHandler.java
rename to hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/HealthyReadOnlyNodeHandler.java
index 7fd36c7..73e8c71 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NonHealthyToReadOnlyHealthyNodeHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/HealthyReadOnlyNodeHandler.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hdds.scm.node;
+import java.io.IOException;
import java.util.Set;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
@@ -32,16 +33,16 @@ import org.slf4j.LoggerFactory;
/**
* Handles non healthy to healthy(ReadOnly) node event.
*/
-public class NonHealthyToReadOnlyHealthyNodeHandler
+public class HealthyReadOnlyNodeHandler
implements EventHandler<DatanodeDetails> {
private static final Logger LOG =
- LoggerFactory.getLogger(NonHealthyToReadOnlyHealthyNodeHandler.class);
+ LoggerFactory.getLogger(HealthyReadOnlyNodeHandler.class);
private final PipelineManager pipelineManager;
private final NodeManager nodeManager;
private final ConfigurationSource conf;
- public NonHealthyToReadOnlyHealthyNodeHandler(
+ public HealthyReadOnlyNodeHandler(
NodeManager nodeManager, PipelineManager pipelineManager,
OzoneConfiguration conf) {
this.pipelineManager = pipelineManager;
@@ -52,14 +53,18 @@ public class NonHealthyToReadOnlyHealthyNodeHandler
@Override
public void onMessage(DatanodeDetails datanodeDetails,
EventPublisher publisher) {
- Set<PipelineID> pipelineIds =
- nodeManager.getPipelines(datanodeDetails);
- LOG.info("Datanode {} moved to HEALTH READ ONLY state.",
- datanodeDetails);
- if (!pipelineIds.isEmpty()) {
- LOG.error("Datanode {} is part of pipelines {} in HEALTH READ ONLY " +
- "state.",
- datanodeDetails, pipelineIds);
+ LOG.info("Datanode {} moved to HEALTHY READONLY state.", datanodeDetails);
+
+ Set<PipelineID> pipelineIDs = nodeManager.getPipelines(datanodeDetails);
+ for (PipelineID id: pipelineIDs) {
+ LOG.info("Closing pipeline {} which uses HEALTHY READONLY datanode {} ",
+ id, datanodeDetails);
+ try {
+ pipelineManager.closePipeline(pipelineManager.getPipeline(id), false);
+ } catch (IOException ex) {
+ LOG.error("Failed to close pipeline {} which uses HEALTHY READONLY " +
+ "datanode {}: ", id, datanodeDetails, ex);
+ }
}
}
}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java
index 9e70d8c..ec5b349 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java
@@ -65,7 +65,7 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
-import static org.apache.hadoop.hdds.scm.events.SCMEvents.NON_HEALTHY_TO_READONLY_HEALTHY_NODE;
+import static org.apache.hadoop.hdds.scm.events.SCMEvents.HEALTHY_READONLY_NODE;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -200,11 +200,12 @@ public class NodeStateManager implements Runnable, Closeable {
checkPaused = false; // accessed only from test functions
layoutMatchCondition =
- (layout) -> layout.getMetadataLayoutVersion() ==
- layoutVersionManager.getMetadataLayoutVersion();
- layoutMisMatchCondition =
- (layout) -> layout.getMetadataLayoutVersion() !=
- layoutVersionManager.getMetadataLayoutVersion();
+ (layout) -> (layout.getMetadataLayoutVersion() ==
+ layoutVersionManager.getMetadataLayoutVersion()) &&
+ (layout.getSoftwareLayoutVersion() ==
+ layoutVersionManager.getSoftwareLayoutVersion());
+
+ layoutMisMatchCondition = (layout) -> !layoutMatchCondition.test(layout);
scheduleNextHealthCheck();
}
@@ -216,9 +217,9 @@ public class NodeStateManager implements Runnable, Closeable {
state2EventMap.put(STALE, SCMEvents.STALE_NODE);
state2EventMap.put(DEAD, SCMEvents.DEAD_NODE);
state2EventMap
- .put(HEALTHY, SCMEvents.READ_ONLY_HEALTHY_TO_HEALTHY_NODE);
+ .put(HEALTHY, SCMEvents.HEALTHY_READONLY_TO_HEALTHY_NODE);
state2EventMap
- .put(NodeState.HEALTHY_READONLY, NON_HEALTHY_TO_READONLY_HEALTHY_NODE);
+ .put(NodeState.HEALTHY_READONLY, HEALTHY_READONLY_NODE);
}
/*
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
index fced3c3..eea05b1 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
@@ -345,7 +345,7 @@ public class SCMNodeManager implements NodeManager {
DatanodeDetails datanodeDetails, NodeReportProto nodeReport,
PipelineReportsProto pipelineReportsProto,
LayoutVersionProto layoutInfo) {
- if (layoutInfo.getSoftwareLayoutVersion() >
+ if (layoutInfo.getSoftwareLayoutVersion() !=
scmLayoutVersionManager.getSoftwareLayoutVersion()) {
return RegisteredCommand.newBuilder()
.setErrorCode(ErrorCode.errorNodeNotPermitted)
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index d88745b..e00037e 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -84,7 +84,7 @@ import org.apache.hadoop.hdds.scm.node.NewNodeHandler;
import org.apache.hadoop.hdds.scm.node.StartDatanodeAdminHandler;
import org.apache.hadoop.hdds.scm.node.NodeManager;
import org.apache.hadoop.hdds.scm.node.NodeReportHandler;
-import org.apache.hadoop.hdds.scm.node.NonHealthyToReadOnlyHealthyNodeHandler;
+import org.apache.hadoop.hdds.scm.node.HealthyReadOnlyNodeHandler;
import org.apache.hadoop.hdds.scm.node.ReadOnlyHealthyToHealthyNodeHandler;
import org.apache.hadoop.hdds.scm.node.SCMNodeManager;
import org.apache.hadoop.hdds.scm.node.StaleNodeHandler;
@@ -370,9 +370,9 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
new StartDatanodeAdminHandler(scmNodeManager, pipelineManager);
ReadOnlyHealthyToHealthyNodeHandler readOnlyHealthyToHealthyNodeHandler =
new ReadOnlyHealthyToHealthyNodeHandler(conf, serviceManager);
- NonHealthyToReadOnlyHealthyNodeHandler
- nonHealthyToReadOnlyHealthyNodeHandler =
- new NonHealthyToReadOnlyHealthyNodeHandler(scmNodeManager,
+ HealthyReadOnlyNodeHandler
+ healthyReadOnlyNodeHandler =
+ new HealthyReadOnlyNodeHandler(scmNodeManager,
pipelineManager, conf);
ContainerActionsHandler actionsHandler = new ContainerActionsHandler();
PendingDeleteHandler pendingDeleteHandler =
@@ -410,10 +410,10 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
eventQueue.addHandler(SCMEvents.CLOSE_CONTAINER, closeContainerHandler);
eventQueue.addHandler(SCMEvents.NEW_NODE, newNodeHandler);
eventQueue.addHandler(SCMEvents.STALE_NODE, staleNodeHandler);
- eventQueue.addHandler(SCMEvents.READ_ONLY_HEALTHY_TO_HEALTHY_NODE,
+ eventQueue.addHandler(SCMEvents.HEALTHY_READONLY_TO_HEALTHY_NODE,
readOnlyHealthyToHealthyNodeHandler);
- eventQueue.addHandler(SCMEvents.NON_HEALTHY_TO_READONLY_HEALTHY_NODE,
- nonHealthyToReadOnlyHealthyNodeHandler);
+ eventQueue.addHandler(SCMEvents.HEALTHY_READONLY_NODE,
+ healthyReadOnlyNodeHandler);
eventQueue.addHandler(SCMEvents.DEAD_NODE, deadNodeHandler);
eventQueue.addHandler(SCMEvents.START_ADMIN_ON_NODE,
datanodeStartAdminHandler);
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeStateManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeStateManager.java
index 1dd3f59..59abb2d 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeStateManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeStateManager.java
@@ -202,7 +202,7 @@ public class TestNodeStateManager {
dni.updateLastHeartbeatTime();
nsm.checkNodesHealth();
assertEquals(NodeState.HEALTHY_READONLY, nsm.getNodeStatus(dn).getHealth());
- assertEquals(SCMEvents.NON_HEALTHY_TO_READONLY_HEALTHY_NODE,
+ assertEquals(SCMEvents.HEALTHY_READONLY_NODE,
eventPublisher.getLastEvent());
// Make the node stale again, and transition to healthy.
@@ -213,7 +213,7 @@ public class TestNodeStateManager {
dni.updateLastHeartbeatTime();
nsm.checkNodesHealth();
assertEquals(NodeState.HEALTHY_READONLY, nsm.getNodeStatus(dn).getHealth());
- assertEquals(SCMEvents.NON_HEALTHY_TO_READONLY_HEALTHY_NODE,
+ assertEquals(SCMEvents.HEALTHY_READONLY_NODE,
eventPublisher.getLastEvent());
}
@@ -247,7 +247,7 @@ public class TestNodeStateManager {
HddsProtos.NodeOperationalState.values()) {
eventPublisher.clearEvents();
nsm.setNodeOperationalState(dn, s);
- assertEquals(SCMEvents.NON_HEALTHY_TO_READONLY_HEALTHY_NODE,
+ assertEquals(SCMEvents.HEALTHY_READONLY_NODE,
eventPublisher.getLastEvent());
}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java
index 9947547..49b5dc9 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto
.StorageContainerDatanodeProtocolProtos.LayoutVersionProto;
+import org.apache.hadoop.hdds.scm.exceptions.SCMException;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl;
import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.NodeReportFromDatanode;
@@ -53,6 +54,7 @@ import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.hdds.server.events.EventQueue;
import org.apache.hadoop.hdds.upgrade.HDDSLayoutVersionManager;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMRegisteredResponseProto.ErrorCode;
import org.apache.hadoop.ozone.protocol.commands.CloseContainerCommand;
import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode;
import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand;
@@ -61,6 +63,7 @@ import org.apache.hadoop.ozone.upgrade.LayoutVersionManager;
import org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.LambdaTestUtils;
import org.apache.hadoop.test.PathUtils;
import org.junit.After;
import org.junit.Assert;
@@ -72,6 +75,7 @@ import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.util.Map;
+import java.util.function.Predicate;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
@@ -113,6 +117,20 @@ public class TestSCMNodeManager {
private File testDir;
private StorageContainerManager scm;
+ private static final int MAX_LV = HDDSLayoutVersionManager.maxLayoutVersion();
+ private static final LayoutVersionProto LARGER_SLV_LAYOUT_PROTO =
+ toLayoutVersionProto(MAX_LV, MAX_LV + 1);
+ private static final LayoutVersionProto SMALLER_MLV_LAYOUT_PROTO =
+ toLayoutVersionProto(MAX_LV - 1, MAX_LV);
+ // In a real cluster, startup is disallowed if MLV is larger than SLV, so
+ // increase both numbers to test smaller SLV or larger MLV.
+ private static final LayoutVersionProto SMALLER_MLV_SLV_LAYOUT_PROTO =
+ toLayoutVersionProto(MAX_LV - 1, MAX_LV - 1);
+ private static final LayoutVersionProto LARGER_MLV_SLV_LAYOUT_PROTO =
+ toLayoutVersionProto(MAX_LV + 1, MAX_LV + 1);
+ private static final LayoutVersionProto CORRECT_LAYOUT_PROTO =
+ toLayoutVersionProto(MAX_LV, MAX_LV);
+
@Rule
public ExpectedException thrown = ExpectedException.none();
@@ -200,7 +218,73 @@ public class TestSCMNodeManager {
}
/**
- * Tests that Node manager handles Layout versions correctly.
+ * Tests that node manager handles layout version changes from heartbeats
+ * correctly.
+ *
+ * @throws IOException
+ * @throws InterruptedException
+ * @throws TimeoutException
+ */
+ @Test
+ public void testScmLayoutOnHeartbeat() throws Exception {
+ OzoneConfiguration conf = getConf();
+ conf.setTimeDuration(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL,
+ 1, TimeUnit.DAYS);
+
+ try (SCMNodeManager nodeManager = createNodeManager(conf)) {
+ // Register 2 nodes correctly.
+ // These will be used with a faulty node to test pipeline creation.
+ TestUtils.createRandomDatanodeAndRegister(nodeManager);
+ TestUtils.createRandomDatanodeAndRegister(nodeManager);
+
+ scm.exitSafeMode();
+
+ assertPipelineClosedAfterLayoutHeartbeat(nodeManager,
+ SMALLER_MLV_LAYOUT_PROTO);
+ assertPipelineClosedAfterLayoutHeartbeat(nodeManager,
+ LARGER_MLV_SLV_LAYOUT_PROTO);
+ assertPipelineClosedAfterLayoutHeartbeat(nodeManager,
+ SMALLER_MLV_SLV_LAYOUT_PROTO);
+ assertPipelineClosedAfterLayoutHeartbeat(nodeManager,
+ LARGER_SLV_LAYOUT_PROTO);
+ }
+ }
+
+ private void assertPipelineClosedAfterLayoutHeartbeat(
+ SCMNodeManager nodeManager, LayoutVersionProto layout) throws Exception {
+
+ // Initial condition: 2 healthy nodes registered.
+ assertPipelineCounts(oneCount -> oneCount == 2,
+ threeCount -> threeCount == 0);
+
+ // Even when safemode exit or new node addition trigger pipeline
+ // creation, they will fail with not enough healthy nodes for ratis 3
+ // pipeline. Therefore we do not have to worry about this create call
+ // failing due to datanodes reaching their maximum pipeline limit.
+ assertPipelineCreationFailsWithNotEnoughNodes(2);
+
+ // Register a new node correctly.
+ DatanodeDetails node = TestUtils
+ .createRandomDatanodeAndRegister(nodeManager);
+
+ // Safemode exit and adding the new node should trigger pipeline creation.
+ assertPipelineCounts(oneCount -> oneCount == 3,
+ threeCount -> threeCount >= 1);
+
+ // node sends incorrect layout.
+ nodeManager.processHeartbeat(node, layout);
+
+ // Its pipelines should be closed then removed, meaning there is not
+ // enough nodes for factor 3 pipelines.
+ assertPipelineCounts(oneCount -> oneCount == 2,
+ threeCount -> threeCount == 0);
+
+ assertPipelineCreationFailsWithNotEnoughNodes(2);
+ }
+
+ /**
+ * Tests that node manager handles layout versions for newly registered nodes
+ * correctly.
*
* @throws IOException
* @throws InterruptedException
@@ -208,31 +292,97 @@ public class TestSCMNodeManager {
*/
@Test
public void testScmLayoutOnRegister()
- throws IOException, InterruptedException, AuthenticationException {
+ throws Exception {
- try (SCMNodeManager nodeManager = createNodeManager(getConf())) {
- HDDSLayoutVersionManager layoutVersionManager =
- nodeManager.getLayoutVersionManager();
- int nodeManagerMetadataLayoutVersion =
- layoutVersionManager.getMetadataLayoutVersion();
- int nodeManagerSoftwareLayoutVersion =
- layoutVersionManager.getSoftwareLayoutVersion();
- LayoutVersionProto layoutInfoSuccess = toLayoutVersionProto(
- nodeManagerMetadataLayoutVersion, nodeManagerSoftwareLayoutVersion);
- LayoutVersionProto layoutInfoFailure = toLayoutVersionProto(
- nodeManagerSoftwareLayoutVersion + 1,
- nodeManagerSoftwareLayoutVersion + 1);
- RegisteredCommand rcmd = nodeManager.register(
- MockDatanodeDetails.randomDatanodeDetails(), null,
- getRandomPipelineReports(), layoutInfoSuccess);
- assertTrue(rcmd.getError() == success);
- rcmd = nodeManager.register(
- MockDatanodeDetails.randomDatanodeDetails(), null,
- getRandomPipelineReports(), layoutInfoFailure);
- assertTrue(rcmd.getError() == errorNodeNotPermitted);
+ OzoneConfiguration conf = getConf();
+ conf.setTimeDuration(ScmConfigKeys.OZONE_SCM_PIPELINE_CREATION_INTERVAL,
+ 1, TimeUnit.DAYS);
+
+ try (SCMNodeManager nodeManager = createNodeManager(conf)) {
+ // Nodes with mismatched SLV cannot join the cluster.
+ assertRegister(nodeManager,
+ LARGER_SLV_LAYOUT_PROTO, errorNodeNotPermitted);
+ assertRegister(nodeManager,
+ SMALLER_MLV_SLV_LAYOUT_PROTO, errorNodeNotPermitted);
+ assertRegister(nodeManager,
+ LARGER_MLV_SLV_LAYOUT_PROTO, errorNodeNotPermitted);
+ // Nodes with mismatched MLV can join, but should not be allowed in
+ // pipelines.
+ DatanodeDetails badMlvNode1 = assertRegister(nodeManager,
+ SMALLER_MLV_LAYOUT_PROTO, success);
+ DatanodeDetails badMlvNode2 = assertRegister(nodeManager,
+ SMALLER_MLV_LAYOUT_PROTO, success);
+ // This node has correct MLV and SLV, so it can join and be used in
+ // pipelines.
+ assertRegister(nodeManager, CORRECT_LAYOUT_PROTO, success);
+
+ Assert.assertEquals(3, nodeManager.getAllNodes().size());
+
+ scm.exitSafeMode();
+
+ // SCM should auto create a factor 1 pipeline for the one healthy node.
+ // Still should not have enough healthy nodes for ratis 3 pipeline.
+ assertPipelineCounts(oneCount -> oneCount == 1,
+ threeCount -> threeCount == 0);
+
+ // Even when safemode exit or new node addition trigger pipeline
+ // creation, they will fail with not enough healthy nodes for ratis 3
+ // pipeline. Therefore we do not have to worry about this create call
+ // failing due to datanodes reaching their maximum pipeline limit.
+ assertPipelineCreationFailsWithNotEnoughNodes(1);
+
+ // Heartbeat bad MLV nodes back to healthy.
+ nodeManager.processHeartbeat(badMlvNode1, CORRECT_LAYOUT_PROTO);
+ nodeManager.processHeartbeat(badMlvNode2, CORRECT_LAYOUT_PROTO);
+
+ // After moving out of healthy readonly, pipeline creation should be
+ // triggered.
+ assertPipelineCounts(oneCount -> oneCount == 3,
+ threeCount -> threeCount >= 1);
}
}
+ private DatanodeDetails assertRegister(SCMNodeManager manager,
+ LayoutVersionProto layout, ErrorCode expectedResult) {
+ RegisteredCommand cmd = manager.register(
+ MockDatanodeDetails.randomDatanodeDetails(), null,
+ getRandomPipelineReports(), layout);
+
+ Assert.assertEquals(expectedResult, cmd.getError());
+ return cmd.getDatanode();
+ }
+
+ private void assertPipelineCreationFailsWithNotEnoughNodes(
+ int actualNodeCount) throws Exception {
+ try {
+ scm.getPipelineManager()
+ .createPipeline(HddsProtos.ReplicationType.RATIS,
+ HddsProtos.ReplicationFactor.THREE);
+ Assert.fail("3 nodes should not have been found for a pipeline.");
+ } catch (SCMException ex) {
+ Assert.assertTrue(ex.getMessage().contains("Required 3. Found " +
+ actualNodeCount));
+ }
+ }
+
+ private void assertPipelineCounts(Predicate<Integer> factorOneCheck,
+ Predicate<Integer> factorThreeCheck) throws Exception {
+ LambdaTestUtils.await(5000, 1000, () -> {
+ int numOne = scm.getPipelineManager()
+ .getPipelines(HddsProtos.ReplicationType.RATIS,
+ HddsProtos.ReplicationFactor.ONE).size();
+ int numThree = scm.getPipelineManager()
+ .getPipelines(HddsProtos.ReplicationType.RATIS,
+ HddsProtos.ReplicationFactor.THREE).size();
+
+ // Moving nodes out of healthy readonly should have triggered
+ // pipeline creation. With 3 healthy nodes, we should have at least one
+ // factor 3 pipeline now, and factor 1s should have been auto created
+ // for all nodes.
+ return factorOneCheck.test(numOne) && factorThreeCheck.test(numThree);
+ });
+ }
+
/**
* asserts that if we send no heartbeats node manager stays in safemode.
*
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java
index 2dcf79d..7c967bc 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconNodeManager.java
@@ -92,8 +92,11 @@ public class ReconNodeManager extends SCMNodeManager {
DatanodeDetails datanodeDetails = iterator.next().getValue();
register(datanodeDetails, null, null,
LayoutVersionProto.newBuilder()
- .setMetadataLayoutVersion(0)
- .setSoftwareLayoutVersion(0).build());
+ .setMetadataLayoutVersion(
+ HDDSLayoutVersionManager.maxLayoutVersion())
+ .setSoftwareLayoutVersion(
+ HDDSLayoutVersionManager.maxLayoutVersion())
+ .build());
nodeCount++;
}
LOG.info("Loaded {} nodes from node DB.", nodeCount);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org