You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by bh...@apache.org on 2019/03/01 01:03:45 UTC
[hadoop] branch trunk updated: HDDS-1187. Healthy pipeline Chill
Mode rule to consider only pipelines with replication factor three.
This is an automated email from the ASF dual-hosted git repository.
bharat pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new eae8819 HDDS-1187. Healthy pipeline Chill Mode rule to consider only pipelines with replication factor three.
eae8819 is described below
commit eae8819fd2acb7474190b64340b82cba09577810
Author: Bharat Viswanadham <bh...@apache.org>
AuthorDate: Thu Feb 28 17:03:39 2019 -0800
HDDS-1187. Healthy pipeline Chill Mode rule to consider only pipelines with replication factor three.
---
.../chillmode/HealthyPipelineChillModeRule.java | 39 ++++++++---
.../TestHealthyPipelineChillModeRule.java | 77 ++++++++++++++++++++++
.../scm/chillmode/TestSCMChillModeManager.java | 26 ++++----
3 files changed, 120 insertions(+), 22 deletions(-)
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java
index 07088ca..3f475b8 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hdds.scm.chillmode;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsConfigKeys;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
@@ -34,6 +35,9 @@ import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.HashSet;
+import java.util.Set;
+
/**
* Class defining Chill mode exit criteria for Pipelines.
*
@@ -45,12 +49,14 @@ public class HealthyPipelineChillModeRule
implements ChillModeExitRule<PipelineReportFromDatanode>,
EventHandler<PipelineReportFromDatanode> {
- private static final Logger LOG =
+ public static final Logger LOG =
LoggerFactory.getLogger(HealthyPipelineChillModeRule.class);
private final PipelineManager pipelineManager;
private final SCMChillModeManager chillModeManager;
private final int healthyPipelineThresholdCount;
private int currentHealthyPipelineCount = 0;
+ private final Set<DatanodeDetails> processedDatanodeDetails =
+ new HashSet<>();
HealthyPipelineChillModeRule(PipelineManager pipelineManager,
SCMChillModeManager manager, Configuration configuration) {
@@ -71,7 +77,7 @@ public class HealthyPipelineChillModeRule
// On a fresh installed cluster, there will be zero pipelines in the SCM
// pipeline DB.
healthyPipelineThresholdCount =
- (int) Math.ceil((healthyPipelinesPercent / 100) * pipelineCount);
+ (int) Math.ceil(healthyPipelinesPercent * pipelineCount);
LOG.info(" Total pipeline count is {}, healthy pipeline " +
"threshold count is {}", pipelineCount, healthyPipelineThresholdCount);
@@ -101,7 +107,8 @@ public class HealthyPipelineChillModeRule
continue;
}
- if (pipeline.getPipelineState() == Pipeline.PipelineState.OPEN) {
+ if (pipeline.getFactor() == HddsProtos.ReplicationFactor.THREE &&
+ pipeline.getPipelineState() == Pipeline.PipelineState.OPEN) {
// If the pipeline is in the open state, it means all 3 datanodes have
// reported for this pipeline.
currentHealthyPipelineCount++;
@@ -125,14 +132,26 @@ public class HealthyPipelineChillModeRule
return;
}
- // Process pipeline report from datanode
- process(pipelineReportFromDatanode);
- if (chillModeManager.getInChillMode()) {
- SCMChillModeManager.getLogger().info(
- "SCM in chill mode. Healthy pipelines reported count is {}, " +
- "required healthy pipeline reported count is {}",
- currentHealthyPipelineCount, healthyPipelineThresholdCount);
+ // When SCM stays in chill mode for a long time, an already registered
+ // datanode can send its pipeline report again, and the pipeline handler
+ // then fires the processed report event. Such a repeated report must not
+ // be counted again during the threshold calculation.
+ DatanodeDetails dnDetails = pipelineReportFromDatanode.getDatanodeDetails();
+ if (!processedDatanodeDetails.contains(
+ pipelineReportFromDatanode.getDatanodeDetails())) {
+
+ // Process pipeline report from datanode
+ process(pipelineReportFromDatanode);
+
+ if (chillModeManager.getInChillMode()) {
+ SCMChillModeManager.getLogger().info(
+ "SCM in chill mode. Healthy pipelines reported count is {}, " +
+ "required healthy pipeline reported count is {}",
+ currentHealthyPipelineCount, healthyPipelineThresholdCount);
+ }
+
+ processedDatanodeDetails.add(dnDetails);
}
if (validate()) {
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java
index adfa73f..61fbf19 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.hdds.server.events.EventQueue;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.Assert;
import org.junit.Test;
+import org.slf4j.LoggerFactory;
import java.io.File;
import java.util.ArrayList;
@@ -153,6 +154,82 @@ public class TestHealthyPipelineChillModeRule {
}
+ @Test
+ public void testHealthyPipelineChillModeRuleWithMixedPipelines()
+ throws Exception {
+
+ String storageDir = GenericTestUtils.getTempPath(
+ TestHealthyPipelineChillModeRule.class.getName() + UUID.randomUUID());
+
+ try {
+ EventQueue eventQueue = new EventQueue();
+ List<ContainerInfo> containers = new ArrayList<>();
+ containers.addAll(HddsTestUtils.getContainerInfo(1));
+
+ OzoneConfiguration config = new OzoneConfiguration();
+
+ // In Mock Node Manager, the first 8 nodes are healthy, the next 2 nodes
+ // are stale and the last one is dead, and this pattern repeats. So with
+ // 12 nodes there are 9 healthy, 2 stale and one dead.
+ MockNodeManager nodeManager = new MockNodeManager(true, 12);
+ config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
+ // enable pipeline check
+ config.setBoolean(
+ HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, true);
+
+
+ PipelineManager pipelineManager = new SCMPipelineManager(config,
+ nodeManager, eventQueue);
+
+ // Create 3 pipelines
+ Pipeline pipeline1 =
+ pipelineManager.createPipeline(HddsProtos.ReplicationType.RATIS,
+ HddsProtos.ReplicationFactor.ONE);
+ Pipeline pipeline2 =
+ pipelineManager.createPipeline(HddsProtos.ReplicationType.RATIS,
+ HddsProtos.ReplicationFactor.THREE);
+ Pipeline pipeline3 =
+ pipelineManager.createPipeline(HddsProtos.ReplicationType.RATIS,
+ HddsProtos.ReplicationFactor.THREE);
+
+
+ SCMChillModeManager scmChillModeManager = new SCMChillModeManager(
+ config, containers, pipelineManager, eventQueue);
+
+ HealthyPipelineChillModeRule healthyPipelineChillModeRule =
+ scmChillModeManager.getHealthyPipelineChillModeRule();
+
+
+ // No datanode has sent a pipeline report yet
+ Assert.assertFalse(healthyPipelineChillModeRule.validate());
+
+
+ GenericTestUtils.LogCapturer logCapturer =
+ GenericTestUtils.LogCapturer.captureLogs(LoggerFactory.getLogger(
+ SCMChillModeManager.class));
+
+ // fire event with pipeline report with ratis type and factor 1
+ // pipeline, validate() should return false
+ firePipelineEvent(pipeline1, eventQueue);
+
+ GenericTestUtils.waitFor(() -> logCapturer.getOutput().contains(
+ "reported count is 0"),
+ 1000, 5000);
+ Assert.assertFalse(healthyPipelineChillModeRule.validate());
+
+ firePipelineEvent(pipeline2, eventQueue);
+ firePipelineEvent(pipeline3, eventQueue);
+
+ GenericTestUtils.waitFor(() -> healthyPipelineChillModeRule.validate(),
+ 1000, 5000);
+
+ } finally {
+ FileUtil.fullyDelete(new File(storageDir));
+ }
+
+ }
+
+
private void firePipelineEvent(Pipeline pipeline, EventQueue eventQueue) {
PipelineReportsProto.Builder reportBuilder = PipelineReportsProto
.newBuilder();
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestSCMChillModeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestSCMChillModeManager.java
index 7c8cafa..faf8fee 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestSCMChillModeManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestSCMChillModeManager.java
@@ -237,7 +237,7 @@ public class TestSCMChillModeManager {
String storageDir = GenericTestUtils.getTempPath(
TestSCMChillModeManager.class.getName() + UUID.randomUUID());
try{
- MockNodeManager nodeManager = new MockNodeManager(true, 1);
+ MockNodeManager nodeManager = new MockNodeManager(true, 3);
config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
// enable pipeline check
config.setBoolean(
@@ -245,6 +245,15 @@ public class TestSCMChillModeManager {
PipelineManager pipelineManager = new SCMPipelineManager(config,
nodeManager, queue);
+
+ Pipeline pipeline = pipelineManager.createPipeline(
+ HddsProtos.ReplicationType.RATIS,
+ HddsProtos.ReplicationFactor.THREE);
+ PipelineReportsProto.Builder reportBuilder = PipelineReportsProto
+ .newBuilder();
+ reportBuilder.addPipelineReport(PipelineReport.newBuilder()
+ .setPipelineID(pipeline.getId().getProtobuf()));
+
scmChillModeManager = new SCMChillModeManager(
config, containers, pipelineManager, queue);
queue.addHandler(SCMEvents.NODE_REGISTRATION_CONT_REPORT,
@@ -254,17 +263,10 @@ public class TestSCMChillModeManager {
HddsTestUtils.createNodeRegistrationContainerReport(containers));
assertTrue(scmChillModeManager.getInChillMode());
- // simulation a pipeline report to trigger the rule check
- Pipeline pipeline = pipelineManager.createPipeline(
- HddsProtos.ReplicationType.STAND_ALONE,
- HddsProtos.ReplicationFactor.ONE);
- PipelineReportsProto.Builder reportBuilder = PipelineReportsProto
- .newBuilder();
- reportBuilder.addPipelineReport(PipelineReport.newBuilder()
- .setPipelineID(pipeline.getId().getProtobuf()));
-
- queue.fireEvent(SCMEvents.PIPELINE_REPORT, new PipelineReportFromDatanode(
- pipeline.getNodes().get(0), reportBuilder.build()));
+ // Trigger the processed pipeline report event
+ queue.fireEvent(SCMEvents.PROCESSED_PIPELINE_REPORT,
+ new PipelineReportFromDatanode(pipeline.getNodes().get(0),
+ reportBuilder.build()));
GenericTestUtils.waitFor(() -> {
return !scmChillModeManager.getInChillMode();
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org