You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ozone.apache.org by na...@apache.org on 2022/04/04 02:05:18 UTC
[ozone] branch master updated: HDDS-6397. Implement ContainerBalancer as an SCMService (#3153)
This is an automated email from the ASF dual-hosted git repository.
nanda pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new d93fbdfa64 HDDS-6397. Implement ContainerBalancer as an SCMService (#3153)
d93fbdfa64 is described below
commit d93fbdfa645b19274e3d909adff85a39d304601c
Author: Siddhant Sangwan <si...@gmail.com>
AuthorDate: Mon Apr 4 07:35:12 2022 +0530
HDDS-6397. Implement ContainerBalancer as an SCMService (#3153)
---
.../apache/hadoop/hdds/scm/client/ScmClient.java | 4 +-
.../protocol/StorageContainerLocationProtocol.java | 6 +-
...inerLocationProtocolClientSideTranslatorPB.java | 11 +-
.../src/main/proto/ScmAdminProtocol.proto | 1 +
.../scm/container/balancer/ContainerBalancer.java | 226 +++++++++++++++------
.../IllegalContainerBalancerStateException.java | 46 +++++
...lidContainerBalancerConfigurationException.java | 47 +++++
...inerLocationProtocolServerSideTranslatorPB.java | 9 +-
.../hdds/scm/server/SCMClientProtocolServer.java | 34 +++-
.../hdds/scm/server/StorageContainerManager.java | 2 +-
.../container/balancer/TestContainerBalancer.java | 91 +++++----
.../scm/cli/ContainerBalancerStartSubcommand.java | 14 +-
.../hdds/scm/cli/ContainerOperationClient.java | 3 +-
.../datanode/TestContainerBalancerSubCommand.java | 22 +-
14 files changed, 378 insertions(+), 138 deletions(-)
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
index f1885f890e..e2a5c8c750 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hdds.scm.client;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hdds.annotation.InterfaceStability;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
import org.apache.hadoop.hdds.scm.container.ContainerReplicaInfo;
import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport;
@@ -331,7 +332,8 @@ public interface ScmClient extends Closeable {
/**
* Start ContainerBalancer.
*/
- boolean startContainerBalancer(Optional<Double> threshold,
+ StartContainerBalancerResponseProto startContainerBalancer(
+ Optional<Double> threshold,
Optional<Integer> iterations,
Optional<Integer> maxDatanodesPercentageToInvolvePerIteration,
Optional<Long> maxSizeToMovePerIterationInGB,
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
index 6f078bd532..1b07c5d3d6 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hdds.scm.protocol;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type;
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
import org.apache.hadoop.hdds.scm.ScmConfig;
@@ -329,8 +330,11 @@ public interface StorageContainerLocationProtocol extends Closeable {
/**
* Start ContainerBalancer.
+ * @return {@link StartContainerBalancerResponseProto} that contains the
+ * start status and an optional message.
*/
- boolean startContainerBalancer(Optional<Double> threshold,
+ StartContainerBalancerResponseProto startContainerBalancer(
+ Optional<Double> threshold,
Optional<Integer> iterations,
Optional<Integer> maxDatanodesPercentageToInvolvePerIteration,
Optional<Long> maxSizeToMovePerIterationInGB,
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
index 997271e67e..beb3b75e58 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
@@ -775,7 +775,7 @@ public final class StorageContainerLocationProtocolClientSideTranslatorPB
}
@Override
- public boolean startContainerBalancer(
+ public StartContainerBalancerResponseProto startContainerBalancer(
Optional<Double> threshold, Optional<Integer> iterations,
Optional<Integer> maxDatanodesPercentageToInvolvePerIteration,
Optional<Long> maxSizeToMovePerIterationInGB,
@@ -830,13 +830,10 @@ public final class StorageContainerLocationProtocolClientSideTranslatorPB
builder.setMaxSizeLeavingSourceInGB(msls);
}
-
StartContainerBalancerRequestProto request = builder.build();
- StartContainerBalancerResponseProto response =
- submitRequest(Type.StartContainerBalancer,
- builder1 -> builder1.setStartContainerBalancerRequest(request))
- .getStartContainerBalancerResponse();
- return response.getStart();
+ return submitRequest(Type.StartContainerBalancer,
+ builder1 -> builder1.setStartContainerBalancerRequest(request))
+ .getStartContainerBalancerResponse();
}
@Override
diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
index e2d7b1663e..97e3dae83e 100644
--- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
+++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
@@ -524,6 +524,7 @@ message StartContainerBalancerRequestProto {
message StartContainerBalancerResponseProto {
required bool start = 1;
+ optional string message = 2;
}
message StopContainerBalancerRequestProto {
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
index 42fdffe2a6..2e7b04feca 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerManager;
import org.apache.hadoop.hdds.scm.container.ReplicationManager;
import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeStat;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
+import org.apache.hadoop.hdds.scm.ha.SCMService;
import org.apache.hadoop.hdds.scm.net.NetworkTopology;
import org.apache.hadoop.hdds.scm.node.DatanodeUsageInfo;
import org.apache.hadoop.hdds.scm.node.NodeManager;
@@ -63,7 +64,7 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NODE_REPORT_INTERVAL_DE
* Container balancer is a service in SCM to move containers between over- and
* under-utilized datanodes.
*/
-public class ContainerBalancer {
+public class ContainerBalancer implements SCMService {
public static final Logger LOG =
LoggerFactory.getLogger(ContainerBalancer.class);
@@ -137,39 +138,6 @@ public class ContainerBalancer {
findSourceStrategy = new FindSourceGreedy(nodeManager);
}
- /**
- * Starts ContainerBalancer. Current implementation is incomplete.
- *
- * @param balancerConfiguration Configuration values.
- */
- public boolean start(ContainerBalancerConfiguration balancerConfiguration) {
- lock.lock();
- try {
- if (balancerRunning || currentBalancingThread != null) {
- LOG.error("Container Balancer is already running.");
- return false;
- }
-
- this.config = balancerConfiguration;
- if (!validateConfiguration(config)) {
- return false;
- }
- ozoneConfiguration.setFromObject(balancerConfiguration);
- balancerRunning = true;
- LOG.info("Starting Container Balancer...{}", this);
-
- //we should start a new balancer thread async
- //and response to cli as soon as possible
- currentBalancingThread = new Thread(this::balance);
- currentBalancingThread.setName("ContainerBalancer");
- currentBalancingThread.setDaemon(true);
- currentBalancingThread.start();
- } finally {
- lock.unlock();
- }
- return true;
- }
-
/**
* Balances the cluster.
*/
@@ -212,7 +180,7 @@ public class ContainerBalancer {
// stop balancing if iteration is not initialized
if (!initializeIteration()) {
- stop();
+ stopBalancer();
return;
}
@@ -222,7 +190,7 @@ public class ContainerBalancer {
metrics.incrementNumIterations(1);
LOG.info("Result of this iteration of Container Balancer: {}", iR);
if (iR == IterationResult.CAN_NOT_BALANCE_ANY_MORE) {
- stop();
+ stopBalancer();
return;
}
@@ -246,7 +214,7 @@ public class ContainerBalancer {
}
}
}
- stop();
+ stopBalancer();
}
/**
@@ -824,35 +792,161 @@ public class ContainerBalancer {
}
/**
- * Stops ContainerBalancer.
+ * Receives a notification for raft or safe mode related status changes.
+ * Stops ContainerBalancer if it's running and the current SCM becomes a
+ * follower or enters safe mode.
*/
+ @Override
+ public void notifyStatusChanged() {
+ if (!scmContext.isLeader() || scmContext.isInSafeMode()) {
+ if (isBalancerRunning()) {
+ stopBalancingThread();
+ }
+ }
+ }
+
+ /**
+ * Checks if ContainerBalancer should run.
+ * @return false
+ */
+ @Override
+ public boolean shouldRun() {
+ return false;
+ }
+
+ /**
+ * @return Name of this service.
+ */
+ @Override
+ public String getServiceName() {
+ return ContainerBalancer.class.getSimpleName();
+ }
+
+ /**
+ * Starts ContainerBalancer as an SCMService.
+ */
+ @Override
+ public void start() {
+ if (shouldRun()) {
+ startBalancingThread();
+ }
+ }
+
+ /**
+ * Starts Container Balancer after checking its state and validating
+ * configuration.
+ *
+ * @throws IllegalContainerBalancerStateException if ContainerBalancer is
+ * not in a start-appropriate state
+ * @throws InvalidContainerBalancerConfigurationException if
+ * {@link ContainerBalancerConfiguration} config file is incorrectly
+ * configured
+ */
+ public void startBalancer() throws IllegalContainerBalancerStateException,
+ InvalidContainerBalancerConfigurationException {
+ lock.lock();
+ try {
+ validateState();
+ validateConfiguration(this.config);
+ startBalancingThread();
+ } finally {
+ lock.unlock();
+ }
+ }
+
+ /**
+ * Starts a new balancing thread asynchronously.
+ */
+ private void startBalancingThread() {
+ lock.lock();
+ try {
+ balancerRunning = true;
+ currentBalancingThread = new Thread(this::balance);
+ currentBalancingThread.setName("ContainerBalancer");
+ currentBalancingThread.setDaemon(true);
+ currentBalancingThread.start();
+ } finally {
+ lock.unlock();
+ }
+ LOG.info("Starting Container Balancer... {}", this);
+ }
+
+ /**
+ * Checks if ContainerBalancer can start.
+ * @throws IllegalContainerBalancerStateException if ContainerBalancer is
+ * already running, SCM is in safe mode, or SCM is not leader ready
+ */
+ private void validateState() throws IllegalContainerBalancerStateException {
+ if (!scmContext.isLeaderReady()) {
+ LOG.warn("SCM is not leader ready");
+ throw new IllegalContainerBalancerStateException("SCM is not leader " +
+ "ready");
+ }
+ if (scmContext.isInSafeMode()) {
+ LOG.warn("SCM is in safe mode");
+ throw new IllegalContainerBalancerStateException("SCM is in safe mode");
+ }
+ lock.lock();
+ try {
+ if (isBalancerRunning() || currentBalancingThread != null) {
+ LOG.warn("Cannot start ContainerBalancer because it's already running");
+ throw new IllegalContainerBalancerStateException(
+ "Cannot start ContainerBalancer because it's already running");
+ }
+ } finally {
+ lock.unlock();
+ }
+ }
+
+ /**
+ * Stops the SCM service.
+ */
+ @Override
public void stop() {
+ stopBalancer();
+ }
+
+ /**
+ * Stops ContainerBalancer gracefully.
+ */
+ public void stopBalancer() {
lock.lock();
try {
- // we should stop the balancer thread gracefully
- if (!balancerRunning) {
+ if (!isBalancerRunning()) {
LOG.info("Container Balancer is not running.");
return;
}
- balancerRunning = false;
+ stopBalancingThread();
} finally {
lock.unlock();
}
+ }
- // wait for currentBalancingThread to die
- if (Thread.currentThread().getId() != currentBalancingThread.getId()) {
- currentBalancingThread.interrupt();
+ private void stopBalancingThread() {
+ Thread balancingThread;
+ lock.lock();
+ try {
+ balancerRunning = false;
+ balancingThread = currentBalancingThread;
+ currentBalancingThread = null;
+ } finally {
+ lock.unlock();
+ }
+ // wait for balancingThread to die
+ if (balancingThread != null &&
+ balancingThread.getId() != Thread.currentThread().getId()) {
+ balancingThread.interrupt();
try {
- currentBalancingThread.join();
+ balancingThread.join();
} catch (InterruptedException exception) {
Thread.currentThread().interrupt();
}
}
- currentBalancingThread = null;
LOG.info("Container Balancer stopped successfully.");
}
- private boolean validateConfiguration(ContainerBalancerConfiguration conf) {
+ private void validateConfiguration(ContainerBalancerConfiguration conf)
+ throws InvalidContainerBalancerConfigurationException {
// maxSizeEnteringTarget and maxSizeLeavingSource should by default be
// greater than container size
long size = (long) ozoneConfiguration.getStorageSize(
@@ -860,25 +954,30 @@ public class ContainerBalancer {
ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
if (conf.getMaxSizeEnteringTarget() <= size) {
- LOG.info("MaxSizeEnteringTarget should be larger than " +
- "ozone.scm.container.size");
- return false;
+ LOG.warn("hdds.container.balancer.size.entering.target.max {} should " +
+ "be greater than ozone.scm.container.size {}",
+ conf.getMaxSizeEnteringTarget(), size);
+ throw new InvalidContainerBalancerConfigurationException(
+ "hdds.container.balancer.size.entering.target.max should be greater" +
+ " than ozone.scm.container.size");
}
if (conf.getMaxSizeLeavingSource() <= size) {
- LOG.info("MaxSizeLeavingSource should be larger than " +
- "ozone.scm.container.size");
- return false;
+ LOG.warn("hdds.container.balancer.size.leaving.source.max {} should " +
+ "be greater than ozone.scm.container.size {}",
+ conf.getMaxSizeLeavingSource(), size);
+ throw new InvalidContainerBalancerConfigurationException(
+ "hdds.container.balancer.size.leaving.source.max should be greater" +
+ " than ozone.scm.container.size");
}
// balancing interval should be greater than DUFactory refresh period
DUFactory.Conf duConf = ozoneConfiguration.getObject(DUFactory.Conf.class);
- long balancingInterval = duConf.getRefreshPeriod().toMillis();
- if (conf.getBalancingInterval().toMillis() <= balancingInterval) {
- LOG.info("balancing.iteration.interval should be larger than " +
- "hdds.datanode.du.refresh.period.");
- return false;
+ long refreshPeriod = duConf.getRefreshPeriod().toMillis();
+ if (conf.getBalancingInterval().toMillis() <= refreshPeriod) {
+ LOG.warn("hdds.container.balancer.balancing.iteration.interval {} " +
+ "should be greater than hdds.datanode.du.refresh.period {}",
+ conf.getBalancingInterval(), refreshPeriod);
}
- return true;
}
public void setNodeManager(NodeManager nodeManager) {
@@ -900,6 +999,15 @@ public class ContainerBalancer {
this.ozoneConfiguration = ozoneConfiguration;
}
+ /**
+ * Sets the configuration that ContainerBalancer will use. This should be
+ * set before starting balancer.
+ * @param config ContainerBalancerConfiguration
+ */
+ public void setConfig(ContainerBalancerConfiguration config) {
+ this.config = config;
+ }
+
/**
* Gets the list of unBalanced nodes, that is, the over and under utilized
* nodes in the cluster.
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/IllegalContainerBalancerStateException.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/IllegalContainerBalancerStateException.java
new file mode 100644
index 0000000000..cc938e636f
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/IllegalContainerBalancerStateException.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdds.scm.container.balancer;
+
+/**
+ * Signals that a state change cannot be performed on ContainerBalancer.
+ */
+public class IllegalContainerBalancerStateException extends Exception {
+
+ /**
+ * Constructs an IllegalContainerBalancerStateException with no detail
+ * message. A detail message is a String that describes this particular
+ * exception.
+ */
+ public IllegalContainerBalancerStateException() {
+ super();
+ }
+
+ /**
+ * Constructs an IllegalContainerBalancerStateException with the specified
+ * detail message. A detail message is a String that describes this particular
+ * exception.
+ *
+ * @param s the String that contains a detailed message
+ */
+ public IllegalContainerBalancerStateException(String s) {
+ super(s);
+ }
+
+}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/InvalidContainerBalancerConfigurationException.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/InvalidContainerBalancerConfigurationException.java
new file mode 100644
index 0000000000..c6a6bf030b
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/InvalidContainerBalancerConfigurationException.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdds.scm.container.balancer;
+
+/**
+ * Signals that {@link ContainerBalancerConfiguration} contains invalid
+ * configuration value(s).
+ */
+public class InvalidContainerBalancerConfigurationException extends Exception {
+
+ /**
+ * Constructs an InvalidContainerBalancerConfigurationException with no detail
+ * message. A detail message is a String that describes this particular
+ * exception.
+ */
+ public InvalidContainerBalancerConfigurationException() {
+ super();
+ }
+
+ /**
+ * Constructs an InvalidContainerBalancerConfigurationException with the
+ * specified detail message. A detail message is a String that describes
+ * this particular exception.
+ *
+ * @param s the String that contains a detailed message
+ */
+ public InvalidContainerBalancerConfigurationException(String s) {
+ super(s);
+ }
+
+}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
index ba4c2a6cf6..26b7c0bb4f 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
@@ -794,11 +794,10 @@ public final class StorageContainerLocationProtocolServerSideTranslatorPB
Optional.of(request.getMaxSizeLeavingSourceInGB());
}
- return StartContainerBalancerResponseProto.newBuilder().
- setStart(impl.startContainerBalancer(threshold,
- iterations, maxDatanodesPercentageToInvolvePerIteration,
- maxSizeToMovePerIterationInGB,
- maxSizeEnteringTargetInGB, maxSizeLeavingSourceInGB)).build();
+ return impl.startContainerBalancer(threshold, iterations,
+ maxDatanodesPercentageToInvolvePerIteration,
+ maxSizeToMovePerIterationInGB, maxSizeEnteringTargetInGB,
+ maxSizeLeavingSourceInGB);
}
public StopContainerBalancerResponseProto stopContainerBalancer(
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
index d7ee5368c1..a4d1ae4789 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
import org.apache.hadoop.hdds.scm.ScmInfo;
import org.apache.hadoop.hdds.scm.container.ContainerID;
@@ -41,7 +42,10 @@ import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport;
+import org.apache.hadoop.hdds.scm.container.balancer.ContainerBalancer;
import org.apache.hadoop.hdds.scm.container.balancer.ContainerBalancerConfiguration;
+import org.apache.hadoop.hdds.scm.container.balancer.IllegalContainerBalancerStateException;
+import org.apache.hadoop.hdds.scm.container.balancer.InvalidContainerBalancerConfigurationException;
import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeStat;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
@@ -775,7 +779,7 @@ public class SCMClientProtocolServer implements
}
@Override
- public boolean startContainerBalancer(
+ public StartContainerBalancerResponseProto startContainerBalancer(
Optional<Double> threshold, Optional<Integer> iterations,
Optional<Integer> maxDatanodesPercentageToInvolvePerIteration,
Optional<Long> maxSizeToMovePerIterationInGB,
@@ -830,16 +834,24 @@ public class SCMClientProtocolServer implements
cbc.setMaxSizeLeavingSource(msls * OzoneConsts.GB);
}
-
- boolean isStartedSuccessfully = scm.getContainerBalancer().start(cbc);
- if (isStartedSuccessfully) {
- AUDIT.logWriteSuccess(buildAuditMessageForSuccess(
- SCMAction.START_CONTAINER_BALANCER, null));
- } else {
- AUDIT.logWriteFailure(buildAuditMessageForSuccess(
- SCMAction.START_CONTAINER_BALANCER, null));
+ ContainerBalancer containerBalancer = scm.getContainerBalancer();
+ containerBalancer.setConfig(cbc);
+ try {
+ containerBalancer.startBalancer();
+ } catch (IllegalContainerBalancerStateException |
+ InvalidContainerBalancerConfigurationException e) {
+ AUDIT.logWriteFailure(buildAuditMessageForFailure(
+ SCMAction.START_CONTAINER_BALANCER, null, e));
+ return StartContainerBalancerResponseProto.newBuilder()
+ .setStart(false)
+ .setMessage(e.getMessage())
+ .build();
}
- return isStartedSuccessfully;
+ AUDIT.logWriteSuccess(buildAuditMessageForSuccess(
+ SCMAction.START_CONTAINER_BALANCER, null));
+ return StartContainerBalancerResponseProto.newBuilder()
+ .setStart(true)
+ .build();
}
@Override
@@ -847,7 +859,7 @@ public class SCMClientProtocolServer implements
getScm().checkAdminAccess(getRemoteUser());
AUDIT.logWriteSuccess(buildAuditMessageForSuccess(
SCMAction.STOP_CONTAINER_BALANCER, null));
- scm.getContainerBalancer().stop();
+ scm.getContainerBalancer().stopBalancer();
}
@Override
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index 05ac12cea0..e869754829 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -1412,7 +1412,7 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
public void stop() {
try {
LOG.info("Stopping Container Balancer service.");
- containerBalancer.stop();
+ containerBalancer.stopBalancer();
} catch (Exception e) {
LOG.error("Failed to stop Container Balancer service.");
}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
index f079a5eaad..0163152134 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java
@@ -192,7 +192,7 @@ public class TestContainerBalancer {
double randomThreshold = RANDOM.nextDouble() * 100;
balancerConfiguration.setThreshold(randomThreshold);
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
// waiting for balance completed.
// TODO: this is a temporary implementation for now
@@ -206,7 +206,7 @@ public class TestContainerBalancer {
unBalancedNodesAccordingToBalancer =
containerBalancer.getUnBalancedNodes();
- containerBalancer.stop();
+ containerBalancer.stopBalancer();
Assert.assertEquals(
expectedUnBalancedNodes.size(),
unBalancedNodesAccordingToBalancer.size());
@@ -225,11 +225,11 @@ public class TestContainerBalancer {
@Test
public void unBalancedNodesListShouldBeEmptyWhenClusterIsBalanced() {
balancerConfiguration.setThreshold(99.99);
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
sleepWhileBalancing(100);
- containerBalancer.stop();
+ containerBalancer.stopBalancer();
ContainerBalancerMetrics metrics = containerBalancer.getMetrics();
Assert.assertEquals(0, containerBalancer.getUnBalancedNodes().size());
Assert.assertEquals(0, metrics.getNumDatanodesUnbalanced());
@@ -247,7 +247,7 @@ public class TestContainerBalancer {
balancerConfiguration.setMaxSizeToMovePerIteration(100 * OzoneConsts.GB);
balancerConfiguration.setThreshold(1);
balancerConfiguration.setIterations(1);
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
sleepWhileBalancing(500);
@@ -258,7 +258,7 @@ public class TestContainerBalancer {
Assert.assertTrue(metrics.getNumDatanodesInvolvedInLatestIteration() > 0);
Assert.assertFalse(
metrics.getNumDatanodesInvolvedInLatestIteration() > number);
- containerBalancer.stop();
+ containerBalancer.stopBalancer();
}
@Test
@@ -268,9 +268,9 @@ public class TestContainerBalancer {
containerInfo.setState(HddsProtos.LifeCycleState.OPEN);
}
balancerConfiguration.setThreshold(10);
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
sleepWhileBalancing(500);
- containerBalancer.stop();
+ containerBalancer.stopBalancer();
// balancer should have identified unbalanced nodes
Assert.assertFalse(containerBalancer.getUnBalancedNodes().isEmpty());
@@ -288,9 +288,9 @@ public class TestContainerBalancer {
for (ContainerInfo containerInfo : cidToInfoMap.values()) {
containerInfo.setState(HddsProtos.LifeCycleState.CLOSED);
}
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
sleepWhileBalancing(500);
- containerBalancer.stop();
+ containerBalancer.stopBalancer();
// check whether all selected containers are closed
for (ContainerMoveSelection moveSelection:
@@ -306,7 +306,7 @@ public class TestContainerBalancer {
balancerConfiguration.setThreshold(1);
balancerConfiguration.setMaxSizeToMovePerIteration(10 * OzoneConsts.GB);
balancerConfiguration.setIterations(1);
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
sleepWhileBalancing(500);
@@ -318,7 +318,7 @@ public class TestContainerBalancer {
.getDataSizeMovedGBInLatestIteration();
Assert.assertTrue(size > 0);
Assert.assertFalse(size > 10);
- containerBalancer.stop();
+ containerBalancer.stopBalancer();
}
@Test
@@ -326,7 +326,7 @@ public class TestContainerBalancer {
balancerConfiguration.setThreshold(10);
balancerConfiguration.setMaxSizeToMovePerIteration(100 * OzoneConsts.GB);
balancerConfiguration.setMaxDatanodesPercentageToInvolvePerIteration(100);
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
// waiting for balance completed.
// TODO: this is a temporary implementation for now
@@ -335,7 +335,7 @@ public class TestContainerBalancer {
Thread.sleep(1000);
} catch (InterruptedException e) { }
- containerBalancer.stop();
+ containerBalancer.stopBalancer();
Map<DatanodeDetails, ContainerMoveSelection> sourceToTargetMap =
containerBalancer.getSourceToTargetMap();
for (ContainerMoveSelection moveSelection : sourceToTargetMap.values()) {
@@ -353,7 +353,7 @@ public class TestContainerBalancer {
balancerConfiguration.setThreshold(10);
balancerConfiguration.setMaxSizeToMovePerIteration(50 * OzoneConsts.GB);
balancerConfiguration.setMaxDatanodesPercentageToInvolvePerIteration(100);
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
// waiting for balance completed.
// TODO: this is a temporary implementation for now
@@ -362,7 +362,7 @@ public class TestContainerBalancer {
Thread.sleep(1000);
} catch (InterruptedException e) { }
- containerBalancer.stop();
+ containerBalancer.stopBalancer();
Map<DatanodeDetails, ContainerMoveSelection> sourceToTargetMap =
containerBalancer.getSourceToTargetMap();
@@ -396,7 +396,7 @@ public class TestContainerBalancer {
balancerConfiguration.setMaxDatanodesPercentageToInvolvePerIteration(100);
balancerConfiguration.setMaxSizeToMovePerIteration(50 * OzoneConsts.GB);
balancerConfiguration.setMaxSizeEnteringTarget(50 * OzoneConsts.GB);
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
// waiting for balance completed.
// TODO: this is a temporary implementation for now
@@ -406,7 +406,7 @@ public class TestContainerBalancer {
} catch (InterruptedException e) {
}
- containerBalancer.stop();
+ containerBalancer.stopBalancer();
for (ContainerMoveSelection moveSelection :
containerBalancer.getSourceToTargetMap().values()) {
DatanodeDetails target = moveSelection.getTargetNode();
@@ -424,7 +424,7 @@ public class TestContainerBalancer {
balancerConfiguration.setMaxSizeToMovePerIteration(50 * OzoneConsts.GB);
balancerConfiguration.setMaxSizeEnteringTarget(50 * OzoneConsts.GB);
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
// waiting for balance completed.
// TODO: this is a temporary implementation for now
@@ -433,7 +433,7 @@ public class TestContainerBalancer {
Thread.sleep(1000);
} catch (InterruptedException e) { }
- containerBalancer.stop();
+ containerBalancer.stopBalancer();
Set<ContainerID> containers = new HashSet<>();
for (ContainerMoveSelection moveSelection :
containerBalancer.getSourceToTargetMap().values()) {
@@ -451,7 +451,7 @@ public class TestContainerBalancer {
balancerConfiguration.setMaxSizeEnteringTarget(50 * OzoneConsts.GB);
balancerConfiguration.setExcludeContainers("1, 4, 5");
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
// waiting for balance completed.
// TODO: this is a temporary implementation for now
@@ -460,7 +460,7 @@ public class TestContainerBalancer {
Thread.sleep(1000);
} catch (InterruptedException e) { }
- containerBalancer.stop();
+ containerBalancer.stopBalancer();
Set<ContainerID> excludeContainers =
balancerConfiguration.getExcludeContainers();
for (ContainerMoveSelection moveSelection :
@@ -472,30 +472,31 @@ public class TestContainerBalancer {
@Test
public void balancerShouldObeyMaxSizeEnteringTargetLimit() {
+ conf.set("ozone.scm.container.size", "1MB");
+ balancerConfiguration =
+ conf.getObject(ContainerBalancerConfiguration.class);
balancerConfiguration.setThreshold(10);
balancerConfiguration.setMaxDatanodesPercentageToInvolvePerIteration(100);
balancerConfiguration.setMaxSizeToMovePerIteration(50 * OzoneConsts.GB);
- // no containers should be selected when the limit is zero
- balancerConfiguration.setMaxSizeEnteringTarget(0);
- boolean startResult = containerBalancer.start(balancerConfiguration);
+ // no containers should be selected when the limit is just 2 MB
+ balancerConfiguration.setMaxSizeEnteringTarget(2 * OzoneConsts.MB);
+ startBalancer(balancerConfiguration);
+ sleepWhileBalancing(500);
- Assert.assertFalse(startResult);
+ Assert.assertFalse(containerBalancer.getUnBalancedNodes().isEmpty());
+ Assert.assertTrue(containerBalancer.getSourceToTargetMap().isEmpty());
+ containerBalancer.stopBalancer();
// some containers should be selected when using default values
OzoneConfiguration ozoneConfiguration = new OzoneConfiguration();
ContainerBalancerConfiguration cbc = ozoneConfiguration.
getObject(ContainerBalancerConfiguration.class);
- containerBalancer.start(cbc);
+ startBalancer(cbc);
- // waiting for balance completed.
- // TODO: this is a temporary implementation for now
- // modify this after balancer is fully completed
- try {
- Thread.sleep(500);
- } catch (InterruptedException e) { }
+ sleepWhileBalancing(500);
- containerBalancer.stop();
+ containerBalancer.stopBalancer();
// balancer should have identified unbalanced nodes
Assert.assertFalse(containerBalancer.getUnBalancedNodes().isEmpty());
Assert.assertFalse(containerBalancer.getSourceToTargetMap().isEmpty());
@@ -512,8 +513,9 @@ public class TestContainerBalancer {
balancerConfiguration.setMaxSizeToMovePerIteration(6 * OzoneConsts.GB);
balancerConfiguration.setMaxDatanodesPercentageToInvolvePerIteration(100);
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
sleepWhileBalancing(500);
+ containerBalancer.stopBalancer();
ContainerBalancerMetrics metrics = containerBalancer.getMetrics();
Assert.assertEquals(determineExpectedUnBalancedNodes(
@@ -521,7 +523,6 @@ public class TestContainerBalancer {
metrics.getNumDatanodesUnbalanced());
Assert.assertTrue(metrics.getDataSizeMovedGBInLatestIteration() <= 6);
Assert.assertEquals(1, metrics.getNumIterations());
- containerBalancer.stop();
}
/**
@@ -565,9 +566,9 @@ public class TestContainerBalancer {
balancerConfiguration.setExcludeNodes(excludeNodes);
balancerConfiguration.setIncludeNodes(includeNodes);
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
sleepWhileBalancing(500);
- containerBalancer.stop();
+ containerBalancer.stopBalancer();
// finally, these should be the only nodes included in balancing
// (included - excluded)
@@ -611,7 +612,7 @@ public class TestContainerBalancer {
balancerConfiguration.setMaxSizeToMovePerIteration(100 * OzoneConsts.GB);
balancerConfiguration.setMaxDatanodesPercentageToInvolvePerIteration(100);
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
sleepWhileBalancing(1000);
/*
@@ -633,7 +634,7 @@ public class TestContainerBalancer {
ReplicationManager.MoveResult.REPLICATION_FAIL_NODE_UNHEALTHY));
balancerConfiguration.setMaxSizeToMovePerIteration(10 * OzoneConsts.GB);
- containerBalancer.start(balancerConfiguration);
+ startBalancer(balancerConfiguration);
sleepWhileBalancing(1000);
Assert.assertEquals(ContainerBalancer.IterationResult.ITERATION_COMPLETED,
@@ -823,4 +824,14 @@ public class TestContainerBalancer {
}
}
+ private void startBalancer(ContainerBalancerConfiguration config) {
+ containerBalancer.setConfig(config);
+ try {
+ containerBalancer.startBalancer();
+ } catch (IllegalContainerBalancerStateException |
+ InvalidContainerBalancerConfigurationException e) {
+ LOG.info("Could not start ContainerBalancer while testing", e);
+ }
+ }
+
}
diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStartSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStartSubcommand.java
index c1e6806c3c..7b349d1426 100644
--- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStartSubcommand.java
+++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStartSubcommand.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hdds.scm.cli;
import org.apache.hadoop.hdds.cli.HddsVersionProvider;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
import org.apache.hadoop.hdds.scm.client.ScmClient;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;
@@ -71,15 +72,18 @@ public class ContainerBalancerStartSubcommand extends ScmSubcommand {
@Override
public void execute(ScmClient scmClient) throws IOException {
- boolean result = scmClient.startContainerBalancer(threshold, iterations,
+ StartContainerBalancerResponseProto response = scmClient.
+ startContainerBalancer(threshold, iterations,
maxDatanodesPercentageToInvolvePerIteration,
maxSizeToMovePerIterationInGB, maxSizeEnteringTargetInGB,
maxSizeLeavingSourceInGB);
- if (result) {
+ if (response.getStart()) {
System.out.println("Container Balancer started successfully.");
- return;
+ } else {
+ System.out.println("Failed to start Container Balancer.");
+ if (response.hasMessage()) {
+ System.out.printf("Failure reason: %s", response.getMessage());
+ }
}
- System.out.println("Container Balancer is either already running or " +
- "failed to start.\nPlease check the logs for more info.");
}
}
\ No newline at end of file
diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
index b12321b8bb..af7337afdc 100644
--- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
+++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ReadContainerResponseProto;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.XceiverClientManager;
@@ -580,7 +581,7 @@ public class ContainerOperationClient implements ScmClient {
}
@Override
- public boolean startContainerBalancer(
+ public StartContainerBalancerResponseProto startContainerBalancer(
Optional<Double> threshold, Optional<Integer> iterations,
Optional<Integer> maxDatanodesPercentageToInvolvePerIteration,
Optional<Long> maxSizeToMovePerIterationInGB,
diff --git a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestContainerBalancerSubCommand.java b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestContainerBalancerSubCommand.java
index 8ce07ec3eb..a1bc25dab6 100644
--- a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestContainerBalancerSubCommand.java
+++ b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestContainerBalancerSubCommand.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hdds.scm.cli.datanode;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos;
import org.apache.hadoop.hdds.scm.cli.ContainerBalancerStopSubcommand;
import org.apache.hadoop.hdds.scm.cli.ContainerBalancerStartSubcommand;
import org.apache.hadoop.hdds.scm.cli.ContainerBalancerStatusSubcommand;
@@ -115,8 +116,12 @@ public class TestContainerBalancerSubCommand {
throws IOException {
ScmClient scmClient = mock(ScmClient.class);
Mockito.when(scmClient.startContainerBalancer(
- null, null, null, null, null, null))
- .thenAnswer(invocation -> true);
+ null, null, null, null, null, null))
+ .thenReturn(
+ StorageContainerLocationProtocolProtos
+ .StartContainerBalancerResponseProto.newBuilder()
+ .setStart(true)
+ .build());
startCmd.execute(scmClient);
Pattern p = Pattern.compile("^Container\\sBalancer\\sstarted" +
@@ -130,13 +135,16 @@ public class TestContainerBalancerSubCommand {
throws IOException {
ScmClient scmClient = mock(ScmClient.class);
Mockito.when(scmClient.startContainerBalancer(
- null, null, null, null, null, null))
- .thenAnswer(invocation -> false);
+ null, null, null, null, null, null))
+ .thenReturn(StorageContainerLocationProtocolProtos
+ .StartContainerBalancerResponseProto.newBuilder()
+ .setStart(false)
+ .setMessage("")
+ .build());
startCmd.execute(scmClient);
- Pattern p = Pattern.compile("^Container\\sBalancer\\sis\\seither" +
- "\\salready\\srunning\\sor\\sfailed\\sto\\sstart.\\nPlease\\scheck" +
- "\\sthe\\slogs\\sfor\\smore\\sinfo.");
+ Pattern p = Pattern.compile("^Failed\\sto\\sstart\\sContainer" +
+ "\\sBalancer.");
Matcher m = p.matcher(outContent.toString(DEFAULT_ENCODING));
assertTrue(m.find());
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org