Posted to commits@ozone.apache.org by ad...@apache.org on 2022/06/23 19:02:11 UTC
[ozone] branch master updated: HDDS-5916. Datanodes stuck in leader election in Kubernetes (#3186)
This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 2ffbfff0c1 HDDS-5916. Datanodes stuck in leader election in Kubernetes (#3186)
2ffbfff0c1 is described below
commit 2ffbfff0c199898444c2ea3163e782b1dd9832e3
Author: sokui <xi...@gmail.com>
AuthorDate: Thu Jun 23 12:02:06 2022 -0700
HDDS-5916. Datanodes stuck in leader election in Kubernetes (#3186)
---
.../org/apache/hadoop/hdds/ratis/RatisHelper.java | 23 ++-
.../hadoop/hdds/scm/net/NetworkTopology.java | 8 +
.../hadoop/hdds/scm/net/NetworkTopologyImpl.java | 71 +++++++-
.../hdds/scm/net/TestNetworkTopologyImpl.java | 38 ++++
.../common/states/datanode/InitDatanodeState.java | 2 +-
.../hadoop/hdds/server/events/EventQueue.java | 24 ++-
.../apache/hadoop/hdds/scm/events/SCMEvents.java | 7 +
.../org/apache/hadoop/hdds/scm/ha/SCMService.java | 1 +
.../hadoop/hdds/scm/node/NewNodeHandler.java | 1 +
.../hdds/scm/node/NodeAddressUpdateHandler.java | 69 ++++++++
.../hadoop/hdds/scm/node/NodeStateManager.java | 22 +++
.../hadoop/hdds/scm/node/SCMNodeManager.java | 105 ++++++++---
.../hadoop/hdds/scm/node/states/NodeStateMap.java | 25 +++
.../scm/pipeline/BackgroundPipelineCreator.java | 9 +-
.../hadoop/hdds/scm/pipeline/PipelineManager.java | 15 ++
.../hdds/scm/pipeline/PipelineManagerImpl.java | 91 ++++++++--
.../pipeline/WritableRatisContainerProvider.java | 62 ++++---
.../hdds/scm/server/StorageContainerManager.java | 6 +
.../hadoop/hdds/scm/node/TestNodeStateManager.java | 38 ++++
.../hadoop/hdds/scm/node/TestSCMNodeManager.java | 67 +++++++
.../hdds/scm/pipeline/MockPipelineManager.java | 5 +
.../hdds/scm/pipeline/TestPipelineManagerImpl.java | 194 +++++++++++++++++++++
hadoop-ozone/dist/src/main/compose/restart/test.sh | 5 +
.../src/main/k8s/definitions/ozone/config.yaml | 1 +
.../main/k8s/examples/ozone/config-configmap.yaml | 1 +
.../k8s/examples/ozone/datanode-statefulset.yaml | 2 +
.../main/k8s/examples/ozone/om-statefulset.yaml | 1 +
.../main/k8s/examples/ozone/s3g-statefulset.yaml | 1 +
.../main/k8s/examples/ozone/scm-statefulset.yaml | 2 +
.../dist/src/main/k8s/examples/ozone/test.sh | 14 +-
.../freon/{validate.robot => generate-chunk.robot} | 9 +-
.../dist/src/main/smoketest/freon/generate.robot | 3 -
.../freon/{validate.robot => validate-chunk.robot} | 5 +-
.../dist/src/main/smoketest/freon/validate.robot | 3 -
.../dist/src/main/smoketest/ozone-lib/freon.robot | 4 +
35 files changed, 847 insertions(+), 87 deletions(-)
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java
index 67a3ac14a4..be6076a918 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java
@@ -31,6 +31,7 @@ import java.util.stream.Collectors;
import org.apache.hadoop.hdds.StringUtils;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.DatanodeDetails.Port;
import org.apache.hadoop.hdds.ratis.conf.RatisClientConfig;
@@ -39,6 +40,7 @@ import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.hdds.security.x509.SecurityConfig;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.ratis.RaftConfigKeys;
import org.apache.ratis.client.RaftClient;
import org.apache.ratis.client.RaftClientConfigKeys;
@@ -65,6 +67,8 @@ public final class RatisHelper {
private static final Logger LOG = LoggerFactory.getLogger(RatisHelper.class);
+ private static final OzoneConfiguration CONF = new OzoneConfiguration();
+
// Prefix for Ratis Server GRPC and Ratis client conf.
public static final String HDDS_DATANODE_RATIS_PREFIX_KEY = "hdds.ratis";
@@ -97,7 +101,18 @@ public final class RatisHelper {
}
private static String toRaftPeerAddress(DatanodeDetails id, Port.Name port) {
- return id.getIpAddress() + ":" + id.getPort(port).getValue();
+ if (datanodeUseHostName()) {
+ final String address =
+ id.getHostName() + ":" + id.getPort(port).getValue();
+ LOG.debug("Datanode is using hostname for raft peer address: {}",
+ address);
+ return address;
+ } else {
+ final String address =
+ id.getIpAddress() + ":" + id.getPort(port).getValue();
+ LOG.debug("Datanode is using IP for raft peer address: {}", address);
+ return address;
+ }
}
public static RaftPeerId toRaftPeerId(DatanodeDetails id) {
@@ -369,6 +384,12 @@ public final class RatisHelper {
.min(Long::compareTo).orElse(null);
}
+ private static boolean datanodeUseHostName() {
+ return CONF.getBoolean(
+ DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME,
+ DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT);
+ }
+
private static <U> Class<? extends U> getClass(String name,
Class<U> xface) {
try {
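
For context, the RatisHelper hunks above are the datanode-side half of the fix: the Raft peer address now follows dfs.datanode.use.datanode.hostname. A minimal sketch of the selection logic, assuming dn is a populated DatanodeDetails (the variable and the sample addresses are illustrative, not patch code):

    OzoneConfiguration conf = new OzoneConfiguration();
    boolean useHostName = conf.getBoolean(
        DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME,          // "dfs.datanode.use.datanode.hostname"
        DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT); // false
    // With the flag on, a Raft peer is addressed by its stable DNS name,
    // e.g. "datanode-0.datanode:9858"; with it off, by a pod IP such as
    // "10.244.1.7:9858", which goes stale when the pod restarts.
    String host = useHostName ? dn.getHostName() : dn.getIpAddress();
    String raftPeerAddress = host + ":" + dn.getPort(Port.Name.RATIS).getValue();
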
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/net/NetworkTopology.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/net/NetworkTopology.java
index 10184ae00e..c863dc3da5 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/net/NetworkTopology.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/net/NetworkTopology.java
@@ -39,6 +39,14 @@ public interface NetworkTopology {
*/
void add(Node node);
+ /**
+ * Update a node. This will be called when a datanode needs to be updated.
+ * If the old datanode does not exist, then just add the new datanode.
+ * @param oldNode node to be updated; can be null
+ * @param newNode node to update to; cannot be null
+ */
+ void update(Node oldNode, Node newNode);
+
/**
* Remove a node from the network topology. This will be called when a
* existing datanode is removed from the system.
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/net/NetworkTopologyImpl.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/net/NetworkTopologyImpl.java
index f13a50b9a3..a79c73d9bf 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/net/NetworkTopologyImpl.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/net/NetworkTopologyImpl.java
@@ -116,6 +116,59 @@ public class NetworkTopologyImpl implements NetworkTopology {
}
}
+ /**
+ * Update a leaf node. It is called when a datanode needs to be updated.
+ * If the old datanode does not exist, then just add the new datanode.
+ * @param oldNode node to be updated; can be null
+ * @param newNode node to update to; cannot be null
+ */
+ @Override
+ public void update(Node oldNode, Node newNode) {
+ Preconditions.checkArgument(newNode != null, "newNode cannot be null");
+ if (oldNode != null && oldNode instanceof InnerNode) {
+ throw new IllegalArgumentException(
+ "Not allowed to update an inner node: "
+ + oldNode.getNetworkFullPath());
+ }
+
+ if (newNode instanceof InnerNode) {
+ throw new IllegalArgumentException(
+ "Not allowed to update a leaf node to an inner node: "
+ + newNode.getNetworkFullPath());
+ }
+
+ int newDepth = NetUtils.locationToDepth(newNode.getNetworkLocation()) + 1;
+ // Check depth
+ if (maxLevel != newDepth) {
+ throw new InvalidTopologyException("Failed to update to " +
+ newNode.getNetworkFullPath()
+ + ": Its path depth is not "
+ + maxLevel);
+ }
+
+ netlock.writeLock().lock();
+ boolean add;
+ try {
+ boolean exist = false;
+ if (oldNode != null) {
+ exist = containsNode(oldNode);
+ }
+ if (exist) {
+ clusterTree.remove(oldNode);
+ }
+
+ add = clusterTree.add(newNode);
+ } finally {
+ netlock.writeLock().unlock();
+ }
+ if (add) {
+ LOG.info("Updated to the new node: {}", newNode.getNetworkFullPath());
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("NetworkTopology became:\n{}", this);
+ }
+ }
+ }
+
/**
* Remove a node from the network topology. This will be called when a
* existing datanode is removed from the system.
@@ -150,16 +203,20 @@ public class NetworkTopologyImpl implements NetworkTopology {
Preconditions.checkArgument(node != null, "node cannot be null");
netlock.readLock().lock();
try {
- Node parent = node.getParent();
- while (parent != null && parent != clusterTree) {
- parent = parent.getParent();
- }
- if (parent == clusterTree) {
- return true;
- }
+ return containsNode(node);
} finally {
netlock.readLock().unlock();
}
+ }
+
+ private boolean containsNode(Node node) {
+ Node parent = node.getParent();
+ while (parent != null && parent != clusterTree) {
+ parent = parent.getParent();
+ }
+ if (parent == clusterTree) {
+ return true;
+ }
return false;
}
diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/net/TestNetworkTopologyImpl.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/net/TestNetworkTopologyImpl.java
index 53991f3382..12b75d2b79 100644
--- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/net/TestNetworkTopologyImpl.java
+++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/net/TestNetworkTopologyImpl.java
@@ -935,6 +935,44 @@ public class TestNetworkTopologyImpl {
}
@Test
+ public void testUpdateNode() {
+ List<NodeSchema> schemas = new ArrayList<>();
+ schemas.add(ROOT_SCHEMA);
+ schemas.add(DATACENTER_SCHEMA);
+ schemas.add(RACK_SCHEMA);
+ schemas.add(LEAF_SCHEMA);
+
+ NodeSchemaManager manager = NodeSchemaManager.getInstance();
+ manager.init(schemas.toArray(new NodeSchema[0]), true);
+ NetworkTopology newCluster =
+ new NetworkTopologyImpl(manager);
+ Node node = createDatanode("1.1.1.1", "/d1/r1");
+ newCluster.add(node);
+ assertTrue(newCluster.contains(node));
+
+ // update
+ Node newNode = createDatanode("1.1.1.2", "/d1/r1");
+ assertFalse(newCluster.contains(newNode));
+ newCluster.update(node, newNode);
+ assertFalse(newCluster.contains(node));
+ assertTrue(newCluster.contains(newNode));
+
+ // update a non-existing node
+ Node nodeExisting = createDatanode("1.1.1.3", "/d1/r1");
+ Node newNode2 = createDatanode("1.1.1.4", "/d1/r1");
+ assertFalse(newCluster.contains(nodeExisting));
+ assertFalse(newCluster.contains(newNode2));
+
+ newCluster.update(nodeExisting, newNode2);
+ assertFalse(newCluster.contains(nodeExisting));
+ assertTrue(newCluster.contains(newNode2));
+
+ // old node is null
+ Node newNode3 = createDatanode("1.1.1.5", "/d1/r1");
+ assertFalse(newCluster.contains(newNode3));
+ newCluster.update(null, newNode3);
+ assertTrue(newCluster.contains(newNode3));
+ }
public void testIsAncestor() {
NodeImpl r1 = new NodeImpl("r1", "/", NODE_COST_DEFAULT);
NodeImpl r12 = new NodeImpl("r12", "/", NODE_COST_DEFAULT);
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/InitDatanodeState.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/InitDatanodeState.java
index ff53088a5d..cd469620e1 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/InitDatanodeState.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/datanode/InitDatanodeState.java
@@ -125,7 +125,7 @@ public class InitDatanodeState implements DatanodeState,
File idPath = new File(dataNodeIDPath);
DatanodeDetails datanodeDetails = this.context.getParent()
.getDatanodeDetails();
- if (datanodeDetails != null && !idPath.exists()) {
+ if (datanodeDetails != null) {
try {
ContainerUtils.writeDatanodeDetailsTo(datanodeDetails, idPath);
} catch (IOException ex) {
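
This one-line change makes the datanode rewrite its persisted datanode.id file on every startup instead of only when the file is absent, so an IP or hostname recorded before a pod restart is refreshed rather than reused.
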
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventQueue.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventQueue.java
index e3a18b7427..a0b8ac9553 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventQueue.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/events/EventQueue.java
@@ -26,6 +26,9 @@ import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import java.util.stream.Stream;
+import com.google.gson.ExclusionStrategy;
+import com.google.gson.FieldAttributes;
+import org.apache.hadoop.hdds.scm.net.NodeImpl;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
@@ -58,10 +61,29 @@ public class EventQueue implements EventPublisher, AutoCloseable {
private boolean isRunning = true;
- private static final Gson TRACING_SERIALIZER = new GsonBuilder().create();
+ private static final Gson TRACING_SERIALIZER = new GsonBuilder()
+ .setExclusionStrategies(new DatanodeDetailsGsonExclusionStrategy())
+ .create();
private boolean isSilent = false;
+ // The field parent in DatanodeDetails class has the circular reference
+ // which will result in Gson infinite recursive parsing. We need to exclude
+ // this field when generating json string for DatanodeDetails object
+ static class DatanodeDetailsGsonExclusionStrategy
+ implements ExclusionStrategy {
+ @Override
+ public boolean shouldSkipField(FieldAttributes f) {
+ return f.getDeclaringClass() == NodeImpl.class
+ && f.getName().equals("parent");
+ }
+
+ @Override
+ public boolean shouldSkipClass(Class<?> aClass) {
+ return false;
+ }
+ }
+
/**
* Add new handler to the event queue.
* <p>
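
The exclusion strategy is needed because Gson does no cycle detection: the topology node classes link parent and children both ways, and serializing such a graph recurses until StackOverflowError. A standalone sketch of the same pattern, using a hypothetical doubly linked class N rather than the real NodeImpl:

    class N {
      String name;
      N parent;
      java.util.List<N> children;
    }

    Gson gson = new GsonBuilder()
        .setExclusionStrategies(new ExclusionStrategy() {
          @Override
          public boolean shouldSkipField(FieldAttributes f) {
            // Skip the back-reference; this is what breaks the cycle.
            return f.getDeclaringClass() == N.class
                && "parent".equals(f.getName());
          }

          @Override
          public boolean shouldSkipClass(Class<?> aClass) {
            return false;
          }
        })
        .create();

    // Without the strategy, gson.toJson(root) loops root -> children ->
    // parent -> children -> ... and overflows the stack; with it, the
    // parent field is simply omitted and serialization terminates.
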
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java
index 4bb59dab54..c51d792d4c 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java
@@ -157,6 +157,13 @@ public final class SCMEvents {
public static final TypedEvent<DatanodeDetails> NEW_NODE =
new TypedEvent<>(DatanodeDetails.class, "New_Node");
+ /**
+ * This event will be triggered whenever a datanode is registered with
+ * SCM with a different Ip or host name.
+ */
+ public static final TypedEvent<DatanodeDetails> NODE_ADDRESS_UPDATE =
+ new TypedEvent<>(DatanodeDetails.class, "Node_Address_Update");
+
/**
* This event will be triggered whenever a datanode is moved from healthy to
* stale state.
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMService.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMService.java
index 2b185c9e4e..189d6befd5 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMService.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMService.java
@@ -60,6 +60,7 @@ public interface SCMService {
enum Event {
PRE_CHECK_COMPLETED,
NEW_NODE_HANDLER_TRIGGERED,
+ NODE_ADDRESS_UPDATE_HANDLER_TRIGGERED,
UNHEALTHY_TO_HEALTHY_NODE_HANDLER_TRIGGERED
}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NewNodeHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NewNodeHandler.java
index 674cf2dfcc..612ab048c7 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NewNodeHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NewNodeHandler.java
@@ -56,6 +56,7 @@ public class NewNodeHandler implements EventHandler<DatanodeDetails> {
public void onMessage(DatanodeDetails datanodeDetails,
EventPublisher publisher) {
try {
+ pipelineManager.closeStalePipelines(datanodeDetails);
serviceManager.notifyEventTriggered(Event.NEW_NODE_HANDLER_TRIGGERED);
if (datanodeDetails.getPersistedOpState()
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeAddressUpdateHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeAddressUpdateHandler.java
new file mode 100644
index 0000000000..7c9c628176
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeAddressUpdateHandler.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hdds.scm.node;
+
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.scm.ha.SCMService;
+import org.apache.hadoop.hdds.scm.ha.SCMServiceManager;
+import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
+import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
+import org.apache.hadoop.hdds.server.events.EventHandler;
+import org.apache.hadoop.hdds.server.events.EventPublisher;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Handles datanode ip or hostname change event.
+ */
+public class NodeAddressUpdateHandler
+ implements EventHandler<DatanodeDetails> {
+ private static final Logger LOG =
+ LoggerFactory.getLogger(NodeAddressUpdateHandler.class);
+
+ private final PipelineManager pipelineManager;
+ private final NodeDecommissionManager decommissionManager;
+ private final SCMServiceManager serviceManager;
+
+ public NodeAddressUpdateHandler(PipelineManager pipelineManager,
+ NodeDecommissionManager
+ decommissionManager,
+ SCMServiceManager serviceManager) {
+ this.pipelineManager = pipelineManager;
+ this.decommissionManager = decommissionManager;
+ this.serviceManager = serviceManager;
+ }
+
+ @Override
+ public void onMessage(DatanodeDetails datanodeDetails,
+ EventPublisher publisher) {
+ try {
+ LOG.info("Closing stale pipelines for datanode: {}", datanodeDetails);
+ pipelineManager.closeStalePipelines(datanodeDetails);
+ serviceManager.notifyEventTriggered(SCMService.Event
+ .NODE_ADDRESS_UPDATE_HANDLER_TRIGGERED);
+
+ decommissionManager.continueAdminForNode(datanodeDetails);
+ } catch (NodeNotFoundException e) {
+ // Should not happen, as the node has just registered to call this event
+ // handler.
+ LOG.error(
+ "NodeNotFound when updating the node Ip or host name to the " +
+ "decommissionManager",
+ e);
+ }
+ }
+}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java
index 1844a5e73b..da4337f68a 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java
@@ -402,6 +402,28 @@ public class NodeStateManager implements Runnable, Closeable {
.updateLastKnownLayoutVersion(layoutInfo);
}
+ /**
+ * Update node.
+ *
+ * @param datanodeDetails the datanode details
+ * @param layoutInfo the layoutInfo
+ * @throws NodeNotFoundException the node not found exception
+ */
+ public void updateNode(DatanodeDetails datanodeDetails,
+ LayoutVersionProto layoutInfo)
+ throws NodeNotFoundException {
+ DatanodeInfo datanodeInfo =
+ nodeStateMap.getNodeInfo(datanodeDetails.getUuid());
+ NodeStatus newNodeStatus = newNodeStatus(datanodeDetails, layoutInfo);
+ LOG.info("updating node {} from {} to {} with status {}",
+ datanodeDetails.getUuidString(),
+ datanodeInfo,
+ datanodeDetails,
+ newNodeStatus);
+ nodeStateMap.updateNode(datanodeDetails, newNodeStatus, layoutInfo);
+ updateLastKnownLayoutVersion(datanodeDetails, layoutInfo);
+ }
+
/**
* Returns the current state of the node.
*
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
index 73b9dbe91f..78a1b632f2 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
@@ -362,33 +362,36 @@ public class SCMNodeManager implements NodeManager {
.build();
}
- if (!isNodeRegistered(datanodeDetails)) {
- InetAddress dnAddress = Server.getRemoteIp();
- if (dnAddress != null) {
- // Mostly called inside an RPC, update ip and peer hostname
+ InetAddress dnAddress = Server.getRemoteIp();
+ if (dnAddress != null) {
+ // Mostly called inside an RPC, update ip
+ if (!useHostname) {
datanodeDetails.setHostName(dnAddress.getHostName());
- datanodeDetails.setIpAddress(dnAddress.getHostAddress());
}
- try {
- String dnsName;
- String networkLocation;
- datanodeDetails.setNetworkName(datanodeDetails.getUuidString());
- if (useHostname) {
- dnsName = datanodeDetails.getHostName();
- } else {
- dnsName = datanodeDetails.getIpAddress();
- }
- networkLocation = nodeResolve(dnsName);
- if (networkLocation != null) {
- datanodeDetails.setNetworkLocation(networkLocation);
- }
+ datanodeDetails.setIpAddress(dnAddress.getHostAddress());
+ }
+ String dnsName;
+ String networkLocation;
+ datanodeDetails.setNetworkName(datanodeDetails.getUuidString());
+ if (useHostname) {
+ dnsName = datanodeDetails.getHostName();
+ } else {
+ dnsName = datanodeDetails.getIpAddress();
+ }
+ networkLocation = nodeResolve(dnsName);
+ if (networkLocation != null) {
+ datanodeDetails.setNetworkLocation(networkLocation);
+ }
+
+ if (!isNodeRegistered(datanodeDetails)) {
+ try {
clusterMap.add(datanodeDetails);
nodeStateManager.addNode(datanodeDetails, layoutInfo);
// Check that datanode in nodeStateManager has topology parent set
DatanodeDetails dn = nodeStateManager.getNode(datanodeDetails);
Preconditions.checkState(dn.getParent() != null);
- addEntryTodnsToUuidMap(dnsName, datanodeDetails.getUuidString());
+ addEntryToDnsToUuidMap(dnsName, datanodeDetails.getUuidString());
// Updating Node Report, as registration is successful
processNodeReport(datanodeDetails, nodeReport);
LOG.info("Registered Data node : {}", datanodeDetails);
@@ -402,6 +405,42 @@ public class SCMNodeManager implements NodeManager {
LOG.error("Cannot find datanode {} from nodeStateManager",
datanodeDetails.toString());
}
+ } else {
+ // Update datanode if it is registered but the ip or hostname changes
+ try {
+ final DatanodeInfo datanodeInfo =
+ nodeStateManager.getNode(datanodeDetails);
+ if (!datanodeInfo.getIpAddress().equals(datanodeDetails.getIpAddress())
+ || !datanodeInfo.getHostName()
+ .equals(datanodeDetails.getHostName())) {
+ LOG.info("Updating data node {} from {} to {}",
+ datanodeDetails.getUuidString(),
+ datanodeInfo,
+ datanodeDetails);
+ clusterMap.update(datanodeInfo, datanodeDetails);
+
+ String oldDnsName;
+ if (useHostname) {
+ oldDnsName = datanodeInfo.getHostName();
+ } else {
+ oldDnsName = datanodeInfo.getIpAddress();
+ }
+ updateEntryFromDnsToUuidMap(oldDnsName,
+ dnsName,
+ datanodeDetails.getUuidString());
+
+ nodeStateManager.updateNode(datanodeDetails, layoutInfo);
+ DatanodeDetails dn = nodeStateManager.getNode(datanodeDetails);
+ Preconditions.checkState(dn.getParent() != null);
+ processNodeReport(datanodeDetails, nodeReport);
+ LOG.info("Updated Datanode to: {}", dn);
+ scmNodeEventPublisher
+ .fireEvent(SCMEvents.NODE_ADDRESS_UPDATE, dn);
+ }
+ } catch (NodeNotFoundException e) {
+ LOG.error("Cannot find datanode {} from nodeStateManager",
+ datanodeDetails);
+ }
}
return RegisteredCommand.newBuilder().setErrorCode(ErrorCode.success)
@@ -418,11 +457,9 @@ public class SCMNodeManager implements NodeManager {
* @param dnsName String representing the hostname or IP of the node
* @param uuid String representing the UUID of the registered node.
*/
- @SuppressFBWarnings(value = "AT_OPERATION_SEQUENCE_ON_CONCURRENT_ABSTRACTION",
- justification = "The method is synchronized and this is the only place " +
- "dnsToUuidMap is modified")
- private synchronized void addEntryTodnsToUuidMap(
- String dnsName, String uuid) {
+ @SuppressFBWarnings(value = "AT_OPERATION_SEQUENCE_ON_CONCURRENT_ABSTRACTION")
+ private synchronized void addEntryToDnsToUuidMap(
+ String dnsName, String uuid) {
Set<String> dnList = dnsToUuidMap.get(dnsName);
if (dnList == null) {
dnList = ConcurrentHashMap.newKeySet();
@@ -431,6 +468,26 @@ public class SCMNodeManager implements NodeManager {
dnList.add(uuid);
}
+ private synchronized void removeEntryFromDnsToUuidMap(String dnsName) {
+ if (!dnsToUuidMap.containsKey(dnsName)) {
+ return;
+ }
+ Set<String> dnSet = dnsToUuidMap.get(dnsName);
+ if (dnSet.contains(dnsName)) {
+ dnSet.remove(dnsName);
+ }
+ if (dnSet.isEmpty()) {
+ dnsToUuidMap.remove(dnsName);
+ }
+ }
+
+ private synchronized void updateEntryFromDnsToUuidMap(String oldDnsName,
+ String newDnsName,
+ String uuid) {
+ removeEntryFromDnsToUuidMap(oldDnsName);
+ addEntryToDnsToUuidMap(newDnsName, uuid);
+ }
+
/**
* Send heartbeat to indicate the datanode is alive and doing well.
*
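
Putting the SCM-side pieces together, the sequence when an already registered datanode re-registers with a changed IP or hostname is:

    register(dn)                                  (SCMNodeManager, above)
      -> clusterMap.update(oldInfo, dn)           (NetworkTopologyImpl)
      -> updateEntryFromDnsToUuidMap(old, new, uuid)
      -> nodeStateManager.updateNode(dn, layout)  (NodeStateMap.updateNode)
      -> fireEvent(NODE_ADDRESS_UPDATE, dn)
           -> NodeAddressUpdateHandler.onMessage
                -> pipelineManager.closeStalePipelines(dn)
                -> serviceManager.notifyEventTriggered(
                       NODE_ADDRESS_UPDATE_HANDLER_TRIGGERED)
                     -> one-shot BackgroundPipelineCreator run
                -> decommissionManager.continueAdminForNode(dn)
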
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/NodeStateMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/NodeStateMap.java
index 0d8580dde7..9531c9bda9 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/NodeStateMap.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/states/NodeStateMap.java
@@ -91,6 +91,31 @@ public class NodeStateMap {
}
}
+ /**
+ * Update a node in NodeStateMap.
+ *
+ * @param datanodeDetails DatanodeDetails
+ * @param nodeStatus initial NodeStatus
+ * @param layoutInfo initial LayoutVersionProto
+ *
+ */
+ public void updateNode(DatanodeDetails datanodeDetails, NodeStatus nodeStatus,
+ LayoutVersionProto layoutInfo)
+
+ throws NodeNotFoundException {
+ lock.writeLock().lock();
+ try {
+ UUID id = datanodeDetails.getUuid();
+ if (!nodeMap.containsKey(id)) {
+ throw new NodeNotFoundException("Node UUID: " + id);
+ }
+ nodeMap.put(id, new DatanodeInfo(datanodeDetails, nodeStatus,
+ layoutInfo));
+ } finally {
+ lock.writeLock().unlock();
+ }
+ }
+
/**
* Updates the node health state.
*
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java
index 2b97eaeb47..cf3ab0baf5 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java
@@ -44,6 +44,7 @@ import java.util.concurrent.locks.ReentrantLock;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType.RATIS;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType.STAND_ALONE;
+import static org.apache.hadoop.hdds.scm.ha.SCMService.Event.NODE_ADDRESS_UPDATE_HANDLER_TRIGGERED;
import static org.apache.hadoop.hdds.scm.ha.SCMService.Event.UNHEALTHY_TO_HEALTHY_NODE_HANDLER_TRIGGERED;
import static org.apache.hadoop.hdds.scm.ha.SCMService.Event.NEW_NODE_HANDLER_TRIGGERED;
import static org.apache.hadoop.hdds.scm.ha.SCMService.Event.PRE_CHECK_COMPLETED;
@@ -64,7 +65,8 @@ public class BackgroundPipelineCreator implements SCMService {
* SCMService related variables.
* 1) after leaving safe mode, BackgroundPipelineCreator needs to
* wait for a while before really take effect.
- * 2) NewNodeHandler, NonHealthyToHealthyNodeHandler, PreCheckComplete
+ * 2) NewNodeHandler, NodeAddressUpdateHandler,
+ * NonHealthyToHealthyNodeHandler, PreCheckComplete
* will trigger a one-shot run of BackgroundPipelineCreator,
* no matter in safe mode or not.
*/
@@ -267,8 +269,9 @@ public class BackgroundPipelineCreator implements SCMService {
return;
}
if (event == NEW_NODE_HANDLER_TRIGGERED
- || event == UNHEALTHY_TO_HEALTHY_NODE_HANDLER_TRIGGERED
- || event == PRE_CHECK_COMPLETED) {
+ || event == NODE_ADDRESS_UPDATE_HANDLER_TRIGGERED
+ || event == UNHEALTHY_TO_HEALTHY_NODE_HANDLER_TRIGGERED
+ || event == PRE_CHECK_COMPLETED) {
LOG.info("trigger a one-shot run on {}.", THREAD_NAME);
serviceLock.lock();
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java
index ffce3146f5..afc663a893 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java
@@ -111,6 +111,8 @@ public interface PipelineManager extends Closeable, PipelineManagerMXBean {
void closePipeline(Pipeline pipeline, boolean onTimeout) throws IOException;
+ void closeStalePipelines(DatanodeDetails datanodeDetails);
+
void scrubPipelines() throws IOException;
void startPipelineCreator();
@@ -150,6 +152,19 @@ public interface PipelineManager extends Closeable, PipelineManagerMXBean {
throws IOException {
}
+ /**
+ * Wait one pipeline to be OPEN among a collection pipelines.
+ * @param pipelineIDs ID collection of the pipelines to wait for
+ * @param timeout wait timeout(millisecond), if 0, use default timeout
+ * @return Pipeline the pipeline which is OPEN
+ * @throws IOException in case of any Exception, such as timeout
+ */
+ default Pipeline waitOnePipelineReady(Collection<PipelineID> pipelineIDs,
+ long timeout)
+ throws IOException {
+ return null;
+ }
+
/**
* Get SafeMode status.
* @return boolean
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManagerImpl.java
index 4cb96b1d2f..4de7d658c2 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManagerImpl.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManagerImpl.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.ozone.ClientVersion;
import org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException;
+import org.apache.hadoop.thirdparty.com.google.common.collect.Lists;
import org.apache.ratis.protocol.exceptions.NotLeaderException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -62,6 +63,7 @@ import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.stream.Collectors;
/**
* SCM Pipeline Manager implementation.
@@ -439,6 +441,46 @@ public class PipelineManagerImpl implements PipelineManager {
}
}
+ /** close the pipelines whose nodes' IPs are stale.
+ *
+ * @param datanodeDetails new datanodeDetails
+ */
+ @Override
+ public void closeStalePipelines(DatanodeDetails datanodeDetails) {
+ List<Pipeline> pipelinesWithStaleIpOrHostname =
+ getStalePipelines(datanodeDetails);
+ if (pipelinesWithStaleIpOrHostname.isEmpty()) {
+ LOG.debug("No stale pipelines for datanode {}",
+ datanodeDetails.getUuidString());
+ return;
+ }
+ LOG.info("Found {} stale pipelines",
+ pipelinesWithStaleIpOrHostname.size());
+ pipelinesWithStaleIpOrHostname.forEach(p -> {
+ try {
+ LOG.info("Closing the stale pipeline: {}", p.getId());
+ closePipeline(p, false);
+ LOG.info("Closed the stale pipeline: {}", p.getId());
+ } catch (IOException e) {
+ LOG.error("Closing the stale pipeline failed: {}", p, e);
+ }
+ });
+ }
+
+ @VisibleForTesting
+ List<Pipeline> getStalePipelines(DatanodeDetails datanodeDetails) {
+ List<Pipeline> pipelines = getPipelines();
+ return pipelines.stream()
+ .filter(p -> p.getNodes().stream()
+ .anyMatch(n -> n.getUuid()
+ .equals(datanodeDetails.getUuid())
+ && (!n.getIpAddress()
+ .equals(datanodeDetails.getIpAddress())
+ || !n.getHostName()
+ .equals(datanodeDetails.getHostName()))))
+ .collect(Collectors.toList());
+ }
+
/**
* Scrub pipelines.
*/
@@ -550,34 +592,57 @@ public class PipelineManagerImpl implements PipelineManager {
@Override
public void waitPipelineReady(PipelineID pipelineID, long timeout)
throws IOException {
+ waitOnePipelineReady(Lists.newArrayList(pipelineID), timeout);
+ }
+
+ @Override
+ public Pipeline waitOnePipelineReady(Collection<PipelineID> pipelineIDs,
+ long timeout)
+ throws IOException {
long st = clock.millis();
if (timeout == 0) {
timeout = pipelineWaitDefaultTimeout;
}
-
- boolean ready;
- Pipeline pipeline;
+ List<String> pipelineIDStrs =
+ pipelineIDs.stream()
+ .map(id -> id.getId().toString())
+ .collect(Collectors.toList());
+ String piplineIdsStr = String.join(",", pipelineIDStrs);
+ Pipeline pipeline = null;
do {
- try {
- pipeline = stateManager.getPipeline(pipelineID);
- } catch (PipelineNotFoundException e) {
- throw new PipelineNotFoundException(String.format(
- "Pipeline %s cannot be found", pipelineID));
+ boolean found = false;
+ for (PipelineID pipelineID : pipelineIDs) {
+ try {
+ Pipeline tempPipeline = stateManager.getPipeline(pipelineID);
+ found = true;
+ if (tempPipeline.isOpen()) {
+ pipeline = tempPipeline;
+ break;
+ }
+ } catch (PipelineNotFoundException e) {
+ LOG.warn("Pipeline {} cannot be found", pipelineID);
+ }
}
- ready = pipeline.isOpen();
- if (!ready) {
+
+ if (!found) {
+ throw new PipelineNotFoundException("The input pipeline IDs " +
+ piplineIdsStr + " cannot be found");
+ }
+
+ if (pipeline == null) {
try {
Thread.sleep((long)100);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
- } while (!ready && clock.millis() - st < timeout);
+ } while (pipeline == null && clock.millis() - st < timeout);
- if (!ready) {
+ if (pipeline == null) {
throw new IOException(String.format("Pipeline %s is not ready in %d ms",
- pipelineID, timeout));
+ piplineIdsStr, timeout));
}
+ return pipeline;
}
@Override
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableRatisContainerProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableRatisContainerProvider.java
index e5605ccdf5..a113a0cd14 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableRatisContainerProvider.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableRatisContainerProvider.java
@@ -34,6 +34,7 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.List;
+import java.util.stream.Collectors;
/**
* Class to obtain a writable container for Ratis and Standalone pipelines.
@@ -92,15 +93,10 @@ public class WritableRatisContainerProvider
// mentioned in HDDS-5655.
pipelineManager.acquireReadLock();
try {
- availablePipelines = pipelineManager.getPipelines(repConfig,
- Pipeline.PipelineState.OPEN, excludeList.getDatanodes(),
- excludeList.getPipelineIds());
- if (availablePipelines.size() == 0 && !excludeList.isEmpty()) {
- // if no pipelines can be found, try finding pipeline without
- // exclusion
- availablePipelines = pipelineManager
- .getPipelines(repConfig, Pipeline.PipelineState.OPEN);
- }
+ availablePipelines =
+ findPipelinesByState(repConfig,
+ excludeList,
+ Pipeline.PipelineState.OPEN);
if (availablePipelines.size() != 0) {
containerInfo = selectContainer(availablePipelines, size, owner,
excludeList);
@@ -123,8 +119,25 @@ public class WritableRatisContainerProvider
} catch (SCMException se) {
LOG.warn("Pipeline creation failed for repConfig {} " +
- "Datanodes may be used up.", repConfig, se);
- break;
+ "Datanodes may be used up. Try to see if any pipeline is in " +
+ "ALLOCATED state, and then will wait for it to be OPEN",
+ repConfig, se);
+ List<Pipeline> allocatedPipelines = findPipelinesByState(repConfig,
+ excludeList,
+ Pipeline.PipelineState.ALLOCATED);
+ if (!allocatedPipelines.isEmpty()) {
+ List<PipelineID> allocatedPipelineIDs =
+ allocatedPipelines.stream()
+ .map(p -> p.getId())
+ .collect(Collectors.toList());
+ try {
+ pipelineManager
+ .waitOnePipelineReady(allocatedPipelineIDs, 0);
+ } catch (IOException e) {
+ LOG.warn("Waiting for one of pipelines {} to be OPEN failed. ",
+ allocatedPipelineIDs, e);
+ }
+ }
} catch (IOException e) {
LOG.warn("Pipeline creation failed for repConfig: {}. "
+ "Retrying get pipelines call once.", repConfig, e);
@@ -134,15 +147,9 @@ public class WritableRatisContainerProvider
try {
// If Exception occurred or successful creation of pipeline do one
// final try to fetch pipelines.
- availablePipelines = pipelineManager
- .getPipelines(repConfig, Pipeline.PipelineState.OPEN,
- excludeList.getDatanodes(), excludeList.getPipelineIds());
- if (availablePipelines.size() == 0 && !excludeList.isEmpty()) {
- // if no pipelines can be found, try finding pipeline without
- // exclusion
- availablePipelines = pipelineManager
- .getPipelines(repConfig, Pipeline.PipelineState.OPEN);
- }
+ availablePipelines = findPipelinesByState(repConfig,
+ excludeList,
+ Pipeline.PipelineState.OPEN);
if (availablePipelines.size() == 0) {
LOG.info("Could not find available pipeline of repConfig: {} "
+ "even after retrying", repConfig);
@@ -167,6 +174,21 @@ public class WritableRatisContainerProvider
return null;
}
+ private List<Pipeline> findPipelinesByState(
+ final ReplicationConfig repConfig,
+ final ExcludeList excludeList,
+ final Pipeline.PipelineState pipelineState) {
+ List<Pipeline> pipelines = pipelineManager.getPipelines(repConfig,
+ pipelineState, excludeList.getDatanodes(),
+ excludeList.getPipelineIds());
+ if (pipelines.size() == 0 && !excludeList.isEmpty()) {
+ // if no pipelines can be found, try finding pipeline without
+ // exclusion
+ pipelines = pipelineManager.getPipelines(repConfig, pipelineState);
+ }
+ return pipelines;
+ }
+
private ContainerInfo selectContainer(List<Pipeline> availablePipelines,
long size, String owner, ExcludeList excludeList) {
Pipeline pipeline;
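
Net effect of the refactor above: the provider still looks for OPEN pipelines first (with the exclude list, then without, via the new findPipelinesByState helper), but when creation fails with SCMException because datanodes are used up, it no longer gives up. It now looks for ALLOCATED pipelines and blocks in waitOnePipelineReady (timeout 0, i.e. the default pipelineWaitDefaultTimeout) until one of them opens, then falls through to the final OPEN-pipeline fetch, so writes can proceed while replacement pipelines are still forming.
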
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index 9d9af56035..7ea21a2627 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -57,6 +57,7 @@ import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl;
import org.apache.hadoop.hdds.scm.ha.SCMHAUtils;
import org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator;
import org.apache.hadoop.hdds.scm.ScmInfo;
+import org.apache.hadoop.hdds.scm.node.NodeAddressUpdateHandler;
import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationManager;
import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationManagerImpl;
import org.apache.hadoop.hdds.scm.node.CommandQueueReportHandler;
@@ -422,6 +423,9 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
NewNodeHandler newNodeHandler = new NewNodeHandler(pipelineManager,
scmDecommissionManager, configuration, serviceManager);
+ NodeAddressUpdateHandler nodeAddressUpdateHandler =
+ new NodeAddressUpdateHandler(pipelineManager,
+ scmDecommissionManager, serviceManager);
StaleNodeHandler staleNodeHandler =
new StaleNodeHandler(scmNodeManager, pipelineManager, configuration);
DeadNodeHandler deadNodeHandler = new DeadNodeHandler(scmNodeManager,
@@ -485,6 +489,8 @@ public final class StorageContainerManager extends ServiceRuntimeInfoImpl
eventQueue.addHandler(SCMEvents.CONTAINER_ACTIONS, actionsHandler);
eventQueue.addHandler(SCMEvents.CLOSE_CONTAINER, closeContainerHandler);
eventQueue.addHandler(SCMEvents.NEW_NODE, newNodeHandler);
+ eventQueue.addHandler(SCMEvents.NODE_ADDRESS_UPDATE,
+ nodeAddressUpdateHandler);
eventQueue.addHandler(SCMEvents.STALE_NODE, staleNodeHandler);
eventQueue.addHandler(SCMEvents.HEALTHY_READONLY_TO_HEALTHY_NODE,
readOnlyHealthyToHealthyNodeHandler);
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeStateManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeStateManager.java
index d5fccff919..2cd2689a07 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeStateManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestNodeStateManager.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.server.upgrade.FinalizationCheckpoint;
@@ -350,6 +351,43 @@ public class TestNodeStateManager {
}
}
+ @Test
+ public void testUpdateNode() throws NodeAlreadyExistsException,
+ NodeNotFoundException {
+ UUID dnUuid = UUID.randomUUID();
+ String ipAddress = "1.2.3.4";
+ String hostName = "test-host";
+ StorageContainerDatanodeProtocolProtos.LayoutVersionProto
+ layoutVersionProto =
+ UpgradeUtils.toLayoutVersionProto(1, 2);
+ DatanodeDetails dn = DatanodeDetails.newBuilder()
+ .setUuid(dnUuid)
+ .setIpAddress(ipAddress)
+ .setHostName(hostName)
+ .setPersistedOpState(HddsProtos.NodeOperationalState.IN_MAINTENANCE)
+ .build();
+ nsm.addNode(dn, layoutVersionProto);
+
+ String newIpAddress = "2.3.4.5";
+ String newHostName = "new-host";
+ StorageContainerDatanodeProtocolProtos.LayoutVersionProto
+ newLayoutVersionProto = UpgradeUtils.defaultLayoutVersionProto();
+ DatanodeDetails newDn = DatanodeDetails.newBuilder()
+ .setUuid(dnUuid)
+ .setIpAddress(newIpAddress)
+ .setHostName(newHostName)
+ .setPersistedOpState(HddsProtos.NodeOperationalState.IN_SERVICE)
+ .build();
+ nsm.updateNode(newDn, newLayoutVersionProto);
+
+ DatanodeInfo updatedDn = nsm.getNode(dn);
+ assertEquals(newIpAddress, updatedDn.getIpAddress());
+ assertEquals(newHostName, updatedDn.getHostName());
+ assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
+ updatedDn.getPersistedOpState());
+ assertEquals(NodeStatus.inServiceHealthy(), updatedDn.getNodeStatus());
+ }
+
private DatanodeDetails generateDatanode() {
return DatanodeDetails.newBuilder().setUuid(UUID.randomUUID()).build();
}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java
index e4b2977fa1..a936e5652f 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java
@@ -1921,4 +1921,71 @@ public class TestSCMNodeManager {
}
}
+ /**
+ * Test node register with updated IP and host name.
+ */
+ @Test
+ public void testScmRegisterNodeWithUpdatedIpAndHostname()
+ throws IOException, InterruptedException, AuthenticationException {
+ OzoneConfiguration conf = getConf();
+ conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 1000,
+ MILLISECONDS);
+
+ // create table mapping file
+ String hostName = "host1";
+ String ipAddress = "1.2.3.4";
+ String mapFile = this.getClass().getClassLoader()
+ .getResource("nodegroup-mapping").getPath();
+ conf.set(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
+ "org.apache.hadoop.net.TableMapping");
+ conf.set(NET_TOPOLOGY_TABLE_MAPPING_FILE_KEY, mapFile);
+ conf.set(ScmConfigKeys.OZONE_SCM_NETWORK_TOPOLOGY_SCHEMA_FILE,
+ "network-topology-nodegroup.xml");
+
+ // use default IP address to resolve node
+ try (SCMNodeManager nodeManager = createNodeManager(conf)) {
+ String nodeUuid = UUID.randomUUID().toString();
+ DatanodeDetails node = createDatanodeDetails(
+ nodeUuid, hostName, ipAddress, null);
+ nodeManager.register(node, null, null);
+
+ // verify network topology cluster has all the registered nodes
+ Thread.sleep(2 * 1000);
+ NetworkTopology clusterMap = scm.getClusterMap();
+ assertEquals(1,
+ nodeManager.getNodeCount(NodeStatus.inServiceHealthy()));
+ assertEquals(1, clusterMap.getNumOfLeafNode(""));
+ assertEquals(4, clusterMap.getMaxLevel());
+ List<DatanodeDetails> nodeList = nodeManager.getAllNodes();
+ assertEquals(1, nodeList.size());
+
+ DatanodeDetails returnedNode = nodeList.get(0);
+ assertEquals(ipAddress, returnedNode.getIpAddress());
+ assertEquals(hostName, returnedNode.getHostName());
+ assertTrue(returnedNode.getNetworkLocation()
+ .startsWith("/rack1/ng"));
+ assertTrue(returnedNode.getParent() != null);
+
+ // test updating ip address and host name
+ String updatedIpAddress = "2.3.4.5";
+ String updatedHostName = "host2";
+ DatanodeDetails updatedNode = createDatanodeDetails(
+ nodeUuid, updatedHostName, updatedIpAddress, null);
+ nodeManager.register(updatedNode, null, null);
+
+ assertEquals(1,
+ nodeManager.getNodeCount(NodeStatus.inServiceHealthy()));
+ assertEquals(1, clusterMap.getNumOfLeafNode(""));
+ assertEquals(4, clusterMap.getMaxLevel());
+ List<DatanodeDetails> updatedNodeList = nodeManager.getAllNodes();
+ assertEquals(1, updatedNodeList.size());
+
+ DatanodeDetails returnedUpdatedNode = updatedNodeList.get(0);
+ assertEquals(updatedIpAddress, returnedUpdatedNode.getIpAddress());
+ assertEquals(updatedHostName, returnedUpdatedNode.getHostName());
+ assertTrue(returnedUpdatedNode.getNetworkLocation()
+ .startsWith("/rack1/ng"));
+ assertTrue(returnedUpdatedNode.getParent() != null);
+ }
+ }
}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockPipelineManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockPipelineManager.java
index 932d2d5387..f805470a25 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockPipelineManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockPipelineManager.java
@@ -217,6 +217,11 @@ public class MockPipelineManager implements PipelineManager {
HddsProtos.PipelineState.PIPELINE_CLOSED);
}
+ @Override
+ public void closeStalePipelines(DatanodeDetails datanodeDetails) {
+
+ }
+
@Override
public void scrubPipelines() {
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java
index eb273800ab..0e60921da5 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java
@@ -21,17 +21,20 @@ import com.google.common.base.Supplier;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.client.RatisReplicationConfig;
+import org.apache.hadoop.hdds.client.ReplicationConfig;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
import org.apache.hadoop.hdds.scm.HddsTestUtils;
+import org.apache.hadoop.hdds.scm.PipelineChoosePolicy;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.hdds.scm.container.MockNodeManager;
+import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList;
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
import org.apache.hadoop.hdds.scm.ha.SCMHADBTransactionBuffer;
import org.apache.hadoop.hdds.scm.ha.SCMHADBTransactionBufferStub;
@@ -40,6 +43,7 @@ import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.ha.SCMServiceManager;
import org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition;
import org.apache.hadoop.hdds.scm.node.NodeStatus;
+import org.apache.hadoop.hdds.scm.pipeline.choose.algorithms.HealthyPipelineChoosePolicy;
import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager;
import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
@@ -52,10 +56,12 @@ import org.apache.hadoop.ozone.container.common.SCMTestUtils;
import org.apache.ozone.test.GenericTestUtils;
import org.apache.ozone.test.TestClock;
import org.apache.ratis.protocol.exceptions.NotLeaderException;
+import org.assertj.core.util.Lists;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
+import org.mockito.ArgumentCaptor;
import org.slf4j.LoggerFactory;
import java.io.File;
@@ -63,6 +69,7 @@ import java.io.IOException;
import java.time.Instant;
import java.time.ZoneOffset;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@@ -74,10 +81,24 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_L
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT_DEFAULT;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_PIPELINE_ALLOCATED_TIMEOUT;
import static org.apache.hadoop.hdds.scm.pipeline.Pipeline.PipelineState.ALLOCATED;
+import static org.apache.hadoop.hdds.scm.pipeline.Pipeline.PipelineState.OPEN;
import static org.apache.hadoop.test.MetricsAsserts.getLongCounter;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
+import static org.mockito.ArgumentMatchers.anyLong;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.ArgumentMatchers.anyList;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
/**
* Tests for PipelineManagerImpl.
@@ -772,6 +793,179 @@ public class TestPipelineManagerImpl {
}
@Test
+ public void testGetStalePipelines() throws IOException {
+ SCMHADBTransactionBuffer buffer =
+ new SCMHADBTransactionBufferStub(dbStore);
+ PipelineManagerImpl pipelineManager =
+ spy(createPipelineManager(true, buffer));
+
+ // For existing pipelines
+ List<Pipeline> pipelines = new ArrayList<>();
+ UUID[] uuids = new UUID[3];
+ String[] ipAddresses = new String[3];
+ String[] hostNames = new String[3];
+ for (int i = 0; i < 3; i++) {
+ uuids[i] = UUID.randomUUID();
+ ipAddresses[i] = "1.2.3." + (i + 1);
+ hostNames[i] = "host" + i;
+
+ Pipeline pipeline = mock(Pipeline.class);
+ DatanodeDetails datanodeDetails = mock(DatanodeDetails.class);
+ when(datanodeDetails.getUuid()).thenReturn(uuids[i]);
+ when(datanodeDetails.getIpAddress()).thenReturn(ipAddresses[i]);
+ when(datanodeDetails.getHostName()).thenReturn(hostNames[i]);
+ List<DatanodeDetails> nodes = new ArrayList<>();
+ nodes.add(datanodeDetails);
+ when(pipeline.getNodes()).thenReturn(nodes);
+ pipelines.add(pipeline);
+ }
+
+ List<DatanodeDetails> nodes = new ArrayList<>();
+ nodes.add(pipelines.get(0).getNodes().get(0));
+ nodes.add(pipelines.get(1).getNodes().get(0));
+ nodes.add(pipelines.get(2).getNodes().get(0));
+ Pipeline pipeline = mock(Pipeline.class);
+ when(pipeline.getNodes()).thenReturn(nodes);
+ pipelines.add(pipeline);
+
+ doReturn(pipelines).when(pipelineManager).getPipelines();
+
+ // node with changed uuid
+ DatanodeDetails node0 = mock(DatanodeDetails.class);
+ UUID changedUUID = UUID.randomUUID();
+ when(node0.getUuid()).thenReturn(changedUUID);
+ when(node0.getIpAddress()).thenReturn(ipAddresses[0]);
+ when(node0.getHostName()).thenReturn(hostNames[0]);
+
+ // test uuid change
+ assertTrue(pipelineManager.getStalePipelines(node0).isEmpty());
+
+ // node with changed IP
+ DatanodeDetails node1 = mock(DatanodeDetails.class);
+ when(node1.getUuid()).thenReturn(uuids[0]);
+ when(node1.getIpAddress()).thenReturn("1.2.3.100");
+ when(node1.getHostName()).thenReturn(hostNames[0]);
+
+ // test IP change
+ List<Pipeline> pipelineList1 = pipelineManager.getStalePipelines(node1);
+ Assertions.assertEquals(2, pipelineList1.size());
+ Assertions.assertEquals(pipelines.get(0), pipelineList1.get(0));
+ Assertions.assertEquals(pipelines.get(3), pipelineList1.get(1));
+
+ // node with changed host name
+ DatanodeDetails node2 = mock(DatanodeDetails.class);
+ when(node2.getUuid()).thenReturn(uuids[0]);
+ when(node2.getIpAddress()).thenReturn(ipAddresses[0]);
+ when(node2.getHostName()).thenReturn("host100");
+
+ // test IP change
+ List<Pipeline> pipelineList2 = pipelineManager.getStalePipelines(node2);
+ Assertions.assertEquals(2, pipelineList2.size());
+ Assertions.assertEquals(pipelines.get(0), pipelineList2.get(0));
+ Assertions.assertEquals(pipelines.get(3), pipelineList2.get(1));
+ }
+
+ @Test
+ public void testCloseStalePipelines() throws IOException {
+ SCMHADBTransactionBuffer buffer =
+ new SCMHADBTransactionBufferStub(dbStore);
+ PipelineManagerImpl pipelineManager =
+ spy(createPipelineManager(true, buffer));
+
+ Pipeline pipeline0 = mock(Pipeline.class);
+ Pipeline pipeline1 = mock(Pipeline.class);
+ when(pipeline0.getId()).thenReturn(mock(PipelineID.class));
+ when(pipeline1.getId()).thenReturn(mock(PipelineID.class));
+ DatanodeDetails datanodeDetails = mock(DatanodeDetails.class);
+ List<Pipeline> stalePipelines = Lists.newArrayList(pipeline0, pipeline1);
+ doReturn(stalePipelines).when(pipelineManager)
+ .getStalePipelines(datanodeDetails);
+
+ pipelineManager.closeStalePipelines(datanodeDetails);
+ verify(pipelineManager, times(1))
+ .closePipeline(stalePipelines.get(0), false);
+ verify(pipelineManager, times(1))
+ .closePipeline(stalePipelines.get(1), false);
+ }
+
+ @Test
+ public void testWaitForAllocatedPipeline() throws IOException {
+ SCMHADBTransactionBuffer buffer =
+ new SCMHADBTransactionBufferStub(dbStore);
+ PipelineManagerImpl pipelineManager =
+ createPipelineManager(true, buffer);
+
+ PipelineManagerImpl pipelineManagerSpy = spy(pipelineManager);
+ ReplicationConfig repConfig =
+ RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.THREE);
+ PipelineChoosePolicy pipelineChoosingPolicy
+ = new HealthyPipelineChoosePolicy();
+ ContainerManager containerManager
+ = mock(ContainerManager.class);
+
+ WritableContainerProvider<ReplicationConfig> provider;
+ String owner = "TEST";
+ Pipeline allocatedPipeline;
+
+ // Throw on pipeline creates, so no new pipelines can be created
+ doThrow(SCMException.class).when(pipelineManagerSpy)
+ .createPipeline(any(), any(), anyList());
+ provider = new WritableRatisContainerProvider(
+ conf, pipelineManagerSpy, containerManager, pipelineChoosingPolicy);
+
+ // Add a single pipeline to manager, (in the allocated state)
+ allocatedPipeline = pipelineManager.createPipeline(repConfig);
+ pipelineManager.getStateManager()
+ .updatePipelineState(allocatedPipeline.getId()
+ .getProtobuf(), HddsProtos.PipelineState.PIPELINE_ALLOCATED);
+
+ // Assign a container to that pipeline
+ ContainerInfo container = HddsTestUtils.
+ getContainer(HddsProtos.LifeCycleState.OPEN,
+ allocatedPipeline.getId());
+
+ pipelineManager.addContainerToPipeline(
+ allocatedPipeline.getId(), container.containerID());
+ doReturn(container).when(containerManager).getMatchingContainer(anyLong(),
+ anyString(), eq(allocatedPipeline), any());
+
+
+ Assertions.assertTrue(pipelineManager.getPipelines(repConfig, OPEN)
+ .isEmpty(), "No open pipelines exist");
+ Assertions.assertTrue(pipelineManager.getPipelines(repConfig, ALLOCATED)
+ .contains(allocatedPipeline), "An allocated pipeline exists");
+
+ // Instrument waitOnePipelineReady to open the pipeline shortly after it is called
+ Runnable r = () -> {
+ try {
+ Thread.sleep(100);
+ pipelineManager.openPipeline(allocatedPipeline.getId());
+ } catch (Exception e) {
+ fail("exception on opening pipeline", e);
+ }
+ };
+ doAnswer(call -> {
+ new Thread(r).start();
+ return call.callRealMethod();
+ }).when(pipelineManagerSpy).waitOnePipelineReady(any(), anyLong());
+
+
+ ContainerInfo c = provider.getContainer(1, repConfig,
+ owner, new ExcludeList());
+ Assertions.assertEquals(container, c,
+ "Expected container was returned");
+
+ // Confirm that waitOnePipelineReady was called on allocated pipelines
+ ArgumentCaptor<Collection<PipelineID>> captor =
+ ArgumentCaptor.forClass(Collection.class);
+ verify(pipelineManagerSpy, times(1))
+ .waitOnePipelineReady(captor.capture(), anyLong());
+ Collection<PipelineID> coll = captor.getValue();
+ Assertions.assertTrue(coll.contains(allocatedPipeline.getId()),
+ "waitOnePipelineReady() was called on allocated pipeline");
+ pipelineManager.close();
+ }
+
public void testCreatePipelineForRead() throws IOException {
PipelineManager pipelineManager = createPipelineManager(true);
List<DatanodeDetails> dns = nodeManager
diff --git a/hadoop-ozone/dist/src/main/compose/restart/test.sh b/hadoop-ozone/dist/src/main/compose/restart/test.sh
index cf0f53242d..f73c263b5c 100644
--- a/hadoop-ozone/dist/src/main/compose/restart/test.sh
+++ b/hadoop-ozone/dist/src/main/compose/restart/test.sh
@@ -33,15 +33,20 @@ fix_data_dir_permissions
start_docker_env
execute_robot_test scm -v PREFIX:pre freon/generate.robot
execute_robot_test scm -v PREFIX:pre freon/validate.robot
+execute_robot_test scm -v PREFIX:pre freon/generate-chunk.robot
+execute_robot_test scm -v PREFIX:pre freon/validate-chunk.robot
KEEP_RUNNING=false stop_docker_env
# re-start cluster with new version and check after upgrade
export OZONE_KEEP_RESULTS=true
start_docker_env
execute_robot_test scm -v PREFIX:pre freon/validate.robot
+execute_robot_test scm -v PREFIX:pre freon/validate-chunk.robot
# test write key to old bucket after upgrade
execute_robot_test scm -v PREFIX:post freon/generate.robot
execute_robot_test scm -v PREFIX:post freon/validate.robot
+execute_robot_test scm -v PREFIX:post freon/generate-chunk.robot
+execute_robot_test scm -v PREFIX:post freon/validate-chunk.robot
stop_docker_env
generate_report
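
The chunk-level freon tests are split out of generate.robot/validate.robot into dedicated generate-chunk.robot/validate-chunk.robot suites (see the robot changes below), so this restart test keeps exercising the datanode chunk round-trip while other suites can run key generation/validation alone. The robot keywords wrap the freon CLI; a roughly equivalent manual run would look like the following (flag spellings follow freon's common generator options; verify against ozone freon --help):

    # write 100 chunks directly to the datanodes, then re-read and verify them
    ozone freon dcg -n 100 --prefix dcgpre
    ozone freon dcv -n 100 --prefix dcgpre
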
diff --git a/hadoop-ozone/dist/src/main/k8s/definitions/ozone/config.yaml b/hadoop-ozone/dist/src/main/k8s/definitions/ozone/config.yaml
index 7b65a3ecc4..88a36835c2 100644
--- a/hadoop-ozone/dist/src/main/k8s/definitions/ozone/config.yaml
+++ b/hadoop-ozone/dist/src/main/k8s/definitions/ozone/config.yaml
@@ -28,6 +28,7 @@ data:
OZONE-SITE.XML_ozone.scm.names: "scm-0.scm"
OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "3"
OZONE-SITE.XML_ozone.datanode.pipeline.limit: "1"
+ OZONE-SITE.XML_dfs.datanode.use.datanode.hostname: "true"
LOG4J.PROPERTIES_log4j.rootLogger: "INFO, stdout"
LOG4J.PROPERTIES_log4j.appender.stdout: "org.apache.log4j.ConsoleAppender"
LOG4J.PROPERTIES_log4j.appender.stdout.layout: "org.apache.log4j.PatternLayout"
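
This property is the heart of the HDDS-5916 fix: when a datanode pod is rescheduled in Kubernetes it comes back with a new IP address, while its StatefulSet DNS name (e.g. datanode-0.datanode) stays stable, so datanodes are told to identify and connect by host name instead. The OZONE-SITE.XML_ prefix is the image's env-to-config convention; the rendered ozone-site.xml entry would look roughly like:

    <property>
      <name>dfs.datanode.use.datanode.hostname</name>
      <value>true</value>
    </property>
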
diff --git a/hadoop-ozone/dist/src/main/k8s/examples/ozone/config-configmap.yaml b/hadoop-ozone/dist/src/main/k8s/examples/ozone/config-configmap.yaml
index c7ac5344d1..92fe9166d0 100644
--- a/hadoop-ozone/dist/src/main/k8s/examples/ozone/config-configmap.yaml
+++ b/hadoop-ozone/dist/src/main/k8s/examples/ozone/config-configmap.yaml
@@ -28,6 +28,7 @@ data:
OZONE-SITE.XML_ozone.scm.names: scm-0.scm
OZONE-SITE.XML_hdds.scm.safemode.min.datanode: "3"
OZONE-SITE.XML_ozone.datanode.pipeline.limit: "1"
+ OZONE-SITE.XML_dfs.datanode.use.datanode.hostname: "true"
LOG4J.PROPERTIES_log4j.rootLogger: INFO, stdout
LOG4J.PROPERTIES_log4j.appender.stdout: org.apache.log4j.ConsoleAppender
LOG4J.PROPERTIES_log4j.appender.stdout.layout: org.apache.log4j.PatternLayout
diff --git a/hadoop-ozone/dist/src/main/k8s/examples/ozone/datanode-statefulset.yaml b/hadoop-ozone/dist/src/main/k8s/examples/ozone/datanode-statefulset.yaml
index d7599c60d5..4d510ba26b 100644
--- a/hadoop-ozone/dist/src/main/k8s/examples/ozone/datanode-statefulset.yaml
+++ b/hadoop-ozone/dist/src/main/k8s/examples/ozone/datanode-statefulset.yaml
@@ -27,6 +27,7 @@ spec:
component: datanode
serviceName: datanode
replicas: 3
+ podManagementPolicy: Parallel
template:
metadata:
labels:
@@ -52,6 +53,7 @@ spec:
containers:
- name: datanode
image: '@docker.image@'
+ imagePullPolicy: Always
args:
- ozone
- datanode
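
Two StatefulSet tweaks here (imagePullPolicy: Always is applied to the om, s3g and scm pods below as well, so a rebuilt @docker.image@ tag is always re-pulled). podManagementPolicy: Parallel starts all three datanode pods at once instead of the default OrderedReady one-at-a-time sequence; that matters because SCM safemode waits for hdds.scm.safemode.min.datanode=3 datanodes, so serialized startup needlessly delays reaching that quorum. In a generic StatefulSet the field sits directly under spec:

    apiVersion: apps/v1
    kind: StatefulSet
    spec:
      podManagementPolicy: Parallel   # default is OrderedReady
      replicas: 3
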
diff --git a/hadoop-ozone/dist/src/main/k8s/examples/ozone/om-statefulset.yaml b/hadoop-ozone/dist/src/main/k8s/examples/ozone/om-statefulset.yaml
index ad0b16eaca..84b81dd1bf 100644
--- a/hadoop-ozone/dist/src/main/k8s/examples/ozone/om-statefulset.yaml
+++ b/hadoop-ozone/dist/src/main/k8s/examples/ozone/om-statefulset.yaml
@@ -42,6 +42,7 @@ spec:
containers:
- name: om
image: '@docker.image@'
+ imagePullPolicy: Always
args:
- ozone
- om
diff --git a/hadoop-ozone/dist/src/main/k8s/examples/ozone/s3g-statefulset.yaml b/hadoop-ozone/dist/src/main/k8s/examples/ozone/s3g-statefulset.yaml
index 6e96fb7dbc..8a17c72c38 100644
--- a/hadoop-ozone/dist/src/main/k8s/examples/ozone/s3g-statefulset.yaml
+++ b/hadoop-ozone/dist/src/main/k8s/examples/ozone/s3g-statefulset.yaml
@@ -36,6 +36,7 @@ spec:
containers:
- name: s3g
image: '@docker.image@'
+ imagePullPolicy: Always
args:
- ozone
- s3g
diff --git a/hadoop-ozone/dist/src/main/k8s/examples/ozone/scm-statefulset.yaml b/hadoop-ozone/dist/src/main/k8s/examples/ozone/scm-statefulset.yaml
index d4d651349f..6efc374d00 100644
--- a/hadoop-ozone/dist/src/main/k8s/examples/ozone/scm-statefulset.yaml
+++ b/hadoop-ozone/dist/src/main/k8s/examples/ozone/scm-statefulset.yaml
@@ -42,6 +42,7 @@ spec:
initContainers:
- name: init
image: '@docker.image@'
+ imagePullPolicy: Always
args:
- ozone
- scm
@@ -55,6 +56,7 @@ spec:
containers:
- name: scm
image: '@docker.image@'
+ imagePullPolicy: Always
args:
- ozone
- scm
diff --git a/hadoop-ozone/dist/src/main/k8s/examples/ozone/test.sh b/hadoop-ozone/dist/src/main/k8s/examples/ozone/test.sh
index 7d6bdfb981..fd5b0331d0 100755
--- a/hadoop-ozone/dist/src/main/k8s/examples/ozone/test.sh
+++ b/hadoop-ozone/dist/src/main/k8s/examples/ozone/test.sh
@@ -28,7 +28,19 @@ regenerate_resources
start_k8s_env
-execute_robot_test scm-0 smoketest/basic/basic.robot
+export SCM=scm-0
+
+execute_robot_test ${SCM} -v PREFIX:pre smoketest/freon/generate.robot
+execute_robot_test ${SCM} -v PREFIX:pre smoketest/freon/validate.robot
+
+# restart datanodes
+kubectl delete pod datanode-0 datanode-1 datanode-2
+
+wait_for_startup
+
+execute_robot_test ${SCM} -v PREFIX:pre smoketest/freon/validate.robot
+execute_robot_test ${SCM} -v PREFIX:post smoketest/freon/generate.robot
+execute_robot_test ${SCM} -v PREFIX:post smoketest/freon/validate.robot
combine_reports
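
The reworked k8s test now reproduces the original bug end to end: write data, delete all three datanode pods at once, wait for them to rejoin, then validate the pre-restart data and write/validate a fresh batch. Deleting the pods is what forces the address change; it can be observed manually, since pod names stay stable while IPs differ:

    # compare the IP column before and after deleting the datanode pods
    kubectl get pods -o wide
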
diff --git a/hadoop-ozone/dist/src/main/smoketest/freon/validate.robot b/hadoop-ozone/dist/src/main/smoketest/freon/generate-chunk.robot
similarity index 83%
copy from hadoop-ozone/dist/src/main/smoketest/freon/validate.robot
copy to hadoop-ozone/dist/src/main/smoketest/freon/generate-chunk.robot
index 4f782a5bfe..5742338c39 100644
--- a/hadoop-ozone/dist/src/main/smoketest/freon/validate.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/freon/generate-chunk.robot
@@ -14,7 +14,7 @@
# limitations under the License.
*** Settings ***
-Documentation Test freon data validation commands
+Documentation Test freon chunk generation commands
Resource ../ozone-lib/freon.robot
Test Timeout 5 minutes
@@ -22,8 +22,5 @@ Test Timeout 5 minutes
${PREFIX} ${EMPTY}
*** Test Cases ***
-Ozone Client Key Validator
- Freon OCKV prefix=ockg${PREFIX}
-
-DN Chunk Validator
- Freon DCV prefix=dcg${PREFIX} n=100
+DN Chunk Generator
+ Freon DCG prefix=dcg${PREFIX} n=100
diff --git a/hadoop-ozone/dist/src/main/smoketest/freon/generate.robot b/hadoop-ozone/dist/src/main/smoketest/freon/generate.robot
index 4611a3c33e..7af2003c51 100644
--- a/hadoop-ozone/dist/src/main/smoketest/freon/generate.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/freon/generate.robot
@@ -30,6 +30,3 @@ OM Key Generator
OM Bucket Generator
Freon OMBG prefix=ombg${PREFIX}
-
-DN Chunk Generator
- Freon DCG prefix=dcg${PREFIX} n=100
diff --git a/hadoop-ozone/dist/src/main/smoketest/freon/validate.robot b/hadoop-ozone/dist/src/main/smoketest/freon/validate-chunk.robot
similarity index 89%
copy from hadoop-ozone/dist/src/main/smoketest/freon/validate.robot
copy to hadoop-ozone/dist/src/main/smoketest/freon/validate-chunk.robot
index 4f782a5bfe..26d3c2aac0 100644
--- a/hadoop-ozone/dist/src/main/smoketest/freon/validate.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/freon/validate-chunk.robot
@@ -14,7 +14,7 @@
# limitations under the License.
*** Settings ***
-Documentation Test freon data validation commands
+Documentation Test freon chunk validation commands
Resource ../ozone-lib/freon.robot
Test Timeout 5 minutes
@@ -22,8 +22,5 @@ Test Timeout 5 minutes
${PREFIX} ${EMPTY}
*** Test Cases ***
-Ozone Client Key Validator
- Freon OCKV prefix=ockg${PREFIX}
-
DN Chunk Validator
Freon DCV prefix=dcg${PREFIX} n=100
diff --git a/hadoop-ozone/dist/src/main/smoketest/freon/validate.robot b/hadoop-ozone/dist/src/main/smoketest/freon/validate.robot
index 4f782a5bfe..243da4ba1e 100644
--- a/hadoop-ozone/dist/src/main/smoketest/freon/validate.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/freon/validate.robot
@@ -24,6 +24,3 @@ ${PREFIX} ${EMPTY}
*** Test Cases ***
Ozone Client Key Validator
Freon OCKV prefix=ockg${PREFIX}
-
-DN Chunk Validator
- Freon DCV prefix=dcg${PREFIX} n=100
diff --git a/hadoop-ozone/dist/src/main/smoketest/ozone-lib/freon.robot b/hadoop-ozone/dist/src/main/smoketest/ozone-lib/freon.robot
index 563af1ed3c..8d10cc81e9 100644
--- a/hadoop-ozone/dist/src/main/smoketest/ozone-lib/freon.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/ozone-lib/freon.robot
@@ -16,6 +16,10 @@
*** Settings ***
Resource ../lib/os.robot
+*** Variables ***
+${OM_HA_PARAM} ${EMPTY}
+${SECURITY_ENABLED} false
+
*** Keywords ***
Freon DCG
[arguments] ${prefix}=dcg ${n}=1 ${threads}=1 ${args}=${EMPTY}
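
The new *** Variables *** section gives freon.robot standalone defaults for ${OM_HA_PARAM} and ${SECURITY_ENABLED}, which are presumably referenced by the keywords (or the resources they pull in) when assembling the freon command line; suites that never export them, such as this k8s example, can now resource the file directly. A minimal illustrative suite built on the shared keyword:

    *** Settings ***
    Resource        ozone-lib/freon.robot

    *** Test Cases ***
    Generate Demo Chunks
        Freon DCG    prefix=demo    n=10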