You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by we...@apache.org on 2022/05/31 23:55:57 UTC
[hadoop] branch branch-3.2 updated: HDFS-16583. DatanodeAdminDefaultMonitor can get stuck in an infinite loop (#4394)
This is an automated email from the ASF dual-hosted git repository.
weichiu pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new 52e405390e7 HDFS-16583. DatanodeAdminDefaultMonitor can get stuck in an infinite loop (#4394)
52e405390e7 is described below
commit 52e405390e76377a1669c841debe4b8d72246245
Author: Stephen O'Donnell <st...@gmail.com>
AuthorDate: Wed Jun 1 00:55:48 2022 +0100
HDFS-16583. DatanodeAdminDefaultMonitor can get stuck in an infinite loop (#4394)
Co-authored-by: S O'Donnell <so...@cloudera.com>
---
.../blockmanagement/DatanodeAdminManager.java | 25 ++++++++++++++++++++--
1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminManager.java
index 6b176b4acea..dd1a2dc078d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminManager.java
@@ -21,6 +21,7 @@ import static com.google.common.base.Preconditions.checkArgument;
import static org.apache.hadoop.util.Time.monotonicNow;
import java.util.AbstractList;
+import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
@@ -138,6 +139,11 @@ public class DatanodeAdminManager {
* outOfServiceNodeBlocks. Additional nodes wait in pendingNodes.
*/
private final PriorityQueue<DatanodeDescriptor> pendingNodes;
+ /**
+ * Any nodes where decommission or maintenance has been cancelled are added
+ * to this queue for later processing.
+ */
+ private final Queue<DatanodeDescriptor> cancelledNodes = new ArrayDeque<>();
private Monitor monitor = null;
DatanodeAdminManager(final Namesystem namesystem,
@@ -251,7 +257,7 @@ public class DatanodeAdminManager {
}
// Remove from tracking in DatanodeAdminManager
pendingNodes.remove(node);
- outOfServiceNodeBlocks.remove(node);
+ cancelledNodes.add(node);
} else {
LOG.trace("stopDecommission: Node {} in {}, nothing to do.",
node, node.getAdminState());
@@ -330,7 +336,7 @@ public class DatanodeAdminManager {
// Remove from tracking in DatanodeAdminManager
pendingNodes.remove(node);
- outOfServiceNodeBlocks.remove(node);
+ cancelledNodes.add(node);
} else {
LOG.trace("stopMaintenance: Node {} in {}, nothing to do.",
node, node.getAdminState());
@@ -513,6 +519,7 @@ public class DatanodeAdminManager {
// Check decommission or maintenance progress.
namesystem.writeLock();
try {
+ processCancelledNodes();
processPendingNodes();
check();
} catch (Exception e) {
@@ -541,6 +548,20 @@ public class DatanodeAdminManager {
}
}
+ /**
+ * Drain cancelledNodes: each node whose decommission or maintenance was
+ * cancelled by an administrator is removed from outOfServiceNodeBlocks.
+ *
+ * This method must be executed under the write lock to prevent the
+ * internal structures from being modified concurrently.
+ */
+ private void processCancelledNodes() {
+ while(!cancelledNodes.isEmpty()) {
+ DatanodeDescriptor dn = cancelledNodes.poll();
+ outOfServiceNodeBlocks.remove(dn);
+ }
+ }
+
private void check() {
final Iterator<Map.Entry<DatanodeDescriptor, AbstractList<BlockInfo>>>
it = new CyclicIteration<>(outOfServiceNodeBlocks,
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org