You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by we...@apache.org on 2022/05/31 23:55:57 UTC

[hadoop] branch branch-3.2 updated: HDFS-16583. DatanodeAdminDefaultMonitor can get stuck in an infinite loop (#4394)

This is an automated email from the ASF dual-hosted git repository.

weichiu pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 52e405390e7 HDFS-16583. DatanodeAdminDefaultMonitor can get stuck in an infinite loop (#4394)
52e405390e7 is described below

commit 52e405390e76377a1669c841debe4b8d72246245
Author: Stephen O'Donnell <st...@gmail.com>
AuthorDate: Wed Jun 1 00:55:48 2022 +0100

    HDFS-16583. DatanodeAdminDefaultMonitor can get stuck in an infinite loop (#4394)
    
    Co-authored-by: S O'Donnell <so...@cloudera.com>
---
 .../blockmanagement/DatanodeAdminManager.java      | 25 ++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminManager.java
index 6b176b4acea..dd1a2dc078d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminManager.java
@@ -21,6 +21,7 @@ import static com.google.common.base.Preconditions.checkArgument;
 import static org.apache.hadoop.util.Time.monotonicNow;
 
 import java.util.AbstractList;
+import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.Iterator;
@@ -138,6 +139,11 @@ public class DatanodeAdminManager {
    * outOfServiceNodeBlocks. Additional nodes wait in pendingNodes.
    */
   private final PriorityQueue<DatanodeDescriptor> pendingNodes;
+  /**
+   * Any nodes where decommission or maintenance has been cancelled are added
+   * to this queue for later processing.
+   */
+  private final Queue<DatanodeDescriptor> cancelledNodes = new ArrayDeque<>();
   private Monitor monitor = null;
 
   DatanodeAdminManager(final Namesystem namesystem,
@@ -251,7 +257,7 @@ public class DatanodeAdminManager {
       }
       // Remove from tracking in DatanodeAdminManager
       pendingNodes.remove(node);
-      outOfServiceNodeBlocks.remove(node);
+      cancelledNodes.add(node);
     } else {
       LOG.trace("stopDecommission: Node {} in {}, nothing to do.",
           node, node.getAdminState());
@@ -330,7 +336,7 @@ public class DatanodeAdminManager {
 
       // Remove from tracking in DatanodeAdminManager
       pendingNodes.remove(node);
-      outOfServiceNodeBlocks.remove(node);
+      cancelledNodes.add(node);
     } else {
       LOG.trace("stopMaintenance: Node {} in {}, nothing to do.",
           node, node.getAdminState());
@@ -513,6 +519,7 @@ public class DatanodeAdminManager {
       // Check decommission or maintenance progress.
       namesystem.writeLock();
       try {
+        processCancelledNodes();
         processPendingNodes();
         check();
       } catch (Exception e) {
@@ -541,6 +548,20 @@ public class DatanodeAdminManager {
       }
     }
 
+    /**
+     * Drain cancelledNodes: each node whose decommission or maintenance was
+     * cancelled by an administrator is removed from outOfServiceNodeBlocks.
+     *
+     * This method must be executed under the write lock to prevent the
+     * internal structures being modified concurrently.
+     */
+    private void processCancelledNodes() {
+      while(!cancelledNodes.isEmpty()) {
+        DatanodeDescriptor dn = cancelledNodes.poll();
+        outOfServiceNodeBlocks.remove(dn);
+      }
+    }
+
     private void check() {
       final Iterator<Map.Entry<DatanodeDescriptor, AbstractList<BlockInfo>>>
           it = new CyclicIteration<>(outOfServiceNodeBlocks,


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org