You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by su...@apache.org on 2020/01/15 11:37:52 UTC

[hadoop] branch branch-3.1 updated: YARN-8373. RM Received RMFatalEvent of type CRITICAL_THREAD_CRASH. Contributed by Wilfred Spiegelenburg.

This is an automated email from the ASF dual-hosted git repository.

sunilg pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new af89b5b  YARN-8373. RM Received RMFatalEvent of type CRITICAL_THREAD_CRASH. Contributed by Wilfred Spiegelenburg.
af89b5b is described below

commit af89b5b0864f8e861e9879393b37efad5b714585
Author: Sunil G <su...@apache.org>
AuthorDate: Wed Jan 15 17:07:34 2020 +0530

    YARN-8373. RM Received RMFatalEvent of type CRITICAL_THREAD_CRASH. Contributed by Wilfred Spiegelenburg.
---
 .../scheduler/ClusterNodeTracker.java                | 20 ++++++++++----------
 .../scheduler/fair/FairScheduler.java                | 15 +++++++++------
 .../scheduler/fair/TestContinuousScheduling.java     |  2 --
 3 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java
index be7b3a2..704893a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ClusterNodeTracker.java
@@ -33,11 +33,11 @@ import org.apache.hadoop.yarn.util.resource.ResourceUtils;
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.TreeSet;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
@@ -363,21 +363,21 @@ public class ClusterNodeTracker<N extends SchedulerNode> {
 
   /**
    * Convenience method to sort nodes.
+   * Nodes can change while being sorted. Using a standard sort will fail
+   * without locking each node; the TreeSet handles this without locks.
    *
-   * Note that the sort is performed without holding a lock. We are sorting
-   * here instead of on the caller to allow for future optimizations (e.g.
-   * sort once every x milliseconds).
+   * @param comparator the comparator to sort the nodes with
+   * @return sorted set of nodes in the form of a TreeSet
    */
-  public List<N> sortedNodeList(Comparator<N> comparator) {
-    List<N> sortedList = null;
+  public TreeSet<N> sortedNodeSet(Comparator<N> comparator) {
+    TreeSet<N> sortedSet = new TreeSet<>(comparator);
     readLock.lock();
     try {
-      sortedList = new ArrayList(nodes.values());
+      sortedSet.addAll(nodes.values());
     } finally {
       readLock.unlock();
     }
-    Collections.sort(sortedList, comparator);
-    return sortedList;
+    return sortedSet;
   }
 
   /**
@@ -431,4 +431,4 @@ public class ClusterNodeTracker<N extends SchedulerNode> {
     }
     return retNodes;
   }
-}
\ No newline at end of file
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
index caa6aa0..013cf9d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
@@ -103,6 +103,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
+import java.util.TreeSet;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
@@ -945,15 +946,17 @@ public class FairScheduler extends
   @Deprecated
   void continuousSchedulingAttempt() throws InterruptedException {
     long start = getClock().getTime();
-    List<FSSchedulerNode> nodeIdList;
-    // Hold a lock to prevent comparator order changes due to changes of node
-    // unallocated resources
-    synchronized (this) {
-      nodeIdList = nodeTracker.sortedNodeList(nodeAvailableResourceComparator);
+    TreeSet<FSSchedulerNode> nodeIdSet;
+    // Hold a lock to prevent node changes as much as possible.
+    readLock.lock();
+    try {
+      nodeIdSet = nodeTracker.sortedNodeSet(nodeAvailableResourceComparator);
+    } finally {
+      readLock.unlock();
     }
 
     // iterate all nodes
-    for (FSSchedulerNode node : nodeIdList) {
+    for (FSSchedulerNode node : nodeIdSet) {
       try {
         if (Resources.fitsIn(minimumAllocation,
             node.getUnallocatedResource())) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java
index e6a841a..6760d9c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java
@@ -323,9 +323,7 @@ public class TestContinuousScheduling extends FairSchedulerTestBase {
         for (int j = 0; j < 100; j++) {
           for (FSSchedulerNode node : clusterNodeTracker.getAllNodes()) {
             int i = ThreadLocalRandom.current().nextInt(-30, 30);
-            synchronized (scheduler) {
               node.deductUnallocatedResource(Resource.newInstance(i * 1024, i));
-            }
           }
         }
       }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org