You are viewing a plain text version of this content. The canonical link for it is here.
Posted to yarn-commits@hadoop.apache.org by sa...@apache.org on 2014/01/28 09:36:19 UTC

svn commit: r1561998 - in /hadoop/common/branches/branch-2/hadoop-yarn-project: ./ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/ hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/

Author: sandy
Date: Tue Jan 28 08:36:19 2014
New Revision: 1561998

URL: http://svn.apache.org/r1561998
Log:
YARN-1629. IndexOutOfBoundsException in MaxRunningAppsEnforcer (Sandy Ryza)

Modified:
    hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
    hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java
    hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java
    hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java

Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt?rev=1561998&r1=1561997&r2=1561998&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt Tue Jan 28 08:36:19 2014
@@ -350,6 +350,8 @@ Release 2.4.0 - UNRELEASED
 
     YARN-1642. RMDTRenewer#getRMClient should use ClientRMProxy (kasha)
 
+    YARN-1629. IndexOutOfBoundsException in MaxRunningAppsEnforcer (Sandy Ryza)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java?rev=1561998&r1=1561997&r2=1561998&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java Tue Jan 28 08:36:19 2014
@@ -91,15 +91,6 @@ public class FSLeafQueue extends FSQueue
     }
   }
   
-  public void makeAppRunnable(AppSchedulable appSched) {
-    if (!nonRunnableAppScheds.remove(appSched)) {
-      throw new IllegalStateException("Can't make app runnable that does not " +
-      		"already exist in queue as non-runnable" + appSched);
-    }
-    
-    runnableAppScheds.add(appSched);
-  }
-  
   public Collection<AppSchedulable> getRunnableAppSchedulables() {
     return runnableAppScheds;
   }

Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java?rev=1561998&r1=1561997&r2=1561998&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/MaxRunningAppsEnforcer.java Tue Jan 28 08:36:19 2014
@@ -24,6 +24,9 @@ import java.util.List;
 import java.util.Map;
 import java.util.PriorityQueue;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.ArrayListMultimap;
 import com.google.common.collect.ListMultimap;
@@ -33,6 +36,8 @@ import com.google.common.collect.ListMul
  * constraints
  */
 public class MaxRunningAppsEnforcer {
+  private static final Log LOG = LogFactory.getLog(FairScheduler.class);
+  
   private final FairScheduler scheduler;
 
   // Tracks the number of running applications by user.
@@ -163,7 +168,7 @@ public class MaxRunningAppsEnforcer {
     Iterator<FSSchedulerApp> iter = new MultiListStartTimeIterator(
         appsNowMaybeRunnable);
     FSSchedulerApp prev = null;
-    int numNowRunnable = 0;
+    List<AppSchedulable> noLongerPendingApps = new ArrayList<AppSchedulable>();
     while (iter.hasNext()) {
       FSSchedulerApp next = iter.next();
       if (next == prev) {
@@ -173,21 +178,34 @@ public class MaxRunningAppsEnforcer {
       if (canAppBeRunnable(next.getQueue(), next.getUser())) {
         trackRunnableApp(next);
         AppSchedulable appSched = next.getAppSchedulable();
-        next.getQueue().makeAppRunnable(appSched);
-        if (!usersNonRunnableApps.remove(next.getUser(), appSched)) {
-          throw new IllegalStateException("Waiting app " + next
-              + " expected to be in usersNonRunnableApps");
-        }
+        next.getQueue().getRunnableAppSchedulables().add(appSched);
+        noLongerPendingApps.add(appSched);
 
         // No more than one app per list will be able to be made runnable, so
         // we can stop looking after we've found that many
-        if (numNowRunnable >= appsNowMaybeRunnable.size()) {
+        if (noLongerPendingApps.size() >= appsNowMaybeRunnable.size()) {
           break;
         }
       }
 
       prev = next;
     }
+    
+    // We remove the apps from their pending lists afterwards so that we don't
+    // pull them out from under the iterator.  If they are not in these lists
+    // in the first place, there is a bug.
+    for (AppSchedulable appSched : noLongerPendingApps) {
+      if (!appSched.getApp().getQueue().getNonRunnableAppSchedulables()
+          .remove(appSched)) {
+        LOG.error("Can't make app runnable that does not already exist in queue"
+            + " as non-runnable: " + appSched + ". This should never happen.");
+      }
+      
+      if (!usersNonRunnableApps.remove(appSched.getApp().getUser(), appSched)) {
+        LOG.error("Waiting app " + appSched + " expected to be in "
+        		+ "usersNonRunnableApps, but was not. This should never happen.");
+      }
+    }
   }
   
   /**
@@ -225,7 +243,7 @@ public class MaxRunningAppsEnforcer {
    * This allows us to pick which list to advance in O(log(num lists)) instead
    * of O(num lists) time.
    */
-  private static class MultiListStartTimeIterator implements
+  static class MultiListStartTimeIterator implements
       Iterator<FSSchedulerApp> {
 
     private List<AppSchedulable>[] appLists;

Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java?rev=1561998&r1=1561997&r2=1561998&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestMaxRunningAppsEnforcer.java Tue Jan 28 08:36:19 2014
@@ -21,6 +21,10 @@ import static org.junit.Assert.assertEqu
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
@@ -152,4 +156,41 @@ public class TestMaxRunningAppsEnforcer 
     assertEquals(0, leaf2.getNonRunnableAppSchedulables().size());
   }
   
+  @Test
+  public void testMultipleAppsWaitingOnCousinQueue() {
+    FSLeafQueue leaf1 = queueManager.getLeafQueue("root.queue1.subqueue1.leaf1", true);
+    FSLeafQueue leaf2 = queueManager.getLeafQueue("root.queue1.subqueue2.leaf2", true);
+    queueMaxApps.put("root.queue1", 2);
+    FSSchedulerApp app1 = addApp(leaf1, "user");
+    addApp(leaf2, "user");
+    addApp(leaf2, "user");
+    addApp(leaf2, "user");
+    assertEquals(1, leaf1.getRunnableAppSchedulables().size());
+    assertEquals(1, leaf2.getRunnableAppSchedulables().size());
+    assertEquals(2, leaf2.getNonRunnableAppSchedulables().size());
+    removeApp(app1);
+    assertEquals(0, leaf1.getRunnableAppSchedulables().size());
+    assertEquals(2, leaf2.getRunnableAppSchedulables().size());
+    assertEquals(1, leaf2.getNonRunnableAppSchedulables().size());
+  }
+  
+  @Test
+  public void testMultiListStartTimeIteratorEmptyAppLists() {
+    List<List<AppSchedulable>> lists = new ArrayList<List<AppSchedulable>>();
+    lists.add(Arrays.asList(mockAppSched(1)));
+    lists.add(Arrays.asList(mockAppSched(2)));
+    Iterator<FSSchedulerApp> iter =
+        new MaxRunningAppsEnforcer.MultiListStartTimeIterator(lists);
+    assertEquals(1, iter.next().getAppSchedulable().getStartTime());
+    assertEquals(2, iter.next().getAppSchedulable().getStartTime());
+  }
+  
+  private AppSchedulable mockAppSched(long startTime) {
+    AppSchedulable appSched = mock(AppSchedulable.class);
+    when(appSched.getStartTime()).thenReturn(startTime);
+    FSSchedulerApp schedApp = mock(FSSchedulerApp.class);
+    when(schedApp.getAppSchedulable()).thenReturn(appSched);
+    when(appSched.getApp()).thenReturn(schedApp);
+    return appSched;
+  }
 }