You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by iw...@apache.org on 2021/01/09 12:20:31 UTC

[hadoop] 02/03: YARN-10536. Client in distributedShell swallows interrupt exceptions (#2554)

This is an automated email from the ASF dual-hosted git repository.

iwasakims pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git

commit a40eabd7174efb30fc60b184b611a20c11dfbd45
Author: Ahmed Hussein <50...@users.noreply.github.com>
AuthorDate: Thu Dec 17 18:13:28 2020 -0500

    YARN-10536. Client in distributedShell swallows interrupt exceptions (#2554)
    
    
    (cherry picked from commit 7a88f45366722932211514a9ce0c13492a0bd576)
---
 .../yarn/applications/distributedshell/Client.java | 53 ++++++++++-------
 .../distributedshell/TestDistributedShell.java     | 66 ++++++++++++++++++++--
 2 files changed, 92 insertions(+), 27 deletions(-)

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
index 7262b80..d7114d0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java
@@ -143,6 +143,9 @@ public class Client {
   private static final int DEFAULT_AM_VCORES = 1;
   private static final int DEFAULT_CONTAINER_MEMORY = 10;
   private static final int DEFAULT_CONTAINER_VCORES = 1;
+
+  // check the application once per second.
+  private static final int APP_MONITOR_INTERVAL = 1000;
   
   // Configuration
   private Configuration conf;
@@ -209,7 +212,7 @@ public class Client {
   private String rollingFilesPattern = "";
 
   // Start time for client
-  private final long clientStartTime = System.currentTimeMillis();
+  private long clientStartTime = System.currentTimeMillis();
   // Timeout threshold for client. Kill app after time interval expires.
   private long clientTimeout = 600000;
 
@@ -670,6 +673,8 @@ public class Client {
 
     LOG.info("Running Client");
     yarnClient.start();
+    // set the client start time.
+    clientStartTime = System.currentTimeMillis();
 
     YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
     LOG.info("Got Cluster metric info from ASM" 
@@ -983,7 +988,6 @@ public class Client {
     if (keepContainers) {
       vargs.add("--keep_containers_across_application_attempts");
     }
-
     for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
       vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
     }
@@ -1110,13 +1114,17 @@ public class Client {
   private boolean monitorApplication(ApplicationId appId)
       throws YarnException, IOException {
 
+    boolean res = false;
+    boolean needForceKill = false;
     while (true) {
-
       // Check app status every 1 second.
       try {
-        Thread.sleep(1000);
+        Thread.sleep(APP_MONITOR_INTERVAL);
       } catch (InterruptedException e) {
-        LOG.debug("Thread sleep in monitoring loop interrupted");
+        LOG.warn("Thread sleep in monitoring loop interrupted");
+        // Stop monitoring on interrupt. needForceKill stays false, so the
+        // application is not killed here; set it to true if it should be.
+        break;
       }
 
       // Get application report for the appId we are interested in 
@@ -1139,22 +1147,20 @@ public class Client {
       FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();
       if (YarnApplicationState.FINISHED == state) {
         if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
-          LOG.info("Application has completed successfully. Breaking monitoring loop");
-          return true;        
-        }
-        else {
-          LOG.info("Application did finished unsuccessfully."
-              + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString()
-              + ". Breaking monitoring loop");
-          return false;
+          LOG.info("Application has completed successfully. "
+                  + "Breaking monitoring loop");
+          res = true;
+        } else {
+          LOG.info("Application did finished unsuccessfully. "
+                  + "YarnState={}, DSFinalStatus={}. Breaking monitoring loop",
+              state, dsStatus);
         }
-      }
-      else if (YarnApplicationState.KILLED == state
+        break;
+      } else if (YarnApplicationState.KILLED == state
           || YarnApplicationState.FAILED == state) {
-        LOG.info("Application did not finish."
-            + " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString()
-            + ". Breaking monitoring loop");
-        return false;
+        LOG.info("Application did not finish. YarnState={}, DSFinalStatus={}. "
+                + "Breaking monitoring loop", state, dsStatus);
+        break;
       }
 
       // The value equal or less than 0 means no timeout
@@ -1162,11 +1168,16 @@ public class Client {
           && System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
         LOG.info("Reached client specified timeout for application. " +
             "Killing application");
-        forceKillApplication(appId);
-        return false;
+        needForceKill = true;
+        break;
       }
     }
 
+    if (needForceKill) {
+      forceKillApplication(appId);
+    }
+
+    return res;
   }
 
   /**
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
index 8634f79..f778785 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java
@@ -107,6 +107,7 @@ import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TemporaryFolder;
+import org.junit.rules.TestName;
 import org.junit.rules.Timeout;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -139,6 +140,13 @@ public class TestDistributedShell {
   @Rule
   public TemporaryFolder tmpFolder = new TemporaryFolder();
 
+  @Rule
+  public TestName name = new TestName();
+
+  private String generateAppName() {
+    return name.getMethodName().replaceFirst("test", "");
+  }
+
   @Before
   public void setup() throws Exception {
     setupInternal(NUM_NMS, timelineVersionWatcher.getTimelineVersion());
@@ -737,6 +745,8 @@ public class TestDistributedShell {
   @Test
   public void testDSRestartWithPreviousRunningContainers() throws Exception {
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -772,6 +782,8 @@ public class TestDistributedShell {
   @Test
   public void testDSAttemptFailuresValidityIntervalSucess() throws Exception {
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -810,6 +822,8 @@ public class TestDistributedShell {
   @Test
   public void testDSAttemptFailuresValidityIntervalFailed() throws Exception {
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -857,6 +871,8 @@ public class TestDistributedShell {
     fileWriter.write("log4j.rootLogger=debug,stdout");
     fileWriter.close();
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -906,6 +922,8 @@ public class TestDistributedShell {
   public void testSpecifyingLogAggregationContext() throws Exception {
     String regex = ".*(foo|bar)\\d";
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--shell_command",
@@ -928,6 +946,8 @@ public class TestDistributedShell {
   public void testDSShellWithCommands() throws Exception {
 
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -960,6 +980,8 @@ public class TestDistributedShell {
   @Test
   public void testDSShellWithMultipleArgs() throws Exception {
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -1010,6 +1032,8 @@ public class TestDistributedShell {
     fileWriter.close();
     System.out.println(customShellScript.getAbsolutePath());
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -1055,6 +1079,8 @@ public class TestDistributedShell {
     LOG.info("Initializing DS Client with no jar file");
     try {
       String[] args = {
+          "--appname",
+          generateAppName(),
           "--num_containers",
           "2",
           "--shell_command",
@@ -1263,6 +1289,8 @@ public class TestDistributedShell {
   @Test
   public void testContainerLaunchFailureHandling() throws Exception {
     String[] args = {
+        "--appname",
+        generateAppName(),
       "--jar",
       APPMASTER_JAR,
       "--num_containers",
@@ -1291,6 +1319,8 @@ public class TestDistributedShell {
   @Test
   public void testDebugFlag() throws Exception {
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -1388,14 +1418,18 @@ public class TestDistributedShell {
 
   @Test
   public void testDistributedShellResourceProfiles() throws Exception {
+    String appName = generateAppName();
     String[][] args = {
-        {"--jar", APPMASTER_JAR, "--num_containers", "1", "--shell_command",
+        {"--appname", appName + "-0", "--jar", APPMASTER_JAR,
+            "--num_containers", "1", "--shell_command",
             Shell.WINDOWS ? "dir" : "ls", "--container_resource_profile",
             "maximum" },
-        {"--jar", APPMASTER_JAR, "--num_containers", "1", "--shell_command",
+        {"--appname", appName + "-1", "--jar", APPMASTER_JAR,
+            "--num_containers", "1", "--shell_command",
             Shell.WINDOWS ? "dir" : "ls", "--master_resource_profile",
             "default" },
-        {"--jar", APPMASTER_JAR, "--num_containers", "1", "--shell_command",
+        {"--appname", appName + "-2", "--jar", APPMASTER_JAR,
+            "--num_containers", "1", "--shell_command",
             Shell.WINDOWS ? "dir" : "ls", "--master_resource_profile",
             "default", "--container_resource_profile", "maximum" }
         };
@@ -1419,6 +1453,8 @@ public class TestDistributedShell {
     Client client = new Client(new Configuration(yarnCluster.getConfig()));
     try {
       String[] args = {
+          "--appname",
+          generateAppName(),
           "--jar",
           APPMASTER_JAR,
           "--num_containers",
@@ -1449,6 +1485,8 @@ public class TestDistributedShell {
     Client client = new Client(new Configuration(yarnCluster.getConfig()));
     try {
       String[] args = {
+          "--appname",
+          generateAppName(),
           "--jar",
           APPMASTER_JAR,
           "--num_containers",
@@ -1569,6 +1607,8 @@ public class TestDistributedShell {
     }
 
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -1650,6 +1690,8 @@ public class TestDistributedShell {
   public void testDistributedShellAMResourcesWithIllegalArguments()
       throws Exception {
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -1667,6 +1709,8 @@ public class TestDistributedShell {
   public void testDistributedShellAMResourcesWithMissingArgumentValue()
       throws Exception {
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -1683,6 +1727,8 @@ public class TestDistributedShell {
   public void testDistributedShellAMResourcesWithUnknownResource()
       throws Exception {
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -1701,6 +1747,8 @@ public class TestDistributedShell {
   public void testDistributedShellNonExistentQueue()
       throws Exception {
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -1719,6 +1767,8 @@ public class TestDistributedShell {
   public void testDistributedShellWithSingleFileLocalization()
       throws Exception {
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -1740,6 +1790,8 @@ public class TestDistributedShell {
   public void testDistributedShellWithMultiFileLocalization()
       throws Exception {
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -1761,6 +1813,8 @@ public class TestDistributedShell {
   public void testDistributedShellWithNonExistentFileLocalization()
       throws Exception {
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
@@ -1784,14 +1838,14 @@ public class TestDistributedShell {
       throws Exception {
     String appName = "DistributedShellCleanup";
     String[] args = {
+        "--appname",
+        generateAppName(),
         "--jar",
         APPMASTER_JAR,
         "--num_containers",
         "1",
         "--shell_command",
-        Shell.WINDOWS ? "dir" : "ls",
-        "--appname",
-        appName
+        Shell.WINDOWS ? "dir" : "ls"
     };
     Configuration config = new Configuration(yarnCluster.getConfig());
     Client client = new Client(config);


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org