You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by vb...@apache.org on 2014/12/08 19:23:20 UTC

ambari git commit: AMBARI-8587. Ambari UI hangs for more than 20 minutes during installation.(vbrodetskyi)

Repository: ambari
Updated Branches:
  refs/heads/trunk 4a42b6d5b -> 3ba1889f4


AMBARI-8587. Ambari UI hangs for more than 20 minutes during installation.(vbrodetskyi)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/3ba1889f
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/3ba1889f
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/3ba1889f

Branch: refs/heads/trunk
Commit: 3ba1889f454ffcd14352954ef30ad0393b02dc4a
Parents: 4a42b6d
Author: Vitaly Brodetskyi <vb...@hortonworks.com>
Authored: Mon Dec 8 20:21:02 2014 +0200
Committer: Vitaly Brodetskyi <vb...@hortonworks.com>
Committed: Mon Dec 8 20:23:03 2014 +0200

----------------------------------------------------------------------
 .../server/actionmanager/ActionScheduler.java   | 38 +++++++++++++++++---
 .../ambari/server/agent/HeartBeatHandler.java   |  4 +++
 .../actionmanager/TestActionScheduler.java      | 24 +++++++++----
 3 files changed, 55 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/3ba1889f/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java
index c23440e..b3dcca4 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java
@@ -465,10 +465,6 @@ class ActionScheduler implements Runnable {
     // Map to track role status
     Map<String, RoleStats> roleStats = initRoleStats(s);
     long now = System.currentTimeMillis();
-    long taskTimeout = actionTimeout;
-    if (taskTimeoutAdjustment) {
-      taskTimeout = actionTimeout + s.getStageTimeout();
-    }
 
     Cluster cluster = null;
     if (null != s.getClusterName()) {
@@ -522,6 +518,20 @@ class ActionScheduler implements Runnable {
           }
         }
 
+        // Base timeout for this command; extended below by the command's own timeout parameter, if present
+        long commandTimeout = actionTimeout;
+        if (taskTimeoutAdjustment) {
+          Map<String, String> commandParams = c.getCommandParams();
+          String timeoutKey = ExecutionCommand.KeyNames.COMMAND_TIMEOUT;
+          if (commandParams != null && commandParams.containsKey(timeoutKey)) {
+            String timeoutStr = commandParams.get(timeoutKey);
+            commandTimeout += Long.parseLong(timeoutStr) * 1000; // Converting to milliseconds
+          } else {
+            LOG.error("Execution command has no timeout parameter" +
+                    c.toString());
+          }
+        }
+
         // Check that service host component is not deleted
         if (hostDeleted) {
 
@@ -537,7 +547,7 @@ class ActionScheduler implements Runnable {
           // We don't need to send CANCEL_COMMANDs in this case
           db.abortHostRole(host, s.getRequestId(), s.getStageId(), c.getRole(), message);
           status = HostRoleStatus.ABORTED;
-        } else if (timeOutActionNeeded(status, s, hostObj, roleStr, now, taskTimeout)) {
+        } else if (timeOutActionNeeded(status, s, hostObj, roleStr, now, commandTimeout)) {
           // Process command timeouts
           LOG.info("Host:" + host + ", role:" + roleStr + ", actionId:" + s.getActionId() + " timed out");
           if (s.getAttemptCount(host, roleStr) >= maxAttempts) {
@@ -677,6 +687,11 @@ class ActionScheduler implements Runnable {
       LOG.debug("Timing out action since agent is not heartbeating.");
       return true;
     }
+    // If another command of this stage is in progress on this host, do not time out this one
+    if (hasCommandInProgress(stage, host.getHostName())
+            && !status.equals(HostRoleStatus.IN_PROGRESS)) {
+      return false;
+    }
     if (currentTime > stage.getLastAttemptTime(host.getHostName(), role)
         + taskTimeout) {
       return true;
@@ -684,6 +699,19 @@ class ActionScheduler implements Runnable {
     return false;
   }
 
+  private boolean hasCommandInProgress(Stage stage, String host) { // true if any command of this stage on the given host is IN_PROGRESS
+    List<ExecutionCommandWrapper> commandWrappers = stage.getExecutionCommands(host); // NOTE(review): assumed non-null for a known host - confirm
+    for (ExecutionCommandWrapper wrapper : commandWrappers) {
+      ExecutionCommand c = wrapper.getExecutionCommand();
+      String roleStr = c.getRole(); // status is looked up per (host, role) pair
+      HostRoleStatus status = stage.getHostRoleStatus(host, roleStr);
+      if (status == HostRoleStatus.IN_PROGRESS) {
+        return true; // a single in-progress command is enough
+      }
+    }
+    return false; // no command for this host is currently IN_PROGRESS
+  }
+
   private ListMultimap<String, ServiceComponentHostEvent> formEventMap(Stage s, List<ExecutionCommand> commands) {
     ListMultimap<String, ServiceComponentHostEvent> serviceEventMap = ArrayListMultimap.create();
     for (ExecutionCommand cmd : commands) {

http://git-wip-us.apache.org/repos/asf/ambari/blob/3ba1889f/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
index b32a252..d482109 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
@@ -387,6 +387,10 @@ public class HeartBeatHandler {
       if (hostRoleCommand.getStatus() == HostRoleStatus.ABORTED) {
         continue;
       }
+      if (hostRoleCommand.getStatus() == HostRoleStatus.QUEUED &&
+              report.getStatus().equals("IN_PROGRESS")) {
+        hostRoleCommand.setStartTime(now);
+      }
       //pass custom STAR, STOP and RESTART
       if (RoleCommand.ACTIONEXECUTE.toString().equals(report.getRoleCommand()) ||
          (RoleCommand.CUSTOM_COMMAND.toString().equals(report.getRoleCommand()) &&

http://git-wip-us.apache.org/repos/asf/ambari/blob/3ba1889f/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java b/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java
index 0640c34..e61c1a9 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java
@@ -17,9 +17,7 @@
  */
 package org.apache.ambari.server.actionmanager;
 
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.*;
 import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.anyLong;
 import static org.mockito.Matchers.anyString;
@@ -211,6 +209,10 @@ public class TestActionScheduler {
     List<Stage> stages = new ArrayList<Stage>();
     final Stage s = StageUtils.getATestStage(1, 977, hostname, CLUSTER_HOST_INFO,
       "{\"host_param\":\"param_value\"}", "{\"stage_param\":\"param_value\"}");
+    s.addHostRoleExecutionCommand(hostname, Role.SECONDARY_NAMENODE, RoleCommand.INSTALL,
+            new ServiceComponentHostInstallEvent("SECONDARY_NAMENODE", hostname, System.currentTimeMillis(), "HDP-1.2.0"),
+            "cluster1", "HDFS");
+    s.setHostRoleStatus(hostname, "SECONDARY_NAMENODE", HostRoleStatus.IN_PROGRESS);
     stages.add(s);
 
     ActionDBAccessor db = mock(ActionDBAccessor.class);
@@ -238,12 +240,22 @@ public class TestActionScheduler {
     // Start the thread
 
     int cycleCount = 0;
-    while (!stages.get(0).getHostRoleStatus(hostname, "NAMENODE")
+    scheduler.doWork();
+    //Check that in_progress command is rescheduled
+    assertEquals(HostRoleStatus.QUEUED, stages.get(0).getHostRoleStatus(hostname, "SECONDARY_NAMENODE"));
+
+    //Switch command back to IN_PROGRESS status and check that other command is not rescheduled
+    stages.get(0).setHostRoleStatus(hostname, "SECONDARY_NAMENODE", HostRoleStatus.IN_PROGRESS);
+    scheduler.doWork();
+    assertEquals(1, stages.get(0).getAttemptCount(hostname, "NAMENODE"));
+    assertEquals(2, stages.get(0).getAttemptCount(hostname, "SECONDARY_NAMENODE"));
+
+    while (!stages.get(0).getHostRoleStatus(hostname, "SECONDARY_NAMENODE")
         .equals(HostRoleStatus.TIMEDOUT) && cycleCount++ <= MAX_CYCLE_ITERATIONS) {
       scheduler.doWork();
     }
-    assertEquals(stages.get(0).getHostRoleStatus(hostname, "NAMENODE"),
-        HostRoleStatus.TIMEDOUT);
+    assertEquals(HostRoleStatus.TIMEDOUT,
+            stages.get(0).getHostRoleStatus(hostname, "SECONDARY_NAMENODE"));
 
     verify(db, times(1)).startRequest(eq(1L));
     verify(db, times(1)).abortOperation(1L);