You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by vb...@apache.org on 2014/12/08 19:23:20 UTC
ambari git commit: AMBARI-8587. Ambari UI hangs for more than 20 minutes during installation.(vbrodetskyi)
Repository: ambari
Updated Branches:
refs/heads/trunk 4a42b6d5b -> 3ba1889f4
AMBARI-8587. Ambari UI hangs for more than 20 minutes during installation.(vbrodetskyi)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/3ba1889f
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/3ba1889f
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/3ba1889f
Branch: refs/heads/trunk
Commit: 3ba1889f454ffcd14352954ef30ad0393b02dc4a
Parents: 4a42b6d
Author: Vitaly Brodetskyi <vb...@hortonworks.com>
Authored: Mon Dec 8 20:21:02 2014 +0200
Committer: Vitaly Brodetskyi <vb...@hortonworks.com>
Committed: Mon Dec 8 20:23:03 2014 +0200
----------------------------------------------------------------------
.../server/actionmanager/ActionScheduler.java | 38 +++++++++++++++++---
.../ambari/server/agent/HeartBeatHandler.java | 4 +++
.../actionmanager/TestActionScheduler.java | 24 +++++++++----
3 files changed, 55 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/3ba1889f/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java
index c23440e..b3dcca4 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java
@@ -465,10 +465,6 @@ class ActionScheduler implements Runnable {
// Map to track role status
Map<String, RoleStats> roleStats = initRoleStats(s);
long now = System.currentTimeMillis();
- long taskTimeout = actionTimeout;
- if (taskTimeoutAdjustment) {
- taskTimeout = actionTimeout + s.getStageTimeout();
- }
Cluster cluster = null;
if (null != s.getClusterName()) {
@@ -522,6 +518,20 @@ class ActionScheduler implements Runnable {
}
}
+ //basic timeout for stage
+ long commandTimeout = actionTimeout;
+ if (taskTimeoutAdjustment) {
+ Map<String, String> commandParams = c.getCommandParams();
+ String timeoutKey = ExecutionCommand.KeyNames.COMMAND_TIMEOUT;
+ if (commandParams != null && commandParams.containsKey(timeoutKey)) {
+ String timeoutStr = commandParams.get(timeoutKey);
+ commandTimeout += Long.parseLong(timeoutStr) * 1000; // Converting to milliseconds
+ } else {
+ LOG.error("Execution command has no timeout parameter" +
+ c.toString());
+ }
+ }
+
// Check that service host component is not deleted
if (hostDeleted) {
@@ -537,7 +547,7 @@ class ActionScheduler implements Runnable {
// We don't need to send CANCEL_COMMANDs in this case
db.abortHostRole(host, s.getRequestId(), s.getStageId(), c.getRole(), message);
status = HostRoleStatus.ABORTED;
- } else if (timeOutActionNeeded(status, s, hostObj, roleStr, now, taskTimeout)) {
+ } else if (timeOutActionNeeded(status, s, hostObj, roleStr, now, commandTimeout)) {
// Process command timeouts
LOG.info("Host:" + host + ", role:" + roleStr + ", actionId:" + s.getActionId() + " timed out");
if (s.getAttemptCount(host, roleStr) >= maxAttempts) {
@@ -677,6 +687,11 @@ class ActionScheduler implements Runnable {
LOG.debug("Timing out action since agent is not heartbeating.");
return true;
}
+ // If we have other command in progress for this stage do not timeout this one
+ if (hasCommandInProgress(stage, host.getHostName())
+ && !status.equals(HostRoleStatus.IN_PROGRESS)) {
+ return false;
+ }
if (currentTime > stage.getLastAttemptTime(host.getHostName(), role)
+ taskTimeout) {
return true;
@@ -684,6 +699,19 @@ class ActionScheduler implements Runnable {
return false;
}
+ private boolean hasCommandInProgress(Stage stage, String host) {
+ List<ExecutionCommandWrapper> commandWrappers = stage.getExecutionCommands(host);
+ for (ExecutionCommandWrapper wrapper : commandWrappers) {
+ ExecutionCommand c = wrapper.getExecutionCommand();
+ String roleStr = c.getRole();
+ HostRoleStatus status = stage.getHostRoleStatus(host, roleStr);
+ if (status == HostRoleStatus.IN_PROGRESS) {
+ return true;
+ }
+ }
+ return false;
+ }
+
private ListMultimap<String, ServiceComponentHostEvent> formEventMap(Stage s, List<ExecutionCommand> commands) {
ListMultimap<String, ServiceComponentHostEvent> serviceEventMap = ArrayListMultimap.create();
for (ExecutionCommand cmd : commands) {
http://git-wip-us.apache.org/repos/asf/ambari/blob/3ba1889f/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
index b32a252..d482109 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
@@ -387,6 +387,10 @@ public class HeartBeatHandler {
if (hostRoleCommand.getStatus() == HostRoleStatus.ABORTED) {
continue;
}
+ if (hostRoleCommand.getStatus() == HostRoleStatus.QUEUED &&
+ report.getStatus().equals("IN_PROGRESS")) {
+ hostRoleCommand.setStartTime(now);
+ }
//pass custom STAR, STOP and RESTART
if (RoleCommand.ACTIONEXECUTE.toString().equals(report.getRoleCommand()) ||
(RoleCommand.CUSTOM_COMMAND.toString().equals(report.getRoleCommand()) &&
http://git-wip-us.apache.org/repos/asf/ambari/blob/3ba1889f/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java b/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java
index 0640c34..e61c1a9 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java
@@ -17,9 +17,7 @@
*/
package org.apache.ambari.server.actionmanager;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.*;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyLong;
import static org.mockito.Matchers.anyString;
@@ -211,6 +209,10 @@ public class TestActionScheduler {
List<Stage> stages = new ArrayList<Stage>();
final Stage s = StageUtils.getATestStage(1, 977, hostname, CLUSTER_HOST_INFO,
"{\"host_param\":\"param_value\"}", "{\"stage_param\":\"param_value\"}");
+ s.addHostRoleExecutionCommand(hostname, Role.SECONDARY_NAMENODE, RoleCommand.INSTALL,
+ new ServiceComponentHostInstallEvent("SECONDARY_NAMENODE", hostname, System.currentTimeMillis(), "HDP-1.2.0"),
+ "cluster1", "HDFS");
+ s.setHostRoleStatus(hostname, "SECONDARY_NAMENODE", HostRoleStatus.IN_PROGRESS);
stages.add(s);
ActionDBAccessor db = mock(ActionDBAccessor.class);
@@ -238,12 +240,22 @@ public class TestActionScheduler {
// Start the thread
int cycleCount = 0;
- while (!stages.get(0).getHostRoleStatus(hostname, "NAMENODE")
+ scheduler.doWork();
+ //Check that in_progress command is rescheduled
+ assertEquals(HostRoleStatus.QUEUED, stages.get(0).getHostRoleStatus(hostname, "SECONDARY_NAMENODE"));
+
+ //Switch command back to IN_PROGRESS status and check that other command is not rescheduled
+ stages.get(0).setHostRoleStatus(hostname, "SECONDARY_NAMENODE", HostRoleStatus.IN_PROGRESS);
+ scheduler.doWork();
+ assertEquals(1, stages.get(0).getAttemptCount(hostname, "NAMENODE"));
+ assertEquals(2, stages.get(0).getAttemptCount(hostname, "SECONDARY_NAMENODE"));
+
+ while (!stages.get(0).getHostRoleStatus(hostname, "SECONDARY_NAMENODE")
.equals(HostRoleStatus.TIMEDOUT) && cycleCount++ <= MAX_CYCLE_ITERATIONS) {
scheduler.doWork();
}
- assertEquals(stages.get(0).getHostRoleStatus(hostname, "NAMENODE"),
- HostRoleStatus.TIMEDOUT);
+ assertEquals(HostRoleStatus.TIMEDOUT,
+ stages.get(0).getHostRoleStatus(hostname, "SECONDARY_NAMENODE"));
verify(db, times(1)).startRequest(eq(1L));
verify(db, times(1)).abortOperation(1L);