You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by sw...@apache.org on 2014/02/28 07:02:36 UTC
[1/2] git commit: AMBARI-4878. On a retry all the install commands
succeed but the status of one of the components is INSTALL_FAILED which
causes the next START command to fail on UI. (swagle)
Repository: ambari
Updated Branches:
refs/heads/trunk 450022aee -> 595b721b2
AMBARI-4878. On a retry all the install commands succeed but the status of one of the components is INSTALL_FAILED which causes the next START command to fail on UI. (swagle)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/044a29f9
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/044a29f9
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/044a29f9
Branch: refs/heads/trunk
Commit: 044a29f9b1dd70969355edfa09c17f814927ec49
Parents: 450022a
Author: Siddharth Wagle <sw...@hortonworks.com>
Authored: Thu Feb 27 21:31:31 2014 -0800
Committer: Siddharth Wagle <sw...@hortonworks.com>
Committed: Thu Feb 27 21:38:23 2014 -0800
----------------------------------------------------------------------
.../server/actionmanager/ActionManager.java | 15 +++
.../ambari/server/agent/HeartBeatHandler.java | 9 +-
.../server/agent/TestHeartbeatHandler.java | 105 ++++++++++++++++++-
3 files changed, 126 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/044a29f9/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionManager.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionManager.java b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionManager.java
index 789bbd6..7b9a922 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionManager.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionManager.java
@@ -149,6 +149,21 @@ public class ActionManager {
db.updateHostRoleStates(reportsToProcess);
}
+ /**
+ * Determines whether the task referenced by a command report is still
+ * active. The task is looked up through {@code db.getTask} using the
+ * report's task id; it counts as active when its status equals
+ * {@code HostRoleStatus.IN_PROGRESS} or {@code HostRoleStatus.QUEUED}.
+ *
+ * @param report the command report whose task id is used for the lookup
+ * @return {@code true} if a task is found and its status is IN_PROGRESS or
+ * QUEUED; {@code false} for any other status, or when no task is found
+ * for the id (a warning is logged in that case)
+ */
+ public boolean isInProgressCommand(CommandReport report) {
+ HostRoleCommand command = db.getTask(report.getTaskId());
+ if (command == null) {
+ LOG.warn("The task " + report.getTaskId() + " is invalid");
+ return false;
+ }
+ return command.getStatus().equals(HostRoleStatus.IN_PROGRESS)
+ || command.getStatus().equals(HostRoleStatus.QUEUED);
+ }
+
public void handleLostHost(String host) {
//Do nothing, the task will timeout anyway.
//The actions can be failed faster as an optimization
http://git-wip-us.apache.org/repos/asf/ambari/blob/044a29f9/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
index 0044c8e..0d4cfe1 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
@@ -345,8 +345,13 @@ public class HeartBeatHandler {
} else if (report.getStatus().equals("FAILED")) {
LOG.warn("Operation failed - may be retried. Service component host: "
+ schName + ", host: " + hostname + " Action id" + report.getActionId());
- scHost.handleEvent(new ServiceComponentHostOpFailedEvent(schName,
- hostname, now));
+ if (actionManager.isInProgressCommand(report)) {
+ scHost.handleEvent(new ServiceComponentHostOpFailedEvent
+ (schName, hostname, now));
+ } else {
+ LOG.info("Report arrived after command is no longer running. " +
+ "Ignoring report. " + report);
+ }
} else if (report.getStatus().equals("IN_PROGRESS")) {
scHost.handleEvent(new ServiceComponentHostOpInProgressEvent(schName,
hostname, now));
http://git-wip-us.apache.org/repos/asf/ambari/blob/044a29f9/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
index 490df5e..6014bbf 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
@@ -77,7 +77,9 @@ import org.apache.ambari.server.state.ServiceComponentHost;
import org.apache.ambari.server.state.StackId;
import org.apache.ambari.server.state.State;
import org.apache.ambari.server.state.fsm.InvalidStateTransitionException;
+import org.apache.ambari.server.state.svccomphost.ServiceComponentHostInstallEvent;
import org.apache.ambari.server.state.svccomphost.ServiceComponentHostStartEvent;
+import org.apache.ambari.server.state.svccomphost.ServiceComponentHostUpgradeEvent;
import org.apache.ambari.server.utils.StageUtils;
import org.codehaus.jackson.JsonGenerationException;
import org.junit.After;
@@ -761,7 +763,6 @@ public class TestHeartbeatHandler {
getServiceComponent(DATANODE).getServiceComponentHost(DummyHostname1);
serviceComponentHost1.setState(State.INSTALLING);
-
HeartBeat hb = new HeartBeat();
hb.setTimestamp(System.currentTimeMillis());
hb.setResponseId(0);
@@ -788,6 +789,71 @@ public class TestHeartbeatHandler {
assertEquals("Host state should still be installing", State.INSTALLING, componentState1);
}
+ @Test // Sends a FAILED report for a task that was aborted beforehand and checks the component state stays INSTALLING.
+ public void testOPFailedEventForAbortedTask() throws AmbariException, InvalidStateTransitionException {
+ ActionManager am = getMockActionManager();
+ Cluster cluster = getDummyCluster();
+
+ @SuppressWarnings("serial")
+ Set<String> hostNames = new HashSet<String>(){{
+ add(DummyHostname1);
+ }};
+ clusters.mapHostsToCluster(hostNames, DummyCluster);
+ Service hdfs = cluster.addService(HDFS);
+ hdfs.persist();
+ hdfs.addServiceComponent(DATANODE).persist();
+ hdfs.getServiceComponent(DATANODE).addServiceComponentHost(DummyHostname1).persist();
+ hdfs.addServiceComponent(NAMENODE).persist();
+ hdfs.getServiceComponent(NAMENODE).addServiceComponentHost(DummyHostname1).persist();
+ hdfs.addServiceComponent(SECONDARY_NAMENODE).persist();
+ hdfs.getServiceComponent(SECONDARY_NAMENODE).addServiceComponentHost(DummyHostname1).persist();
+
+ ActionQueue aq = new ActionQueue();
+ HeartBeatHandler handler = getHeartBeatHandler(am, aq);
+
+ ServiceComponentHost serviceComponentHost1 = clusters.getCluster(DummyCluster).getService(HDFS).
+ getServiceComponent(DATANODE).getServiceComponentHost(DummyHostname1);
+ serviceComponentHost1.setState(State.INSTALLING); // starting state; the final assertion expects it to be unchanged
+
+ Stage s = new Stage(1, "/a/b", "cluster1", "action manager test",
+ "clusterHostInfo");
+ s.setStageId(1);
+ s.addHostRoleExecutionCommand(DummyHostname1, Role.DATANODE, RoleCommand.INSTALL,
+ new ServiceComponentHostInstallEvent(Role.DATANODE.toString(),
+ DummyHostname1, System.currentTimeMillis(), "HDP-1.3.0"),
+ DummyCluster, "HDFS");
+ List<Stage> stages = new ArrayList<Stage>();
+ stages.add(s);
+ Request request = new Request(stages, clusters);
+ actionDBAccessor.persistActions(request); // store the DATANODE INSTALL command built above
+ actionDBAccessor.abortHostRole(DummyHostname1, 1, 1, Role.DATANODE.name()); // abort (request 1, stage 1) before any report is handled; presumably leaves the task in a non-running status -- verify against abortHostRole
+
+ HeartBeat hb = new HeartBeat();
+ hb.setTimestamp(System.currentTimeMillis());
+ hb.setResponseId(0);
+ hb.setHostname(DummyHostname1);
+ hb.setNodeStatus(new HostStatus(Status.HEALTHY, DummyHostStatus));
+
+ List<CommandReport> reports = new ArrayList<CommandReport>();
+ CommandReport cr = new CommandReport();
+ cr.setActionId(StageUtils.getActionId(1, 1));
+ cr.setTaskId(1); // assumes the task persisted above was assigned id 1 -- TODO confirm
+ cr.setClusterName(DummyCluster);
+ cr.setServiceName(HDFS);
+ cr.setRole(DATANODE);
+ cr.setStatus("FAILED"); // failure report delivered after the abort above
+ cr.setStdErr("none");
+ cr.setStdOut("dummy output");
+ cr.setExitCode(777);
+ reports.add(cr);
+ hb.setReports(reports);
+ hb.setComponentStatus(new ArrayList<ComponentStatus>());
+ handler.handleHeartBeat(hb);
+ State componentState1 = serviceComponentHost1.getState();
+ assertEquals("Host state should still be installing", State.INSTALLING,
+ componentState1);
+ }
+
/**
* Tests the fact that when START and STOP commands are in progress, and heartbeat
* forces the host component state to STARTED or INSTALLED, there are no undesired
@@ -1265,6 +1331,37 @@ public class TestHeartbeatHandler {
serviceComponentHost1.setDesiredStackVersion(stack130);
serviceComponentHost2.setStackVersion(stack122);
+ Stage s = new Stage(requestId, "/a/b", "cluster1", "action manager test",
+ "clusterHostInfo");
+ s.setStageId(stageId);
+ s.addHostRoleExecutionCommand(DummyHostname1, Role.DATANODE, RoleCommand.UPGRADE,
+ new ServiceComponentHostUpgradeEvent(Role.DATANODE.toString(),
+ DummyHostname1, System.currentTimeMillis(), "HDP-1.3.0"),
+ DummyCluster, "HDFS");
+ s.addHostRoleExecutionCommand(DummyHostname1, Role.NAMENODE, RoleCommand.INSTALL,
+ new ServiceComponentHostInstallEvent(Role.NAMENODE.toString(),
+ DummyHostname1, System.currentTimeMillis(), "HDP-1.3.0"),
+ DummyCluster, "HDFS");
+ List<Stage> stages = new ArrayList<Stage>();
+ stages.add(s);
+ Request request = new Request(stages, clusters);
+ actionDBAccessor.persistActions(request);
+ CommandReport cr = new CommandReport();
+ cr.setActionId(StageUtils.getActionId(requestId, stageId));
+ cr.setTaskId(1);
+ cr.setClusterName(DummyCluster);
+ cr.setServiceName(HDFS);
+ cr.setRole(DATANODE);
+ cr.setStatus(HostRoleStatus.IN_PROGRESS.toString());
+ cr.setStdErr("none");
+ cr.setStdOut("dummy output");
+ actionDBAccessor.updateHostRoleState(DummyHostname1, requestId, stageId,
+ Role.DATANODE.name(), cr);
+ cr.setRole(NAMENODE);
+ cr.setTaskId(2);
+ actionDBAccessor.updateHostRoleState(DummyHostname1, requestId, stageId,
+ Role.NAMENODE.name(), cr);
+
HeartBeat hb = new HeartBeat();
hb.setTimestamp(System.currentTimeMillis());
hb.setResponseId(0);
@@ -1281,6 +1378,9 @@ public class TestHeartbeatHandler {
cr1.setStdOut("dummy output");
cr1.setExitCode(0);
+// actionDBAccessor.updateHostRoleState(DummyHostname1, requestId, stageId,
+// Role.DATANODE.name(), cr1);
+
CommandReport cr2 = new CommandReport();
cr2.setActionId(StageUtils.getActionId(requestId, stageId));
cr2.setTaskId(2);
@@ -1296,6 +1396,9 @@ public class TestHeartbeatHandler {
reports.add(cr2);
hb.setReports(reports);
+// actionDBAccessor.updateHostRoleState(DummyHostname1, requestId, stageId,
+// Role.NAMENODE.name(), cr2);
+
ActionQueue aq = new ActionQueue();
HeartBeatHandler handler = getHeartBeatHandler(am, aq);
handler.handleHeartBeat(hb);
[2/2] git commit: AMBARI-4878. On a retry all the install commands
succeed but the status of one of the components is INSTALL_FAILED which
causes the next START command to fail on UI. Fixed log. (swagle)
Posted by sw...@apache.org.
AMBARI-4878. On a retry all the install commands succeed but the status of one of the components is INSTALL_FAILED which causes the next START command to fail on UI. Fixed log. (swagle)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/595b721b
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/595b721b
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/595b721b
Branch: refs/heads/trunk
Commit: 595b721b2226eac4ed240ff931441786018eb27a
Parents: 044a29f
Author: Siddharth Wagle <sw...@hortonworks.com>
Authored: Thu Feb 27 22:01:59 2014 -0800
Committer: Siddharth Wagle <sw...@hortonworks.com>
Committed: Thu Feb 27 22:01:59 2014 -0800
----------------------------------------------------------------------
.../java/org/apache/ambari/server/agent/HeartBeatHandler.java | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/595b721b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
index 0d4cfe1..4666a29 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
@@ -349,8 +349,7 @@ public class HeartBeatHandler {
scHost.handleEvent(new ServiceComponentHostOpFailedEvent
(schName, hostname, now));
} else {
- LOG.info("Report arrived after command is no longer running. " +
- "Ignoring report. " + report);
+ LOG.info("Received report for a command that is no longer active. " + report);
}
} else if (report.getStatus().equals("IN_PROGRESS")) {
scHost.handleEvent(new ServiceComponentHostOpInProgressEvent(schName,