You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by sm...@apache.org on 2014/01/02 19:09:44 UTC
git commit: AMBARI-4206. Significant lag between host status update
and slave/master component start/stop
Updated Branches:
refs/heads/branch-1.4.3 3f6e460e4 -> 3c3402a36
AMBARI-4206. Significant lag between host status update and slave/master component start/stop
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/3c3402a3
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/3c3402a3
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/3c3402a3
Branch: refs/heads/branch-1.4.3
Commit: 3c3402a3685fc629b15fd83fc1b005a66b442955
Parents: 3f6e460
Author: Sumit Mohanty <sm...@hortonworks.com>
Authored: Thu Jan 2 10:06:56 2014 -0800
Committer: Sumit Mohanty <sm...@hortonworks.com>
Committed: Thu Jan 2 10:06:56 2014 -0800
----------------------------------------------------------------------
.../test/python/ambari_agent/TestHardware.py | 20 ++++--
.../ambari/server/agent/HeartBeatHandler.java | 74 ++++++++++++--------
.../internal/HostResourceProvider.java | 62 ----------------
.../ambari/server/state/host/HostImpl.java | 13 ++--
.../apache/ambari/server/utils/StageUtils.java | 2 -
.../server/agent/TestHeartbeatHandler.java | 58 ++++++++++++++-
6 files changed, 122 insertions(+), 107 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-agent/src/test/python/ambari_agent/TestHardware.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestHardware.py b/ambari-agent/src/test/python/ambari_agent/TestHardware.py
index e4937ec..2e82a8a 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestHardware.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestHardware.py
@@ -22,6 +22,8 @@ from unittest import TestCase
from ambari_agent.Hardware import Hardware
from mock.mock import patch
from ambari_agent.Facter import Facter
+import unittest
+import socket
class TestHardware(TestCase):
def test_build(self):
@@ -75,14 +77,18 @@ class TestHardware(TestCase):
self.assertEquals(result, None)
- @patch.object(Facter, "getFqdn")
- def test_fqdnDomainHostname(self, facter_getFqdn_mock):
- facter_getFqdn_mock.return_value = "ambari.apache.org"
+ '''
+ This test validates the current behavior where hostname and fqdn are
+ the same.
+ '''
+ @patch.object(socket, "getfqdn")
+ def test_fqdnDomainHostname(self, socket_getFqdn_mock):
+ socket_getFqdn_mock.return_value = "ambari.apache.org"
result = Facter().facterInfo()
- self.assertEquals(result['hostname'], "ambari")
- self.assertEquals(result['domain'], "apache.org")
- self.assertEquals(result['fqdn'], (result['hostname'] + '.' + result['domain']))
+ self.assertEquals(result['hostname'], "ambari.apache.org")
+ self.assertEquals(result['domain'], "")
+ self.assertEquals(result['fqdn'], (result['hostname'] + result['domain']))
@patch.object(Facter, "setDataUpTimeOutput")
def test_uptimeSecondsHoursDays(self, facter_setDataUpTimeOutput_mock):
@@ -154,4 +160,6 @@ lo Link encap:Local Loopback
self.assertEquals(result['netmask'], '255.255.255.0')
self.assertEquals(result['interfaces'], 'eth0,eth1,lo')
+if __name__ == "__main__":
+ unittest.main()
http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
index 2501f61..b421bff 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
@@ -17,7 +17,6 @@
*/
package org.apache.ambari.server.agent;
-import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -182,7 +181,8 @@ public class HeartBeatHandler {
// Examine heartbeart for component live status reports
processStatusReports(heartbeat, hostname, clusterFsm);
- //Calculate host status
+ // Calculate host status
+ // NOTE: This step must be after processing command/status reports
processHostStatus(heartbeat, hostname);
// Send commands if node is active
@@ -196,44 +196,61 @@ public class HeartBeatHandler {
protected void processHostStatus(HeartBeat heartbeat, String hostname) throws AmbariException {
Host host = clusterFsm.getHost(hostname);
-
-
HealthStatus healthStatus = host.getHealthStatus().getHealthStatus();
-
+
if (!healthStatus.equals(HostHealthStatus.HealthStatus.UNKNOWN)) {
List<ComponentStatus> componentStatuses = heartbeat.getComponentStatus();
//Host status info could be calculated only if agent returned statuses in heartbeat
+ //Or, if a command is executed that can change component status
+ boolean calculateHostStatus = false;
+ String clusterName = null;
if (componentStatuses.size() > 0) {
+ calculateHostStatus = true;
+ for (ComponentStatus componentStatus : componentStatuses) {
+ clusterName = componentStatus.getClusterName();
+ break;
+ }
+ }
+
+ if (!calculateHostStatus) {
+ List<CommandReport> reports = heartbeat.getReports();
+ for (CommandReport report : reports) {
+ if (RoleCommand.ACTIONEXECUTE.toString().equals(report.getRoleCommand())) {
+ continue;
+ }
+
+ String service = report.getServiceName();
+ if (actionMetadata.getActions(service.toLowerCase()).contains(report.getRole())) {
+ continue;
+ }
+ if (report.getStatus().equals("COMPLETED")) {
+ calculateHostStatus = true;
+ clusterName = report.getClusterName();
+ break;
+ }
+ }
+ }
+ if (calculateHostStatus) {
+ //Use actual component status to compute the host status
int masterCount = 0;
int mastersRunning = 0;
int slaveCount = 0;
int slavesRunning = 0;
- Map<String, StackId> stackIdsByClusters =
- new HashMap<String, StackId>();
-
- for (ComponentStatus componentStatus : componentStatuses) {
-
- String clusterName = componentStatus.getClusterName();
-
- StackId stackId;
- if (stackIdsByClusters.containsKey(clusterName)) {
- stackId = stackIdsByClusters.get(clusterName);
-
- } else {
- Cluster cluster = clusterFsm.getCluster(clusterName);
- stackId = cluster.getDesiredStackVersion();
- stackIdsByClusters.put(clusterName, stackId);
- }
+ StackId stackId;
+ Cluster cluster = clusterFsm.getCluster(clusterName);
+ stackId = cluster.getDesiredStackVersion();
+ List<ServiceComponentHost> scHosts = cluster.getServiceComponentHosts(heartbeat.getHostname());
+ for (ServiceComponentHost scHost : scHosts) {
ComponentInfo componentInfo =
ambariMetaInfo.getComponent(stackId.getStackName(),
- stackId.getStackVersion(), componentStatus.getServiceName(),
- componentStatus.getComponentName());
+ stackId.getStackVersion(), scHost.getServiceName(),
+ scHost.getServiceComponentName());
- String status = componentStatus.getStatus();
+ String status = scHost.getState().name();
String category = componentInfo.getCategory();
@@ -249,7 +266,7 @@ public class HeartBeatHandler {
}
}
}
-
+
if (masterCount == mastersRunning && slaveCount == slavesRunning) {
healthStatus = HostHealthStatus.HealthStatus.HEALTHY;
} else if (masterCount > 0 && mastersRunning < masterCount) {
@@ -257,18 +274,17 @@ public class HeartBeatHandler {
} else {
healthStatus = HostHealthStatus.HealthStatus.ALERT;
}
-
+
host.setStatus(healthStatus.name());
host.persist();
}
-
+
//If host doesn't belongs to any cluster
if ((clusterFsm.getClustersForHost(host.getHostName())).size() == 0) {
healthStatus = HostHealthStatus.HealthStatus.HEALTHY;
host.setStatus(healthStatus.name());
host.persist();
- }
-
+ }
}
}
http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java
index 0c9c1f6..1759826 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java
@@ -664,66 +664,4 @@ public class HostResourceProvider extends AbstractControllerResourceProvider {
}
}
}
-
- // calculate the host status, accounting for the state of the host components
- private String calculateHostStatus(HostResponse response) throws AmbariException {
- HostHealthStatus.HealthStatus healthStatus = response.getHealthStatus().getHealthStatus();
-
- if (!healthStatus.equals(HostHealthStatus.HealthStatus.UNKNOWN)) {
- AmbariManagementController controller = getManagementController();
- AmbariMetaInfo ambariMetaInfo = controller.getAmbariMetaInfo();
- Clusters clusters = controller.getClusters();
- String clusterName = response.getClusterName();
-
- if (clusterName != null && clusterName.length() > 0) {
- Cluster cluster = clusters.getCluster(clusterName);
- if (cluster != null) {
- StackId stackId = cluster.getDesiredStackVersion();
-
- ServiceComponentHostRequest request = new ServiceComponentHostRequest(clusterName,
- null, null, response.getHostname(), null);
-
- Set<ServiceComponentHostResponse> hostComponentResponses =
- controller.getHostComponents(Collections.singleton(request));
-
- int masterCount = 0;
- int mastersRunning = 0;
- int slaveCount = 0;
- int slavesRunning = 0;
-
- for (ServiceComponentHostResponse hostComponentResponse : hostComponentResponses ) {
- ComponentInfo componentInfo = ambariMetaInfo.getComponentCategory(stackId.getStackName(),
- stackId.getStackVersion(), hostComponentResponse.getServiceName(),
- hostComponentResponse.getComponentName());
-
- if (componentInfo != null) {
- String category = componentInfo.getCategory();
- String state = hostComponentResponse.getLiveState();
-
- if (category.equals("MASTER")) {
- ++masterCount;
- if (state.equals("STARTED")) {
- ++mastersRunning;
- }
- } else if (category.equals("SLAVE")) {
- ++slaveCount;
- if (state.equals("STARTED")) {
- ++slavesRunning;
- }
- }
- }
- }
-
- if (masterCount == mastersRunning && slaveCount == slavesRunning) {
- healthStatus = HostHealthStatus.HealthStatus.HEALTHY;
- } else if (masterCount > 0 && mastersRunning < masterCount ) {
- healthStatus = HostHealthStatus.HealthStatus.UNHEALTHY;
- } else {
- healthStatus = HostHealthStatus.HealthStatus.ALERT;
- }
- }
- }
- }
- return healthStatus.toString();
- }
}
http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java b/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java
index d2ac9c0..78f18fa 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java
@@ -1009,12 +1009,13 @@ public class HostImpl implements Host {
@Override
public void setStatus(String status) {
- try {
- writeLock.lock();
- this.status = status;
- }
- finally {
- writeLock.unlock();
+ if (status != null && !status.equals(this.status)) {
+ try {
+ writeLock.lock();
+ this.status = status;
+ } finally {
+ writeLock.unlock();
+ }
}
}
http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java b/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java
index f696357..4ea8fa6 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java
@@ -250,8 +250,6 @@ public class StageUtils {
//Add index of host to current host role
hostsForComponentsHost.add(hostIndex);
}
- else
- LOG.warn("Component " + componentName + " doesn't have mapped role name for cluster host info");
}
}
}
http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
index 4b91179..de772f5 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
@@ -1317,6 +1317,14 @@ public class TestHeartbeatHandler {
Cluster cluster = getDummyCluster();
Host hostObject = clusters.getHost(DummyHostname1);
clusters.mapHostToCluster(hostObject.getHostName(), cluster.getClusterName());
+ Service hdfs = cluster.addService(HDFS);
+ hdfs.persist();
+ hdfs.addServiceComponent(DATANODE).persist();
+ hdfs.getServiceComponent(DATANODE).addServiceComponentHost(DummyHostname1).persist();
+ hdfs.addServiceComponent(NAMENODE).persist();
+ hdfs.getServiceComponent(NAMENODE).addServiceComponentHost(DummyHostname1).persist();
+ hdfs.getServiceComponent(NAMENODE).getServiceComponentHost(DummyHostname1).setState(State.STARTED);
+ hdfs.getServiceComponent(DATANODE).getServiceComponentHost(DummyHostname1).setState(State.STARTED);
ActionQueue aq = new ActionQueue();
@@ -1400,8 +1408,54 @@ public class TestHeartbeatHandler {
hb3.setComponentStatus(componentStatus);
handler.handleHeartBeat(hb3);
assertEquals(HostHealthStatus.HealthStatus.UNHEALTHY.name(), hostObject.getStatus());
-
-
+
+ //All are up
+ hb1.setResponseId(3);
+ handler.handleHeartBeat(hb1);
+ assertEquals(HostHealthStatus.HealthStatus.HEALTHY.name(), hostObject.getStatus());
+
+ //Only one component reported status
+ hdfs.getServiceComponent(NAMENODE).getServiceComponentHost(DummyHostname1).setState(State.INSTALLED);
+ HeartBeat hb4 = new HeartBeat();
+ hb4.setResponseId(4);
+ hb4.setNodeStatus(new HostStatus(Status.HEALTHY, DummyHostStatus));
+ hb4.setHostname(DummyHostname1);
+ componentStatus = new ArrayList<ComponentStatus>();
+ dataNodeStatus = new ComponentStatus();
+ dataNodeStatus.setClusterName(cluster.getClusterName());
+ dataNodeStatus.setServiceName(HDFS);
+ dataNodeStatus.setComponentName(DATANODE);
+ dataNodeStatus.setStatus("STARTED");
+ componentStatus.add(dataNodeStatus);
+ hb4.setComponentStatus(componentStatus);
+ handler.handleHeartBeat(hb4);
+ assertEquals(HostHealthStatus.HealthStatus.UNHEALTHY.name(), hostObject.getStatus());
+
+ hb1.setResponseId(5);
+ handler.handleHeartBeat(hb1);
+ assertEquals(HostHealthStatus.HealthStatus.HEALTHY.name(), hostObject.getStatus());
+
+ //Some command reports
+ HeartBeat hb5 = new HeartBeat();
+ hb5.setResponseId(6);
+ hb5.setNodeStatus(new HostStatus(Status.HEALTHY, DummyHostStatus));
+ hb5.setHostname(DummyHostname1);
+ CommandReport cr1 = new CommandReport();
+ cr1.setActionId(StageUtils.getActionId(requestId, stageId));
+ cr1.setServiceName(HDFS);
+ cr1.setTaskId(1);
+ cr1.setRole(DATANODE);
+ cr1.setStatus("COMPLETED");
+ cr1.setStdErr("");
+ cr1.setStdOut("");
+ cr1.setExitCode(215);
+ cr1.setRoleCommand("STOP");
+ cr1.setClusterName(DummyCluster);
+ ArrayList<CommandReport> reports = new ArrayList<CommandReport>();
+ reports.add(cr1);
+ hb5.setReports(reports);
+ handler.handleHeartBeat(hb5);
+ assertEquals(HostHealthStatus.HealthStatus.ALERT.name(), hostObject.getStatus());
}
private ActionManager getMockActionManager() {