You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by sm...@apache.org on 2014/01/02 19:09:44 UTC

git commit: AMBARI-4206. Significant lag between host status update and slave/master component start/stop

Updated Branches:
  refs/heads/branch-1.4.3 3f6e460e4 -> 3c3402a36


AMBARI-4206. Significant lag between host status update and slave/master component start/stop


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/3c3402a3
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/3c3402a3
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/3c3402a3

Branch: refs/heads/branch-1.4.3
Commit: 3c3402a3685fc629b15fd83fc1b005a66b442955
Parents: 3f6e460
Author: Sumit Mohanty <sm...@hortonworks.com>
Authored: Thu Jan 2 10:06:56 2014 -0800
Committer: Sumit Mohanty <sm...@hortonworks.com>
Committed: Thu Jan 2 10:06:56 2014 -0800

----------------------------------------------------------------------
 .../test/python/ambari_agent/TestHardware.py    | 20 ++++--
 .../ambari/server/agent/HeartBeatHandler.java   | 74 ++++++++++++--------
 .../internal/HostResourceProvider.java          | 62 ----------------
 .../ambari/server/state/host/HostImpl.java      | 13 ++--
 .../apache/ambari/server/utils/StageUtils.java  |  2 -
 .../server/agent/TestHeartbeatHandler.java      | 58 ++++++++++++++-
 6 files changed, 122 insertions(+), 107 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-agent/src/test/python/ambari_agent/TestHardware.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestHardware.py b/ambari-agent/src/test/python/ambari_agent/TestHardware.py
index e4937ec..2e82a8a 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestHardware.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestHardware.py
@@ -22,6 +22,8 @@ from unittest import TestCase
 from ambari_agent.Hardware import Hardware
 from mock.mock import patch
 from ambari_agent.Facter import Facter
+import unittest
+import socket
 
 class TestHardware(TestCase):
   def test_build(self):
@@ -75,14 +77,18 @@ class TestHardware(TestCase):
 
     self.assertEquals(result, None)
 
-  @patch.object(Facter, "getFqdn")
-  def test_fqdnDomainHostname(self, facter_getFqdn_mock):
-    facter_getFqdn_mock.return_value = "ambari.apache.org"
+  '''
+  This test validates the current behavior where hostname and fqdn are
+  the same.
+  '''
+  @patch.object(socket, "getfqdn")
+  def test_fqdnDomainHostname(self, socket_getFqdn_mock):
+    socket_getFqdn_mock.return_value = "ambari.apache.org"
     result = Facter().facterInfo()
 
-    self.assertEquals(result['hostname'], "ambari")
-    self.assertEquals(result['domain'], "apache.org")
-    self.assertEquals(result['fqdn'], (result['hostname'] + '.' + result['domain']))
+    self.assertEquals(result['hostname'], "ambari.apache.org")
+    self.assertEquals(result['domain'], "")
+    self.assertEquals(result['fqdn'], (result['hostname'] + result['domain']))
 
   @patch.object(Facter, "setDataUpTimeOutput")
   def test_uptimeSecondsHoursDays(self, facter_setDataUpTimeOutput_mock):
@@ -154,4 +160,6 @@ lo        Link encap:Local Loopback
     self.assertEquals(result['netmask'], '255.255.255.0')
     self.assertEquals(result['interfaces'], 'eth0,eth1,lo')
 
+if __name__ == "__main__":
+  unittest.main()
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
index 2501f61..b421bff 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
@@ -17,7 +17,6 @@
  */
 package org.apache.ambari.server.agent;
 
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -182,7 +181,8 @@ public class HeartBeatHandler {
     // Examine heartbeart for component live status reports
     processStatusReports(heartbeat, hostname, clusterFsm);
     
-    //Calculate host status
+    // Calculate host status
+    // NOTE: This step must be after processing command/status reports
     processHostStatus(heartbeat, hostname);
 
     // Send commands if node is active
@@ -196,44 +196,61 @@ public class HeartBeatHandler {
   protected void processHostStatus(HeartBeat heartbeat, String hostname) throws AmbariException {
 
     Host host = clusterFsm.getHost(hostname);
-
-    
     HealthStatus healthStatus = host.getHealthStatus().getHealthStatus();
-    
+
     if (!healthStatus.equals(HostHealthStatus.HealthStatus.UNKNOWN)) {
 
       List<ComponentStatus> componentStatuses = heartbeat.getComponentStatus();
       //Host status info could be calculated only if agent returned statuses in heartbeat
+      //Or, if a command is executed that can change component status
+      boolean calculateHostStatus = false;
+      String clusterName = null;
       if (componentStatuses.size() > 0) {
+        calculateHostStatus = true;
+        for (ComponentStatus componentStatus : componentStatuses) {
+          clusterName = componentStatus.getClusterName();
+          break;
+        }
+      }
+
+      if (!calculateHostStatus) {
+        List<CommandReport> reports = heartbeat.getReports();
+        for (CommandReport report : reports) {
+          if (RoleCommand.ACTIONEXECUTE.toString().equals(report.getRoleCommand())) {
+            continue;
+          }
+
+          String service = report.getServiceName();
+          if (actionMetadata.getActions(service.toLowerCase()).contains(report.getRole())) {
+            continue;
+          }
+          if (report.getStatus().equals("COMPLETED")) {
+            calculateHostStatus = true;
+            clusterName = report.getClusterName();
+            break;
+          }
+        }
+      }
 
+      if (calculateHostStatus) {
+        //Use actual component status to compute the host status
         int masterCount = 0;
         int mastersRunning = 0;
         int slaveCount = 0;
         int slavesRunning = 0;
 
-        Map<String, StackId> stackIdsByClusters =
-            new HashMap<String, StackId>();
-
-        for (ComponentStatus componentStatus : componentStatuses) {
-
-          String clusterName = componentStatus.getClusterName();
-
-          StackId stackId;
-          if (stackIdsByClusters.containsKey(clusterName)) {
-            stackId = stackIdsByClusters.get(clusterName);
-
-          } else {
-            Cluster cluster = clusterFsm.getCluster(clusterName);
-            stackId = cluster.getDesiredStackVersion();
-            stackIdsByClusters.put(clusterName, stackId);
-          }
+        StackId stackId;
+        Cluster cluster = clusterFsm.getCluster(clusterName);
+        stackId = cluster.getDesiredStackVersion();
 
+        List<ServiceComponentHost> scHosts = cluster.getServiceComponentHosts(heartbeat.getHostname());
+        for (ServiceComponentHost scHost : scHosts) {
           ComponentInfo componentInfo =
               ambariMetaInfo.getComponent(stackId.getStackName(),
-                  stackId.getStackVersion(), componentStatus.getServiceName(),
-                  componentStatus.getComponentName());
+                  stackId.getStackVersion(), scHost.getServiceName(),
+                  scHost.getServiceComponentName());
 
-          String status = componentStatus.getStatus();
+          String status = scHost.getState().name();
 
           String category = componentInfo.getCategory();
 
@@ -249,7 +266,7 @@ public class HeartBeatHandler {
             }
           }
         }
-        
+
         if (masterCount == mastersRunning && slaveCount == slavesRunning) {
           healthStatus = HostHealthStatus.HealthStatus.HEALTHY;
         } else if (masterCount > 0 && mastersRunning < masterCount) {
@@ -257,18 +274,17 @@ public class HeartBeatHandler {
         } else {
           healthStatus = HostHealthStatus.HealthStatus.ALERT;
         }
-        
+
         host.setStatus(healthStatus.name());
         host.persist();
       }
-      
+
       //If host doesn't belongs to any cluster
       if ((clusterFsm.getClustersForHost(host.getHostName())).size() == 0) {
         healthStatus = HostHealthStatus.HealthStatus.HEALTHY;
         host.setStatus(healthStatus.name());
         host.persist();
-      } 
-      
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java
index 0c9c1f6..1759826 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java
@@ -664,66 +664,4 @@ public class HostResourceProvider extends AbstractControllerResourceProvider {
       }
     }
   }
-
-  // calculate the host status, accounting for the state of the host components
-  private String calculateHostStatus(HostResponse response) throws AmbariException {
-    HostHealthStatus.HealthStatus healthStatus = response.getHealthStatus().getHealthStatus();
-
-    if (!healthStatus.equals(HostHealthStatus.HealthStatus.UNKNOWN)) {
-      AmbariManagementController controller     = getManagementController();
-      AmbariMetaInfo             ambariMetaInfo = controller.getAmbariMetaInfo();
-      Clusters                   clusters       = controller.getClusters();
-      String                     clusterName    = response.getClusterName();
-
-      if (clusterName != null && clusterName.length() > 0) {
-        Cluster cluster = clusters.getCluster(clusterName);
-        if (cluster != null) {
-          StackId  stackId = cluster.getDesiredStackVersion();
-
-          ServiceComponentHostRequest request = new ServiceComponentHostRequest(clusterName,
-              null, null, response.getHostname(), null);
-
-          Set<ServiceComponentHostResponse> hostComponentResponses =
-              controller.getHostComponents(Collections.singleton(request));
-
-          int masterCount    = 0;
-          int mastersRunning = 0;
-          int slaveCount     = 0;
-          int slavesRunning  = 0;
-
-          for (ServiceComponentHostResponse hostComponentResponse : hostComponentResponses ) {
-            ComponentInfo componentInfo = ambariMetaInfo.getComponentCategory(stackId.getStackName(),
-                stackId.getStackVersion(), hostComponentResponse.getServiceName(),
-                hostComponentResponse.getComponentName());
-
-            if (componentInfo != null) {
-              String category = componentInfo.getCategory();
-              String state    = hostComponentResponse.getLiveState();
-
-              if (category.equals("MASTER")) {
-                ++masterCount;
-                if (state.equals("STARTED")) {
-                  ++mastersRunning;
-                }
-              } else if (category.equals("SLAVE")) {
-                ++slaveCount;
-                if (state.equals("STARTED")) {
-                  ++slavesRunning;
-                }
-              }
-            }
-          }
-
-          if (masterCount == mastersRunning && slaveCount == slavesRunning) {
-            healthStatus = HostHealthStatus.HealthStatus.HEALTHY;
-          } else if (masterCount > 0 && mastersRunning < masterCount ) {
-            healthStatus = HostHealthStatus.HealthStatus.UNHEALTHY;
-          } else {
-            healthStatus = HostHealthStatus.HealthStatus.ALERT;
-          }
-        }
-      }
-    }
-    return healthStatus.toString();
-  }
 }

http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java b/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java
index d2ac9c0..78f18fa 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/state/host/HostImpl.java
@@ -1009,12 +1009,13 @@ public class HostImpl implements Host {
 
   @Override
   public void setStatus(String status) {
-    try {
-      writeLock.lock();
-      this.status = status;
-    }
-    finally {
-      writeLock.unlock();
+    if (status != null && !status.equals(this.status)) {
+      try {
+        writeLock.lock();
+        this.status = status;
+      } finally {
+        writeLock.unlock();
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java b/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java
index f696357..4ea8fa6 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/utils/StageUtils.java
@@ -250,8 +250,6 @@ public class StageUtils {
             //Add index of host to current host role
             hostsForComponentsHost.add(hostIndex);
           }
-          else
-            LOG.warn("Component " + componentName + " doesn't have mapped role name for cluster host info");
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/ambari/blob/3c3402a3/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
index 4b91179..de772f5 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
@@ -1317,6 +1317,14 @@ public class TestHeartbeatHandler {
     Cluster cluster = getDummyCluster();
     Host hostObject = clusters.getHost(DummyHostname1);
     clusters.mapHostToCluster(hostObject.getHostName(), cluster.getClusterName());
+    Service hdfs = cluster.addService(HDFS);
+    hdfs.persist();
+    hdfs.addServiceComponent(DATANODE).persist();
+    hdfs.getServiceComponent(DATANODE).addServiceComponentHost(DummyHostname1).persist();
+    hdfs.addServiceComponent(NAMENODE).persist();
+    hdfs.getServiceComponent(NAMENODE).addServiceComponentHost(DummyHostname1).persist();
+    hdfs.getServiceComponent(NAMENODE).getServiceComponentHost(DummyHostname1).setState(State.STARTED);
+    hdfs.getServiceComponent(DATANODE).getServiceComponentHost(DummyHostname1).setState(State.STARTED);
 
     ActionQueue aq = new ActionQueue();
 
@@ -1400,8 +1408,54 @@ public class TestHeartbeatHandler {
     hb3.setComponentStatus(componentStatus);
     handler.handleHeartBeat(hb3);
     assertEquals(HostHealthStatus.HealthStatus.UNHEALTHY.name(), hostObject.getStatus());
-    
-    
+
+    //All are up
+    hb1.setResponseId(3);
+    handler.handleHeartBeat(hb1);
+    assertEquals(HostHealthStatus.HealthStatus.HEALTHY.name(), hostObject.getStatus());
+
+    //Only one component reported status
+    hdfs.getServiceComponent(NAMENODE).getServiceComponentHost(DummyHostname1).setState(State.INSTALLED);
+    HeartBeat hb4 = new HeartBeat();
+    hb4.setResponseId(4);
+    hb4.setNodeStatus(new HostStatus(Status.HEALTHY, DummyHostStatus));
+    hb4.setHostname(DummyHostname1);
+    componentStatus = new ArrayList<ComponentStatus>();
+    dataNodeStatus = new ComponentStatus();
+    dataNodeStatus.setClusterName(cluster.getClusterName());
+    dataNodeStatus.setServiceName(HDFS);
+    dataNodeStatus.setComponentName(DATANODE);
+    dataNodeStatus.setStatus("STARTED");
+    componentStatus.add(dataNodeStatus);
+    hb4.setComponentStatus(componentStatus);
+    handler.handleHeartBeat(hb4);
+    assertEquals(HostHealthStatus.HealthStatus.UNHEALTHY.name(), hostObject.getStatus());
+
+    hb1.setResponseId(5);
+    handler.handleHeartBeat(hb1);
+    assertEquals(HostHealthStatus.HealthStatus.HEALTHY.name(), hostObject.getStatus());
+
+    //Some command reports
+    HeartBeat hb5 = new HeartBeat();
+    hb5.setResponseId(6);
+    hb5.setNodeStatus(new HostStatus(Status.HEALTHY, DummyHostStatus));
+    hb5.setHostname(DummyHostname1);
+    CommandReport cr1 = new CommandReport();
+    cr1.setActionId(StageUtils.getActionId(requestId, stageId));
+    cr1.setServiceName(HDFS);
+    cr1.setTaskId(1);
+    cr1.setRole(DATANODE);
+    cr1.setStatus("COMPLETED");
+    cr1.setStdErr("");
+    cr1.setStdOut("");
+    cr1.setExitCode(215);
+    cr1.setRoleCommand("STOP");
+    cr1.setClusterName(DummyCluster);
+    ArrayList<CommandReport> reports = new ArrayList<CommandReport>();
+    reports.add(cr1);
+    hb5.setReports(reports);
+    handler.handleHeartBeat(hb5);
+    assertEquals(HostHealthStatus.HealthStatus.ALERT.name(), hostObject.getStatus());
   }
 
   private ActionManager getMockActionManager() {