You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by nc...@apache.org on 2013/04/22 15:44:55 UTC

svn commit: r1470505 - in /incubator/ambari/trunk: ./ ambari-server/src/main/java/org/apache/ambari/server/agent/ ambari-server/src/main/java/org/apache/ambari/server/state/ ambari-server/src/test/java/org/apache/ambari/server/agent/

Author: ncole
Date: Mon Apr 22 13:44:54 2013
New Revision: 1470505

URL: http://svn.apache.org/r1470505
Log:
AMBARI-1976. When host expires, mark each component for host with unknown state

Modified:
    incubator/ambari/trunk/CHANGES.txt
    incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
    incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java
    incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/state/State.java
    incubator/ambari/trunk/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java

Modified: incubator/ambari/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/CHANGES.txt?rev=1470505&r1=1470504&r2=1470505&view=diff
==============================================================================
--- incubator/ambari/trunk/CHANGES.txt (original)
+++ incubator/ambari/trunk/CHANGES.txt Mon Apr 22 13:44:54 2013
@@ -1219,6 +1219,8 @@ Trunk (unreleased changes):
  AMBARI-1657. User directories on HDFS do not get created with custom names 
  provided from Ambari UI. (swagle)
 
+ AMBARI-1976. When host expires, update each component for host with unknown state. (ncole)
+
  AMBARI-1980. Fix for nagios_alerts element when there is an error. (ncole)
 
  AMBARI-1865. Fix for upgrade script to copy configurations. (ncole)

Modified: incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java?rev=1470505&r1=1470504&r2=1470505&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java (original)
+++ incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java Mon Apr 22 13:44:54 2013
@@ -264,7 +264,8 @@ public class HeartBeatHandler {
               if (prevState.equals(State.INSTALLED)
                   || prevState.equals(State.STARTED)
                   || prevState.equals(State.STARTING)
-                  || prevState.equals(State.STOPPING)) {
+                  || prevState.equals(State.STOPPING)
+                  || prevState.equals(State.UNKNOWN)) {
                 scHost.setState(liveState);
                 if (!prevState.equals(liveState)) {
                   LOG.info("State of service component " + componentName

Modified: incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java?rev=1470505&r1=1470504&r2=1470505&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java (original)
+++ incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java Mon Apr 22 13:44:54 2013
@@ -29,7 +29,10 @@ import org.apache.ambari.server.state.Cl
 import org.apache.ambari.server.state.Config;
 import org.apache.ambari.server.state.Host;
 import org.apache.ambari.server.state.HostState;
+import org.apache.ambari.server.state.Service;
+import org.apache.ambari.server.state.ServiceComponent;
 import org.apache.ambari.server.state.ServiceComponentHost;
+import org.apache.ambari.server.state.State;
 import org.apache.ambari.server.state.fsm.InvalidStateTransitionException;
 import org.apache.ambari.server.state.host.HostHeartbeatLostEvent;
 import org.apache.ambari.server.state.svccomphost.HBaseMasterPortScanner;
@@ -118,7 +121,21 @@ public class HeartbeatMonitor implements
         LOG.warn("Hearbeat lost from host "+host);
         //Heartbeat is expired
         hostObj.handleEvent(new HostHeartbeatLostEvent(host));
+        
+        // mark all components that are not clients with unknown status
+        for (Cluster cluster : fsm.getClustersForHost(hostObj.getHostName())) {
+          for (ServiceComponentHost sch : cluster.getServiceComponentHosts(hostObj.getHostName())) {
+            Service s = cluster.getService(sch.getServiceName());
+            ServiceComponent sc = s.getServiceComponent(sch.getServiceComponentName());
+            if (!sc.isClientComponent()) {
+              sch.setState(State.UNKNOWN);
+            }
+          }
+        }
+        
+        // hbase
         if(hostState != hostObj.getState() && scanner != null) scanner.updateHBaseMaster(hostObj);
+        
         //Purge action queue
         actionQueue.dequeueAll(host);
         //notify action manager
@@ -178,7 +195,7 @@ public class HeartbeatMonitor implements
         statusCmd.setClusterName(cl.getClusterName());
         statusCmd.setServiceName(serviceName);
         statusCmd.setComponentName(sch.getServiceComponentName());
-        statusCmd.setConfigurations(configurations);			
+        statusCmd.setConfigurations(configurations);
         cmds.add(statusCmd);
       }
     }

Modified: incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/state/State.java
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/state/State.java?rev=1470505&r1=1470504&r2=1470505&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/state/State.java (original)
+++ incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/state/State.java Mon Apr 22 13:44:54 2013
@@ -66,7 +66,11 @@ public enum State {
   /**
    * Disabled master's backup state
    */
-  MAINTENANCE(11);
+  MAINTENANCE(11),
+  /**
+   * State could not be determined.
+   */
+  UNKNOWN(12);
 
   private final int state;
 

Modified: incubator/ambari/trunk/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java?rev=1470505&r1=1470504&r2=1470505&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java (original)
+++ incubator/ambari/trunk/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java Mon Apr 22 13:44:54 2013
@@ -17,8 +17,18 @@
  */
 package org.apache.ambari.server.agent;
 
-import static org.junit.Assert.*;
-import static org.mockito.Mockito.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.atLeast;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 
 import org.apache.ambari.server.AmbariException;
 import org.apache.ambari.server.Role;
@@ -28,25 +38,24 @@ import org.apache.ambari.server.orm.Guic
 import org.apache.ambari.server.orm.InMemoryDefaultTestModule;
 import org.apache.ambari.server.state.Cluster;
 import org.apache.ambari.server.state.Clusters;
+import org.apache.ambari.server.state.Host;
 import org.apache.ambari.server.state.HostState;
 import org.apache.ambari.server.state.Service;
+import org.apache.ambari.server.state.ServiceComponent;
+import org.apache.ambari.server.state.ServiceComponentHost;
 import org.apache.ambari.server.state.StackId;
-import org.apache.ambari.server.state.cluster.ClustersImpl;
+import org.apache.ambari.server.state.State;
 import org.apache.ambari.server.state.fsm.InvalidStateTransitionException;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
-
-import com.google.inject.Guice;
-import com.google.inject.Injector;
-import com.google.inject.persist.PersistService;
 import org.mockito.ArgumentCaptor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import com.google.inject.Guice;
+import com.google.inject.Injector;
+import com.google.inject.persist.PersistService;
 
 public class TestHeartbeatMonitor {
 
@@ -244,4 +253,125 @@ public class TestHeartbeatMonitor {
     }
     assertEquals(fsm.getHost(hostname).getState(), HostState.HEARTBEAT_LOST);
   }
+  
+  @Test
+  public void testHeartbeatLossWithComponent() throws AmbariException, InterruptedException,
+          InvalidStateTransitionException {
+    
+    Clusters clusters = injector.getInstance(Clusters.class);
+    clusters.addHost(hostname1);
+    clusters.getHost(hostname1).setOsType("centos5");
+    clusters.getHost(hostname1).persist();
+    clusters.addCluster(clusterName);
+    Cluster cluster = clusters.getCluster(clusterName);
+    cluster.setDesiredStackVersion(new StackId("HDP-0.1"));
+
+    Set<String> hostNames = new HashSet<String>(){{
+      add(hostname1);
+     }};
+
+    clusters.mapHostsToCluster(hostNames, clusterName);
+
+    Service hdfs = cluster.addService(serviceName);
+    hdfs.persist();
+    hdfs.addServiceComponent(Role.DATANODE.name()).persist();
+    hdfs.getServiceComponent(Role.DATANODE.name()).addServiceComponentHost(hostname1).persist();
+    hdfs.addServiceComponent(Role.NAMENODE.name()).persist();
+    hdfs.getServiceComponent(Role.NAMENODE.name()).addServiceComponentHost(hostname1).persist();
+    hdfs.addServiceComponent(Role.SECONDARY_NAMENODE.name()).persist();
+    hdfs.getServiceComponent(Role.SECONDARY_NAMENODE.name()).addServiceComponentHost(hostname1).persist();
+    hdfs.addServiceComponent(Role.HDFS_CLIENT.name()).persist();
+    hdfs.getServiceComponent(Role.HDFS_CLIENT.name()).addServiceComponentHost(hostname1);
+    
+    ActionQueue aq = new ActionQueue();
+    ActionManager am = mock(ActionManager.class);
+    HeartbeatMonitor hm = new HeartbeatMonitor(clusters, aq, am, 10);
+    HeartBeatHandler handler = new HeartBeatHandler(clusters, aq, am, injector);
+    
+    Register reg = new Register();
+    reg.setHostname(hostname1);
+    reg.setResponseId(12);
+    reg.setTimestamp(System.currentTimeMillis() - 300);
+    reg.setAgentVersion(ambariMetaInfo.getServerVersion());
+    
+    HostInfo hi = new HostInfo();
+    hi.setOS("Centos5");
+    reg.setHardwareProfile(hi);
+    handler.handleRegistration(reg);
+    
+    HeartBeat hb = new HeartBeat();
+    hb.setHostname(hostname1);
+    hb.setNodeStatus(new HostStatus(HostStatus.Status.HEALTHY, "cool"));
+    hb.setTimestamp(System.currentTimeMillis());
+    hb.setResponseId(12);
+    handler.handleHeartBeat(hb);
+    hm.start();
+    aq.enqueue(hostname1, new ExecutionCommand());
+    //Heartbeat will expire and action queue will be flushed
+    while (aq.size(hostname1) != 0) {
+      Thread.sleep(1);
+    }
+
+    cluster = clusters.getClustersForHost(hostname1).iterator().next();
+    for (ServiceComponentHost sch : cluster.getServiceComponentHosts(hostname1)) {
+      Service s = cluster.getService(sch.getServiceName());
+      ServiceComponent sc = s.getServiceComponent(sch.getServiceComponentName());
+      if (!sc.isClientComponent())
+        assertEquals(State.UNKNOWN, sch.getState());
+      else
+        assertEquals(State.INIT, sch.getState());
+    }
+
+    // don't keep marking the host as down
+    hm.shutdown();
+    
+    // try to flip statuses back
+    hb = new HeartBeat();
+    hb.setHostname(hostname1);
+    hb.setNodeStatus(new HostStatus(HostStatus.Status.HEALTHY, "cool"));
+    hb.setTimestamp(System.currentTimeMillis());
+    hb.setResponseId(0);
+    
+    List<ComponentStatus> statuses = new ArrayList<ComponentStatus>();
+    ComponentStatus cs = new ComponentStatus();
+    cs.setClusterName(clusterName);
+    cs.setServiceName(Service.Type.HDFS.name());
+    cs.setStatus(State.STARTED.name());
+    cs.setComponentName(Role.DATANODE.name());
+    statuses.add(cs);
+
+    cs = new ComponentStatus();
+    cs.setClusterName(clusterName);
+    cs.setServiceName(Service.Type.HDFS.name());
+    cs.setStatus(State.STARTED.name());
+    cs.setComponentName(Role.NAMENODE.name());
+    statuses.add(cs);
+    
+    cs = new ComponentStatus();
+    cs.setClusterName(clusterName);
+    cs.setServiceName(Service.Type.HDFS.name());
+    cs.setStatus(State.STARTED.name());
+    cs.setComponentName(Role.SECONDARY_NAMENODE.name());
+    statuses.add(cs);
+    
+    hb.setComponentStatus(statuses);
+    
+    Host host = clusters.getHost(hostname1);
+    host.setState(HostState.HEALTHY);
+    hb.setTimestamp(System.currentTimeMillis() + Integer.MAX_VALUE);
+    host.setLastHeartbeatTime(System.currentTimeMillis() + Integer.MAX_VALUE);
+    handler.handleHeartBeat(hb);
+    
+    cluster = clusters.getClustersForHost(hostname1).iterator().next();
+    for (ServiceComponentHost sch : cluster.getServiceComponentHosts(hostname1)) {
+      Service s = cluster.getService(sch.getServiceName());
+      ServiceComponent sc = s.getServiceComponent(sch.getServiceComponentName());
+      if (!sc.isClientComponent())
+        assertEquals(State.STARTED, sch.getState());
+      else
+        assertEquals(State.INIT, sch.getState());
+    }
+    
+    
+  }
 }