You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by nc...@apache.org on 2013/04/22 15:44:55 UTC
svn commit: r1470505 - in /incubator/ambari/trunk: ./
ambari-server/src/main/java/org/apache/ambari/server/agent/
ambari-server/src/main/java/org/apache/ambari/server/state/
ambari-server/src/test/java/org/apache/ambari/server/agent/
Author: ncole
Date: Mon Apr 22 13:44:54 2013
New Revision: 1470505
URL: http://svn.apache.org/r1470505
Log:
AMBARI-1976. When host expires, mark each component for host with unknown state
Modified:
incubator/ambari/trunk/CHANGES.txt
incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java
incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/state/State.java
incubator/ambari/trunk/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java
Modified: incubator/ambari/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/CHANGES.txt?rev=1470505&r1=1470504&r2=1470505&view=diff
==============================================================================
--- incubator/ambari/trunk/CHANGES.txt (original)
+++ incubator/ambari/trunk/CHANGES.txt Mon Apr 22 13:44:54 2013
@@ -1219,6 +1219,8 @@ Trunk (unreleased changes):
AMBARI-1657. User directories on HDFS do not get created with custom names
provided from Ambari UI. (swagle)
+ AMBARI-1976. When host expires, update each component for host with unknown state. (ncole)
+
AMBARI-1980. Fix for nagios_alerts element when there is an error. (ncole)
AMBARI-1865. Fix for upgrade script to copy configurations. (ncole)
Modified: incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java?rev=1470505&r1=1470504&r2=1470505&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java (original)
+++ incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java Mon Apr 22 13:44:54 2013
@@ -264,7 +264,8 @@ public class HeartBeatHandler {
if (prevState.equals(State.INSTALLED)
|| prevState.equals(State.STARTED)
|| prevState.equals(State.STARTING)
- || prevState.equals(State.STOPPING)) {
+ || prevState.equals(State.STOPPING)
+ || prevState.equals(State.UNKNOWN)) {
scHost.setState(liveState);
if (!prevState.equals(liveState)) {
LOG.info("State of service component " + componentName
Modified: incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java?rev=1470505&r1=1470504&r2=1470505&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java (original)
+++ incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartbeatMonitor.java Mon Apr 22 13:44:54 2013
@@ -29,7 +29,10 @@ import org.apache.ambari.server.state.Cl
import org.apache.ambari.server.state.Config;
import org.apache.ambari.server.state.Host;
import org.apache.ambari.server.state.HostState;
+import org.apache.ambari.server.state.Service;
+import org.apache.ambari.server.state.ServiceComponent;
import org.apache.ambari.server.state.ServiceComponentHost;
+import org.apache.ambari.server.state.State;
import org.apache.ambari.server.state.fsm.InvalidStateTransitionException;
import org.apache.ambari.server.state.host.HostHeartbeatLostEvent;
import org.apache.ambari.server.state.svccomphost.HBaseMasterPortScanner;
@@ -118,7 +121,21 @@ public class HeartbeatMonitor implements
LOG.warn("Hearbeat lost from host "+host);
//Heartbeat is expired
hostObj.handleEvent(new HostHeartbeatLostEvent(host));
+
+ // mark all components that are not clients with unknown status
+ for (Cluster cluster : fsm.getClustersForHost(hostObj.getHostName())) {
+ for (ServiceComponentHost sch : cluster.getServiceComponentHosts(hostObj.getHostName())) {
+ Service s = cluster.getService(sch.getServiceName());
+ ServiceComponent sc = s.getServiceComponent(sch.getServiceComponentName());
+ if (!sc.isClientComponent()) {
+ sch.setState(State.UNKNOWN);
+ }
+ }
+ }
+
+ // hbase
if(hostState != hostObj.getState() && scanner != null) scanner.updateHBaseMaster(hostObj);
+
//Purge action queue
actionQueue.dequeueAll(host);
//notify action manager
@@ -178,7 +195,7 @@ public class HeartbeatMonitor implements
statusCmd.setClusterName(cl.getClusterName());
statusCmd.setServiceName(serviceName);
statusCmd.setComponentName(sch.getServiceComponentName());
- statusCmd.setConfigurations(configurations);
+ statusCmd.setConfigurations(configurations);
cmds.add(statusCmd);
}
}
Modified: incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/state/State.java
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/state/State.java?rev=1470505&r1=1470504&r2=1470505&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/state/State.java (original)
+++ incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/state/State.java Mon Apr 22 13:44:54 2013
@@ -66,7 +66,11 @@ public enum State {
/**
* Disabled master's backup state
*/
- MAINTENANCE(11);
+ MAINTENANCE(11),
+ /**
+ * State could not be determined.
+ */
+ UNKNOWN(12);
private final int state;
Modified: incubator/ambari/trunk/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java?rev=1470505&r1=1470504&r2=1470505&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java (original)
+++ incubator/ambari/trunk/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatMonitor.java Mon Apr 22 13:44:54 2013
@@ -17,8 +17,18 @@
*/
package org.apache.ambari.server.agent;
-import static org.junit.Assert.*;
-import static org.mockito.Mockito.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.atLeast;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
import org.apache.ambari.server.AmbariException;
import org.apache.ambari.server.Role;
@@ -28,25 +38,24 @@ import org.apache.ambari.server.orm.Guic
import org.apache.ambari.server.orm.InMemoryDefaultTestModule;
import org.apache.ambari.server.state.Cluster;
import org.apache.ambari.server.state.Clusters;
+import org.apache.ambari.server.state.Host;
import org.apache.ambari.server.state.HostState;
import org.apache.ambari.server.state.Service;
+import org.apache.ambari.server.state.ServiceComponent;
+import org.apache.ambari.server.state.ServiceComponentHost;
import org.apache.ambari.server.state.StackId;
-import org.apache.ambari.server.state.cluster.ClustersImpl;
+import org.apache.ambari.server.state.State;
import org.apache.ambari.server.state.fsm.InvalidStateTransitionException;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
-
-import com.google.inject.Guice;
-import com.google.inject.Injector;
-import com.google.inject.persist.PersistService;
import org.mockito.ArgumentCaptor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import com.google.inject.Guice;
+import com.google.inject.Injector;
+import com.google.inject.persist.PersistService;
public class TestHeartbeatMonitor {
@@ -244,4 +253,125 @@ public class TestHeartbeatMonitor {
}
assertEquals(fsm.getHost(hostname).getState(), HostState.HEARTBEAT_LOST);
}
+
+ @Test
+ public void testHeartbeatLossWithComponent() throws AmbariException, InterruptedException,
+ InvalidStateTransitionException {
+
+ Clusters clusters = injector.getInstance(Clusters.class);
+ clusters.addHost(hostname1);
+ clusters.getHost(hostname1).setOsType("centos5");
+ clusters.getHost(hostname1).persist();
+ clusters.addCluster(clusterName);
+ Cluster cluster = clusters.getCluster(clusterName);
+ cluster.setDesiredStackVersion(new StackId("HDP-0.1"));
+
+ Set<String> hostNames = new HashSet<String>(){{
+ add(hostname1);
+ }};
+
+ clusters.mapHostsToCluster(hostNames, clusterName);
+
+ Service hdfs = cluster.addService(serviceName);
+ hdfs.persist();
+ hdfs.addServiceComponent(Role.DATANODE.name()).persist();
+ hdfs.getServiceComponent(Role.DATANODE.name()).addServiceComponentHost(hostname1).persist();
+ hdfs.addServiceComponent(Role.NAMENODE.name()).persist();
+ hdfs.getServiceComponent(Role.NAMENODE.name()).addServiceComponentHost(hostname1).persist();
+ hdfs.addServiceComponent(Role.SECONDARY_NAMENODE.name()).persist();
+ hdfs.getServiceComponent(Role.SECONDARY_NAMENODE.name()).addServiceComponentHost(hostname1).persist();
+ hdfs.addServiceComponent(Role.HDFS_CLIENT.name()).persist();
+ hdfs.getServiceComponent(Role.HDFS_CLIENT.name()).addServiceComponentHost(hostname1);
+
+ ActionQueue aq = new ActionQueue();
+ ActionManager am = mock(ActionManager.class);
+ HeartbeatMonitor hm = new HeartbeatMonitor(clusters, aq, am, 10);
+ HeartBeatHandler handler = new HeartBeatHandler(clusters, aq, am, injector);
+
+ Register reg = new Register();
+ reg.setHostname(hostname1);
+ reg.setResponseId(12);
+ reg.setTimestamp(System.currentTimeMillis() - 300);
+ reg.setAgentVersion(ambariMetaInfo.getServerVersion());
+
+ HostInfo hi = new HostInfo();
+ hi.setOS("Centos5");
+ reg.setHardwareProfile(hi);
+ handler.handleRegistration(reg);
+
+ HeartBeat hb = new HeartBeat();
+ hb.setHostname(hostname1);
+ hb.setNodeStatus(new HostStatus(HostStatus.Status.HEALTHY, "cool"));
+ hb.setTimestamp(System.currentTimeMillis());
+ hb.setResponseId(12);
+ handler.handleHeartBeat(hb);
+ hm.start();
+ aq.enqueue(hostname1, new ExecutionCommand());
+ //Heartbeat will expire and action queue will be flushed
+ while (aq.size(hostname1) != 0) {
+ Thread.sleep(1);
+ }
+
+ cluster = clusters.getClustersForHost(hostname1).iterator().next();
+ for (ServiceComponentHost sch : cluster.getServiceComponentHosts(hostname1)) {
+ Service s = cluster.getService(sch.getServiceName());
+ ServiceComponent sc = s.getServiceComponent(sch.getServiceComponentName());
+ if (!sc.isClientComponent())
+ assertEquals(State.UNKNOWN, sch.getState());
+ else
+ assertEquals(State.INIT, sch.getState());
+ }
+
+ // don't keep marking the host as down
+ hm.shutdown();
+
+ // try to flip statuses back
+ hb = new HeartBeat();
+ hb.setHostname(hostname1);
+ hb.setNodeStatus(new HostStatus(HostStatus.Status.HEALTHY, "cool"));
+ hb.setTimestamp(System.currentTimeMillis());
+ hb.setResponseId(0);
+
+ List<ComponentStatus> statuses = new ArrayList<ComponentStatus>();
+ ComponentStatus cs = new ComponentStatus();
+ cs.setClusterName(clusterName);
+ cs.setServiceName(Service.Type.HDFS.name());
+ cs.setStatus(State.STARTED.name());
+ cs.setComponentName(Role.DATANODE.name());
+ statuses.add(cs);
+
+ cs = new ComponentStatus();
+ cs.setClusterName(clusterName);
+ cs.setServiceName(Service.Type.HDFS.name());
+ cs.setStatus(State.STARTED.name());
+ cs.setComponentName(Role.NAMENODE.name());
+ statuses.add(cs);
+
+ cs = new ComponentStatus();
+ cs.setClusterName(clusterName);
+ cs.setServiceName(Service.Type.HDFS.name());
+ cs.setStatus(State.STARTED.name());
+ cs.setComponentName(Role.SECONDARY_NAMENODE.name());
+ statuses.add(cs);
+
+ hb.setComponentStatus(statuses);
+
+ Host host = clusters.getHost(hostname1);
+ host.setState(HostState.HEALTHY);
+ hb.setTimestamp(System.currentTimeMillis() + Integer.MAX_VALUE);
+ host.setLastHeartbeatTime(System.currentTimeMillis() + Integer.MAX_VALUE);
+ handler.handleHeartBeat(hb);
+
+ cluster = clusters.getClustersForHost(hostname1).iterator().next();
+ for (ServiceComponentHost sch : cluster.getServiceComponentHosts(hostname1)) {
+ Service s = cluster.getService(sch.getServiceName());
+ ServiceComponent sc = s.getServiceComponent(sch.getServiceComponentName());
+ if (!sc.isClientComponent())
+ assertEquals(State.STARTED, sch.getState());
+ else
+ assertEquals(State.INIT, sch.getState());
+ }
+
+
+ }
}