You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jo...@apache.org on 2015/08/27 22:36:27 UTC

[1/2] ambari git commit: AMBARI-12889 - Distribute Repository For Upgrade With Unhealthy Hosts (jonathanhurley)

Repository: ambari
Updated Branches:
  refs/heads/trunk a71c52838 -> 59dd207c3


AMBARI-12889 - Distribute Repository For Upgrade With Unhealthy Hosts (jonathanhurley)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/3b5efe48
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/3b5efe48
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/3b5efe48

Branch: refs/heads/trunk
Commit: 3b5efe48706f34ad5f89290d45ae16980237d5ce
Parents: a71c528
Author: Jonathan Hurley <jh...@hortonworks.com>
Authored: Wed Aug 26 17:10:45 2015 -0400
Committer: Jonathan Hurley <jh...@hortonworks.com>
Committed: Thu Aug 27 15:22:57 2015 -0400

----------------------------------------------------------------------
 .../ClusterStackVersionResourceProvider.java    | 74 ++++++++++++++------
 .../org/apache/ambari/server/state/Cluster.java | 24 +++++--
 .../server/state/cluster/ClusterImpl.java       | 42 +++++------
 ...ClusterStackVersionResourceProviderTest.java | 10 ++-
 .../server/state/cluster/ClusterTest.java       | 71 +++++++++++++++++--
 5 files changed, 164 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/3b5efe48/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java
index 2f3e959..a942c93 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProvider.java
@@ -20,6 +20,7 @@ package org.apache.ambari.server.controller.internal;
 import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.JDK_LOCATION;
 
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -69,8 +70,10 @@ import org.apache.ambari.server.orm.entities.RepositoryVersionEntity;
 import org.apache.ambari.server.orm.entities.StackEntity;
 import org.apache.ambari.server.serveraction.upgrades.FinalizeUpgradeAction;
 import org.apache.ambari.server.state.Cluster;
+import org.apache.ambari.server.state.Clusters;
 import org.apache.ambari.server.state.ComponentInfo;
 import org.apache.ambari.server.state.Host;
+import org.apache.ambari.server.state.MaintenanceState;
 import org.apache.ambari.server.state.RepositoryVersionState;
 import org.apache.ambari.server.state.ServiceComponentHost;
 import org.apache.ambari.server.state.ServiceInfo;
@@ -300,17 +303,19 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
     desiredRepoVersion = (String) propertyMap.get(CLUSTER_STACK_VERSION_REPOSITORY_VERSION_PROPERTY_ID);
 
     Cluster cluster;
-    Map<String, Host> hostsForCluster;
-
     AmbariManagementController managementController = getManagementController();
     AmbariMetaInfo ami = managementController.getAmbariMetaInfo();
+
     try {
-      cluster = managementController.getClusters().getCluster(clName);
-      hostsForCluster = managementController.getClusters().getHostsForCluster(clName);
+      Clusters clusters = managementController.getClusters();
+      cluster = clusters.getCluster(clName);
     } catch (AmbariException e) {
       throw new NoSuchParentResourceException(e.getMessage(), e);
     }
 
+    // get all of the hosts eligible for stack distribution
+    List<Host> hosts = getHostsForStackDistribution(cluster);
+
     final StackId stackId;
     if (propertyMap.containsKey(CLUSTER_STACK_VERSION_STACK_PROPERTY_ID) &&
             propertyMap.containsKey(CLUSTER_STACK_VERSION_VERSION_PROPERTY_ID)) {
@@ -350,13 +355,13 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
 
     RequestStageContainer req = createRequest();
 
-    Iterator<Host> hostsForClusterIter = hostsForCluster.values().iterator();
+    Iterator<Host> hostIterator = hosts.iterator();
     Map<String, String> hostLevelParams = new HashMap<String, String>();
     hostLevelParams.put(JDK_LOCATION, getManagementController().getJdkResourceUrl());
     String hostParamsJson = StageUtils.getGson().toJson(hostLevelParams);
 
     int maxTasks = configuration.getAgentPackageParallelCommandsLimit();
-    int hostCount = hostsForCluster.size();
+    int hostCount = hosts.size();
     int batchCount = (int) (Math.ceil((double)hostCount / maxTasks));
 
     ArrayList<Host> directTransitions = new ArrayList<Host>();
@@ -394,8 +399,8 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
       stages.add(stage);
 
       // Populate with commands for host
-      for (int i = 0; i < maxTasks && hostsForClusterIter.hasNext(); i++) {
-        Host host = hostsForClusterIter.next();
+      for (int i = 0; i < maxTasks && hostIterator.hasNext(); i++) {
+        Host host = hostIterator.next();
         if (hostHasVersionableComponents(cluster, ami, stackId, host)) {
           addHostVersionInstallCommandsToStage(desiredRepoVersion,
                   cluster, managementController, ami, stackId, perOsRepos, stage, host);
@@ -405,19 +410,21 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
 
       }
     }
+
     req.addStages(stages);
 
     try {
-      ClusterVersionEntity existingCSVer = clusterVersionDAO.findByClusterAndStackAndVersion(
+      ClusterVersionEntity clusterVersionEntity = clusterVersionDAO.findByClusterAndStackAndVersion(
           clName, stackId, desiredRepoVersion);
 
-      if (existingCSVer == null) {
+      if (clusterVersionEntity == null) {
         try {
           // Create/persist new cluster stack version
           cluster.createClusterVersion(stackId,
               desiredRepoVersion, managementController.getAuthName(),
               RepositoryVersionState.INSTALLING);
-          existingCSVer = clusterVersionDAO.findByClusterAndStackAndVersion(
+
+          clusterVersionEntity = clusterVersionDAO.findByClusterAndStackAndVersion(
               clName, stackId, desiredRepoVersion);
         } catch (AmbariException e) {
           throw new SystemException(
@@ -432,12 +439,13 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
       }
 
       // Will also initialize all Host Versions in an INSTALLING state.
-      cluster.inferHostVersions(existingCSVer);
+      cluster.transitionHostsToInstalling(clusterVersionEntity);
 
       // Directly transition host versions to INSTALLED for hosts that don't have
       // versionable components
       for(Host host : directTransitions) {
-        transitionHostVersionToInstalled(host, cluster, existingCSVer.getRepositoryVersion().getVersion());
+        transitionHostVersionToInstalled(host, cluster,
+            clusterVersionEntity.getRepositoryVersion().getVersion());
       }
 
       req.persist();
@@ -449,11 +457,9 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
   }
 
   private void addHostVersionInstallCommandsToStage(final String desiredRepoVersion,
-                                                    Cluster cluster, AmbariManagementController managementController,
-                                                    AmbariMetaInfo ami,
-                                                    final StackId stackId,
-                                                    Map<String, List<RepositoryEntity>> perOsRepos,
-                                                    Stage stage, Host host) throws SystemException {
+      Cluster cluster, AmbariManagementController managementController, AmbariMetaInfo ami,
+      final StackId stackId, Map<String, List<RepositoryEntity>> perOsRepos, Stage stage, Host host)
+          throws SystemException {
     // Determine repositories for host
     final List<RepositoryEntity> repoInfo = perOsRepos.get(host.getOsFamily());
     if (repoInfo == null) {
@@ -461,6 +467,7 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
                       "not defined. Repo version=%s, stackId=%s",
               host.getOsFamily(), desiredRepoVersion, stackId));
     }
+
     // determine packages for all services that are installed on host
     List<ServiceOsSpecific.Package> packages = new ArrayList<ServiceOsSpecific.Package>();
     Set<String> servicesOnHost = new HashSet<String>();
@@ -486,6 +493,7 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
         }
       }
     }
+
     final String packageList = gson.toJson(packages);
     final String repoList = gson.toJson(repoInfo);
 
@@ -517,8 +525,8 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
    * Returns true if there is at least one versionable component on host for a given
    * stack.
    */
-  private boolean hostHasVersionableComponents(Cluster cluster, AmbariMetaInfo ami,
-                                               StackId stackId, Host host) throws SystemException {
+  private boolean hostHasVersionableComponents(Cluster cluster, AmbariMetaInfo ami, StackId stackId,
+      Host host) throws SystemException {
     List<ServiceComponentHost> components = cluster.getServiceComponentHosts(host.getHostName());
     for (ServiceComponentHost component : components) {
       ComponentInfo componentInfo;
@@ -543,8 +551,7 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
    *  Transitioning host version to INSTALLED state manually would not be the
    *  best idea since some additional logic may be bound to event listeners.
    */
-  private void transitionHostVersionToInstalled(Host host, Cluster cluster,
-                                                String version) {
+  private void transitionHostVersionToInstalled(Host host, Cluster cluster, String version) {
     LOG.info(String.format("Transitioning version %s on host %s directly to installed" +
                     " without distributing bits to host since it has no versionable components.",
             version, host.getHostName()));
@@ -691,4 +698,27 @@ public class ClusterStackVersionResourceProvider extends AbstractControllerResou
   protected Set<String> getPKPropertyIds() {
     return pkPropertyIds;
   }
+
+  /**
+   * Gets all of the hosts in a cluster which are not in "maintenance mode" and
+   * are considered to be healthy. In the case of stack distribution, a host
+   * must be explicitly marked as being in maintenance mode for it to be
+   * considered as unhealthy.
+   *
+   * @param cluster
+   *          the cluster (not {@code null}).
+   * @return the list of hosts that are not in maintenance mode and are
+   *         eligible to have a stack distributed to them.
+   */
+  private List<Host> getHostsForStackDistribution(Cluster cluster) {
+    Collection<Host> hosts = cluster.getHosts();
+    List<Host> healthyHosts = new ArrayList<>(hosts.size());
+    for (Host host : hosts) {
+      if (host.getMaintenanceState(cluster.getClusterId()) == MaintenanceState.OFF) {
+        healthyHosts.add(host);
+      }
+    }
+
+    return healthyHosts;
+  }
 }

http://git-wip-us.apache.org/repos/asf/ambari/blob/3b5efe48/ambari-server/src/main/java/org/apache/ambari/server/state/Cluster.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/state/Cluster.java b/ambari-server/src/main/java/org/apache/ambari/server/state/Cluster.java
index ad481f3..5209dfb 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/state/Cluster.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/state/Cluster.java
@@ -180,15 +180,25 @@ public interface Cluster {
       RepositoryVersionState desiredState) throws AmbariException;
 
   /**
-   * Create/update host versions for all of the hosts within a cluster based on state of cluster stack version.
-   * The difference of this method compared to {@link Cluster#mapHostVersions}
-   * is that it affects all hosts (not only missing hosts). Also, current method contains some additional logics to allow only INSTALLING
-   * state for hosts.
-   * @param sourceClusterVersion cluster version to be queried for a stack name/version info and desired RepositoryVersionState. The only valid state
-   * of a cluster version is {@link RepositoryVersionState#INSTALLING}
+   * Creates or updates host versions for all of the hosts within a cluster
+   * based on state of cluster stack version. This is used to transition all
+   * hosts into the {@link RepositoryVersionState#INSTALLING} state.
+   * <p/>
+   * The difference between this method compared to
+   * {@link Cluster#mapHostVersions} is that it affects all hosts (not only
+   * missing hosts).
+   * <p/>
+   * Hosts that are in maintenance mode will not be included. These hosts have
+   * been explicitly marked as being in maintenance and are not included in
+   * this operation.
+   *
+   * @param sourceClusterVersion
+   *          cluster version to be queried for a stack name/version info and
+   *          desired RepositoryVersionState. The only valid state of a cluster
+   *          version is {@link RepositoryVersionState#INSTALLING}
    * @throws AmbariException
    */
-  void inferHostVersions(ClusterVersionEntity sourceClusterVersion)
+  void transitionHostsToInstalling(ClusterVersionEntity sourceClusterVersion)
       throws AmbariException;
 
   /**

http://git-wip-us.apache.org/repos/asf/ambari/blob/3b5efe48/ambari-server/src/main/java/org/apache/ambari/server/state/cluster/ClusterImpl.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/state/cluster/ClusterImpl.java b/ambari-server/src/main/java/org/apache/ambari/server/state/cluster/ClusterImpl.java
index 86f5f32..4fe24e9 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/state/cluster/ClusterImpl.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/state/cluster/ClusterImpl.java
@@ -1133,27 +1133,18 @@ public class ClusterImpl implements Cluster {
   }
 
   /**
-   * Because this is a top-down approach, it should only be called for the purposes of bootstrapping data, such as
-   * installing a brand new cluster (through Blueprints).
-   * @param sourceClusterVersion cluster version to be queried for a stack name/version info and desired RepositoryVersionState.
-   * The only valid state of this cluster version is {@link RepositoryVersionState#INSTALLING}
-   * @throws AmbariException
+   * {@inheritDoc}
    */
   @Override
-  public void inferHostVersions(ClusterVersionEntity sourceClusterVersion) throws AmbariException {
+  public void transitionHostsToInstalling(ClusterVersionEntity sourceClusterVersion) throws AmbariException {
     if (sourceClusterVersion == null) {
       throw new AmbariException("Could not find current stack version of cluster " + getClusterName());
     }
 
-    RepositoryVersionState desiredState = sourceClusterVersion.getState();
-
-    @SuppressWarnings("serial")
-    Set<RepositoryVersionState> validStates = new HashSet<RepositoryVersionState>(){{
-      add(RepositoryVersionState.INSTALLING);
-    }};
-
-    if (!validStates.contains(desiredState)) {
-      throw new AmbariException("The state must be one of " + validStates);
+    if (RepositoryVersionState.INSTALLING != sourceClusterVersion.getState()) {
+      throw new AmbariException("Unable to transition cluster hosts into "
+          + RepositoryVersionState.INSTALLING
+          + ". The only valid state is INSTALLING");
     }
 
     Map<String, Host> hosts = clusters.getHostsForCluster(getClusterName());
@@ -1169,17 +1160,26 @@ public class ClusterImpl implements Cluster {
           getClusterName(), repoVersionStackId,
           sourceClusterVersion.getRepositoryVersion().getVersion());
 
-      if (existingHostVersionEntities != null) {
-        for (HostVersionEntity entity : existingHostVersionEntities) {
-          existingHostsWithClusterStackAndVersion.add(entity.getHostName());
-          existingHostStackVersions.put(entity.getHostName(), entity);
-        }
+      // for each host that already has a stack and version, keep track of them
+      for (HostVersionEntity entity : existingHostVersionEntities) {
+        String hostName = entity.getHostName();
+        existingHostsWithClusterStackAndVersion.add(hostName);
+        existingHostStackVersions.put(hostName, entity);
       }
 
+      // find any hosts that do not have the stack/repo version already
       Sets.SetView<String> hostsMissingRepoVersion = Sets.difference(
           hosts.keySet(), existingHostsWithClusterStackAndVersion);
 
       for (String hostname : hosts.keySet()) {
+        // if the host is in maintenance mode, that's an explicit marker which
+        // indicates that it should not be transitioned to INSTALLING; these
+        // hosts are excluded from being transitioned into INSTALLING
+        Host host = hosts.get(hostname);
+        if (host.getMaintenanceState(getClusterId()) != MaintenanceState.OFF) {
+          continue;
+        }
+
         if (hostsMissingRepoVersion.contains(hostname)) {
           // Create new host stack version
           HostEntity hostEntity = hostDAO.findByName(hostname);
@@ -1190,7 +1190,7 @@ public class ClusterImpl implements Cluster {
         } else {
           // Update existing host stack version
           HostVersionEntity hostVersionEntity = existingHostStackVersions.get(hostname);
-          hostVersionEntity.setState(desiredState);
+          hostVersionEntity.setState(RepositoryVersionState.INSTALLING);
           hostVersionDAO.merge(hostVersionEntity);
         }
       }

http://git-wip-us.apache.org/repos/asf/ambari/blob/3b5efe48/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java b/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java
index 5b24edc..1819ef9 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/ClusterStackVersionResourceProviderTest.java
@@ -78,6 +78,7 @@ import org.apache.ambari.server.state.Cluster;
 import org.apache.ambari.server.state.Clusters;
 import org.apache.ambari.server.state.ConfigHelper;
 import org.apache.ambari.server.state.Host;
+import org.apache.ambari.server.state.MaintenanceState;
 import org.apache.ambari.server.state.RepositoryVersionState;
 import org.apache.ambari.server.state.ServiceComponentHost;
 import org.apache.ambari.server.state.ServiceInfo;
@@ -181,6 +182,9 @@ public class ClusterStackVersionResourceProviderTest {
       Host host = createNiceMock(hostname, Host.class);
       expect(host.getHostName()).andReturn(hostname).anyTimes();
       expect(host.getOsFamily()).andReturn("redhat6").anyTimes();
+      expect(host.getMaintenanceState(EasyMock.anyLong())).andReturn(
+          MaintenanceState.OFF).anyTimes();
+
       replay(host);
       hostsForCluster.put(hostname, host);
     }
@@ -238,10 +242,12 @@ public class ClusterStackVersionResourceProviderTest {
             eq(managementController))).andReturn(csvResourceProvider).anyTimes();
 
     expect(clusters.getCluster(anyObject(String.class))).andReturn(cluster);
-    expect(clusters.getHostsForCluster(anyObject(String.class))).andReturn(hostsForCluster);
+    expect(clusters.getHostsForCluster(anyObject(String.class))).andReturn(
+        hostsForCluster).anyTimes();
 
     String clusterName = "Cluster100";
-    //expect(cluster.getClusterName()).andReturn(clusterName).anyTimes();
+    expect(cluster.getClusterId()).andReturn(1L).anyTimes();
+    expect(cluster.getHosts()).andReturn(hostsForCluster.values()).atLeastOnce();
     expect(cluster.getCurrentStackVersion()).andReturn(stackId);
     expect(cluster.getServiceComponentHosts(anyObject(String.class))).andAnswer(new IAnswer<List<ServiceComponentHost>>() {
       @Override

http://git-wip-us.apache.org/repos/asf/ambari/blob/3b5efe48/ambari-server/src/test/java/org/apache/ambari/server/state/cluster/ClusterTest.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/state/cluster/ClusterTest.java b/ambari-server/src/test/java/org/apache/ambari/server/state/cluster/ClusterTest.java
index f047b33..edd6267 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/state/cluster/ClusterTest.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/state/cluster/ClusterTest.java
@@ -42,8 +42,6 @@ import java.util.Set;
 import javax.persistence.EntityManager;
 import javax.persistence.RollbackException;
 
-import junit.framework.Assert;
-
 import org.apache.ambari.server.AmbariException;
 import org.apache.ambari.server.agent.AgentEnv;
 import org.apache.ambari.server.agent.AgentEnv.Directory;
@@ -88,6 +86,7 @@ import org.apache.ambari.server.state.DesiredConfig;
 import org.apache.ambari.server.state.Host;
 import org.apache.ambari.server.state.HostHealthStatus;
 import org.apache.ambari.server.state.HostState;
+import org.apache.ambari.server.state.MaintenanceState;
 import org.apache.ambari.server.state.RepositoryVersionState;
 import org.apache.ambari.server.state.Service;
 import org.apache.ambari.server.state.ServiceComponent;
@@ -117,6 +116,8 @@ import com.google.inject.persist.Transactional;
 import com.google.inject.persist.UnitOfWork;
 import com.google.inject.util.Modules;
 
+import junit.framework.Assert;
+
 public class ClusterTest {
 
   private Clusters clusters;
@@ -1423,14 +1424,24 @@ public class ClusterTest {
     assertNotNull(c1.getCurrentClusterVersion());
   }
 
+  /**
+   * Tests that hosts can be correctly transitioned into the "INSTALLING" state.
+   * This method also tests that hosts in MM will not be transitioned, as per
+   * the contract of
+   * {@link Cluster#transitionHostsToInstalling(ClusterVersionEntity)}.
+   *
+   * @throws Exception
+   */
   @Test
-  public void testInferHostVersions() throws Exception {
+  public void testTransitionHostVersions() throws Exception {
     createDefaultCluster();
 
     StackId stackId = new StackId("HDP", "0.2");
     helper.getOrCreateRepositoryVersion(stackId, stackId.getStackVersion());
+
     c1.createClusterVersion(stackId, "0.2", "admin",
         RepositoryVersionState.INSTALLING);
+
     ClusterVersionEntity entityHDP2 = null;
     for (ClusterVersionEntity entity : c1.getAllClusterVersions()) {
       StackEntity repoVersionStackEntity = entity.getRepositoryVersion().getStack();
@@ -1442,12 +1453,13 @@ public class ClusterTest {
         break;
       }
     }
+
     assertNotNull(entityHDP2);
 
     List<HostVersionEntity> hostVersionsH1Before = hostVersionDAO.findByClusterAndHost("c1", "h1");
     assertEquals(1, hostVersionsH1Before.size());
 
-    c1.inferHostVersions(entityHDP2);
+    c1.transitionHostsToInstalling(entityHDP2);
 
     List<HostVersionEntity> hostVersionsH1After = hostVersionDAO.findByClusterAndHost("c1", "h1");
     assertEquals(2, hostVersionsH1After.size());
@@ -1462,10 +1474,11 @@ public class ClusterTest {
         break;
       }
     }
+
     assertTrue(checked);
 
     // Test for update of existing host stack version
-    c1.inferHostVersions(entityHDP2);
+    c1.transitionHostsToInstalling(entityHDP2);
 
     hostVersionsH1After = hostVersionDAO.findByClusterAndHost("c1", "h1");
     assertEquals(2, hostVersionsH1After.size());
@@ -1480,7 +1493,55 @@ public class ClusterTest {
         break;
       }
     }
+
     assertTrue(checked);
+
+    // reset all to INSTALL_FAILED
+    List<HostVersionEntity> hostVersionEntities = hostVersionDAO.findAll();
+    for (HostVersionEntity hostVersionEntity : hostVersionEntities) {
+      hostVersionEntity.setState(RepositoryVersionState.INSTALL_FAILED);
+      hostVersionDAO.merge(hostVersionEntity);
+    }
+
+    // verify they have been transitioned to INSTALL_FAILED
+    hostVersionEntities = hostVersionDAO.findAll();
+    for (HostVersionEntity hostVersionEntity : hostVersionEntities) {
+      assertEquals(RepositoryVersionState.INSTALL_FAILED, hostVersionEntity.getState());
+    }
+
+    // put 1 host in maintenance mode
+    Collection<Host> hosts = c1.getHosts();
+    Iterator<Host> iterator = hosts.iterator();
+    Host hostInMaintenanceMode = iterator.next();
+    Host hostNotInMaintenanceMode = iterator.next();
+    hostInMaintenanceMode.setMaintenanceState(c1.getClusterId(), MaintenanceState.ON);
+
+    // transition host versions to INSTALLING
+    c1.transitionHostsToInstalling(entityHDP2);
+
+    List<HostVersionEntity> hostInMaintModeVersions = hostVersionDAO.findByClusterAndHost("c1",
+        hostInMaintenanceMode.getHostName());
+
+    List<HostVersionEntity> otherHostVersions = hostVersionDAO.findByClusterAndHost("c1",
+        hostNotInMaintenanceMode.getHostName());
+
+    // verify the MM host is in INSTALL_FAILED still
+    for (HostVersionEntity hostVersionEntity : hostInMaintModeVersions) {
+      StackEntity repoVersionStackEntity = hostVersionEntity.getRepositoryVersion().getStack();
+      if (repoVersionStackEntity.getStackName().equals("HDP")
+          && repoVersionStackEntity.getStackVersion().equals("0.2")) {
+        assertEquals(RepositoryVersionState.INSTALL_FAILED, hostVersionEntity.getState());
+      }
+    }
+
+    // verify the other host is in INSTALLING
+    for (HostVersionEntity hostVersionEntity : otherHostVersions) {
+      StackEntity repoVersionStackEntity = hostVersionEntity.getRepositoryVersion().getStack();
+      if (repoVersionStackEntity.getStackName().equals("HDP")
+          && repoVersionStackEntity.getStackVersion().equals("0.2")) {
+      assertEquals(RepositoryVersionState.INSTALLING, hostVersionEntity.getState());
+      }
+    }
   }
 
   @Test


[2/2] ambari git commit: AMBARI-12903 - Pre-Req Checks Should Only Warn On Unhealthy Slave Hosts Not In MM (jonathanhurley)

Posted by jo...@apache.org.
AMBARI-12903 - Pre-Req Checks Should Only Warn On Unhealthy Slave Hosts Not In MM (jonathanhurley)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/59dd207c
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/59dd207c
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/59dd207c

Branch: refs/heads/trunk
Commit: 59dd207c39968df161341568f7538b9e3a990de2
Parents: 3b5efe4
Author: Jonathan Hurley <jh...@hortonworks.com>
Authored: Thu Aug 27 12:04:24 2015 -0400
Committer: Jonathan Hurley <jh...@hortonworks.com>
Committed: Thu Aug 27 15:22:58 2015 -0400

----------------------------------------------------------------------
 .../ambari/server/checks/CheckDescription.java  |  4 +-
 .../server/checks/HostsHeartbeatCheck.java      | 58 +++++++++++++---
 .../checks/HostsRepositoryVersionCheck.java     | 73 +++++++++++---------
 .../server/checks/HostsHeartbeatCheckTest.java  | 27 ++++++--
 4 files changed, 111 insertions(+), 51 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/59dd207c/ambari-server/src/main/java/org/apache/ambari/server/checks/CheckDescription.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/checks/CheckDescription.java b/ambari-server/src/main/java/org/apache/ambari/server/checks/CheckDescription.java
index 5cfbb47..7151b0e 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/checks/CheckDescription.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/checks/CheckDescription.java
@@ -41,7 +41,9 @@ public enum CheckDescription {
       "All hosts must be heartbeating with the Ambari Server unless they are in Maintenance Mode",
       new HashMap<String, String>() {{
         put(AbstractCheckDescriptor.DEFAULT,
-            "The following hosts must be heartbeating to the Ambari Server: {{fails}}.");
+            "The following hosts must be heartbeating to the Ambari Server or be put into maintenance mode.");
+        put(HostsHeartbeatCheck.KEY_HOSTS_IN_MM_WARNING,
+            "The following hosts are in maintenance mode and will not be a part of the upgrade.");
       }}),
 
   HOSTS_MASTER_MAINTENANCE(PrereqCheckType.HOST,

http://git-wip-us.apache.org/repos/asf/ambari/blob/59dd207c/ambari-server/src/main/java/org/apache/ambari/server/checks/HostsHeartbeatCheck.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/checks/HostsHeartbeatCheck.java b/ambari-server/src/main/java/org/apache/ambari/server/checks/HostsHeartbeatCheck.java
index 6076a32..a8600c4 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/checks/HostsHeartbeatCheck.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/checks/HostsHeartbeatCheck.java
@@ -17,13 +17,13 @@
  */
 package org.apache.ambari.server.checks;
 
-import java.util.Map;
+import java.util.Collection;
 
 import org.apache.ambari.server.AmbariException;
 import org.apache.ambari.server.controller.PrereqCheckRequest;
 import org.apache.ambari.server.state.Cluster;
 import org.apache.ambari.server.state.Host;
-import org.apache.ambari.server.state.HostHealthStatus;
+import org.apache.ambari.server.state.HostHealthStatus.HealthStatus;
 import org.apache.ambari.server.state.MaintenanceState;
 import org.apache.ambari.server.state.stack.PrereqCheckStatus;
 import org.apache.ambari.server.state.stack.PrerequisiteCheck;
@@ -31,12 +31,23 @@ import org.apache.ambari.server.state.stack.PrerequisiteCheck;
 import com.google.inject.Singleton;
 
 /**
- * Checks that all hosts are either in maintenance mode or heartbeating with the server.
+ * Checks that all hosts are heartbeating with the Ambari Server. If there is a
+ * host which is not heartbeating, then it must be in maintenance mode to
+ * prevent a failure of this check.
+ * <p/>
+ * Hosts that are in maintenance mode will be added to a warning that they will
+ * not be included in the upgrade.
+ * <p/>
+ * This check will return {@link PrereqCheckStatus#FAIL} if there are hosts not
+ * heartbeating and not in maintenance mode. Otherwise, it will return
+ * {@link PrereqCheckStatus#WARNING} for any hosts in maintenance mode.
  */
 @Singleton
 @UpgradeCheck(group = UpgradeCheckGroup.LIVELINESS, order = 1.0f)
 public class HostsHeartbeatCheck extends AbstractCheckDescriptor {
 
+  static final String KEY_HOSTS_IN_MM_WARNING = "key.hosts.in.mm.warning";
+
   /**
    * Constructor.
    */
@@ -45,19 +56,48 @@ public class HostsHeartbeatCheck extends AbstractCheckDescriptor {
   }
 
   @Override
-  public void perform(PrerequisiteCheck prerequisiteCheck, PrereqCheckRequest request) throws AmbariException {
+  public void perform(PrerequisiteCheck prerequisiteCheck, PrereqCheckRequest request)
+      throws AmbariException {
     final String clusterName = request.getClusterName();
     final Cluster cluster = clustersProvider.get().getCluster(clusterName);
-    final Map<String, Host> clusterHosts = clustersProvider.get().getHostsForCluster(clusterName);
-    for (Map.Entry<String, Host> hostEntry : clusterHosts.entrySet()) {
-      final Host host = hostEntry.getValue();
-      if (host.getHealthStatus().getHealthStatus() == HostHealthStatus.HealthStatus.UNKNOWN && host.getMaintenanceState(cluster.getClusterId()) == MaintenanceState.OFF) {
-        prerequisiteCheck.getFailedOn().add(host.getHostName());
+    Collection<Host> hosts = cluster.getHosts();
+
+    for (Host host : hosts) {
+      HealthStatus hostHealth = host.getHealthStatus().getHealthStatus();
+      MaintenanceState maintenanceState = host.getMaintenanceState(cluster.getClusterId());
+      switch (hostHealth) {
+        case UNHEALTHY:
+        case UNKNOWN:
+          if (maintenanceState == MaintenanceState.OFF) {
+            prerequisiteCheck.getFailedOn().add(host.getHostName());
+          }
+          break;
+        default:
+          break;
+
       }
     }
+
+    // for any hosts unhealthy and NOT in MM mode, fail this check
     if (!prerequisiteCheck.getFailedOn().isEmpty()) {
       prerequisiteCheck.setStatus(PrereqCheckStatus.FAIL);
       prerequisiteCheck.setFailReason(getFailReason(prerequisiteCheck, request));
+      return;
+    }
+
+    // no failures so far, check to see if any hosts are in MM so that this check
+    // will produce a warning
+    for (Host host : hosts) {
+      MaintenanceState maintenanceState = host.getMaintenanceState(cluster.getClusterId());
+      if (maintenanceState != MaintenanceState.OFF) {
+        prerequisiteCheck.getFailedOn().add(host.getHostName());
+      }
+    }
+
+    if (!prerequisiteCheck.getFailedOn().isEmpty()) {
+      prerequisiteCheck.setStatus(PrereqCheckStatus.WARNING);
+      prerequisiteCheck.setFailReason(
+          getFailReason(KEY_HOSTS_IN_MM_WARNING, prerequisiteCheck, request));
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/ambari/blob/59dd207c/ambari-server/src/main/java/org/apache/ambari/server/checks/HostsRepositoryVersionCheck.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/checks/HostsRepositoryVersionCheck.java b/ambari-server/src/main/java/org/apache/ambari/server/checks/HostsRepositoryVersionCheck.java
index 6ebf8e1..eaa0096 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/checks/HostsRepositoryVersionCheck.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/checks/HostsRepositoryVersionCheck.java
@@ -35,7 +35,10 @@ import org.apache.ambari.server.state.stack.PrerequisiteCheck;
 import com.google.inject.Singleton;
 
 /**
- * Checks that all hosts have particular repository version.
+ * Checks that all hosts have particular repository version. Hosts that are in
+ * maintenance mode will be skipped and will not report a warning. Even if they
+ * do not have the repo version, they will not be included in the upgrade
+ * orchestration, so no warning is required.
  */
 @Singleton
 @UpgradeCheck(group = UpgradeCheckGroup.REPOSITORY_VERSION)
@@ -60,52 +63,54 @@ public class HostsRepositoryVersionCheck extends AbstractCheckDescriptor {
   }
 
   @Override
-  public void perform(PrerequisiteCheck prerequisiteCheck, PrereqCheckRequest request) throws AmbariException {
+  public void perform(PrerequisiteCheck prerequisiteCheck, PrereqCheckRequest request)
+      throws AmbariException {
     final String clusterName = request.getClusterName();
     final Cluster cluster = clustersProvider.get().getCluster(clusterName);
     final Map<String, Host> clusterHosts = clustersProvider.get().getHostsForCluster(clusterName);
     final StackId stackId = cluster.getDesiredStackVersion();
 
     for (Host host : clusterHosts.values()) {
-      if (host.getMaintenanceState(cluster.getClusterId()) == MaintenanceState.OFF) {
-
-        if (null != request.getRepositoryVersion()) {
-          boolean found = false;
-          for (HostVersionEntity hve : hostVersionDaoProvider.get().findByHost(host.getHostName())) {
+      // hosts in MM are skipped without a warning, even if they lack the repo version
+      MaintenanceState maintenanceState = host.getMaintenanceState(cluster.getClusterId());
+      if (maintenanceState != MaintenanceState.OFF) {
+        continue;
+      }
 
-            if (hve.getRepositoryVersion().getVersion().equals(request.getRepositoryVersion()) &&
-                hve.getState() == RepositoryVersionState.INSTALLED) {
-                found = true;
-                break;
-            }
-          }
+      if (null != request.getRepositoryVersion()) {
+        boolean found = false;
+        for (HostVersionEntity hve : hostVersionDaoProvider.get().findByHost(host.getHostName())) {
 
-          if (!found) {
-            prerequisiteCheck.getFailedOn().add(host.getHostName());
-          }
-        } else {
-          final RepositoryVersionEntity repositoryVersion = repositoryVersionDaoProvider.get().findByStackAndVersion(
-              stackId, request.getRepositoryVersion());
-          if (repositoryVersion == null) {
-            prerequisiteCheck.setStatus(PrereqCheckStatus.FAIL);
-            prerequisiteCheck.setFailReason(getFailReason(KEY_NO_REPO_VERSION, prerequisiteCheck, request));
-            prerequisiteCheck.getFailedOn().addAll(clusterHosts.keySet());
-            return;
+          if (hve.getRepositoryVersion().getVersion().equals(request.getRepositoryVersion())
+              && hve.getState() == RepositoryVersionState.INSTALLED) {
+            found = true;
+            break;
           }
+        }
 
-          StackEntity repositoryStackEntity = repositoryVersion.getStack();
-          StackId repositoryStackId = new StackId(
-              repositoryStackEntity.getStackName(),
-              repositoryStackEntity.getStackVersion());
+        if (!found) {
+          prerequisiteCheck.getFailedOn().add(host.getHostName());
+        }
+      } else {
+        final RepositoryVersionEntity repositoryVersion = repositoryVersionDaoProvider.get().findByStackAndVersion(
+            stackId, request.getRepositoryVersion());
+        if (repositoryVersion == null) {
+          prerequisiteCheck.setStatus(PrereqCheckStatus.FAIL);
+          prerequisiteCheck.setFailReason(
+              getFailReason(KEY_NO_REPO_VERSION, prerequisiteCheck, request));
+          prerequisiteCheck.getFailedOn().addAll(clusterHosts.keySet());
+          return;
+        }
 
-          final HostVersionEntity hostVersion = hostVersionDaoProvider.get().findByClusterStackVersionAndHost(
-              clusterName, repositoryStackId, repositoryVersion.getVersion(),
-              host.getHostName());
+        StackEntity repositoryStackEntity = repositoryVersion.getStack();
+        StackId repositoryStackId = new StackId(repositoryStackEntity.getStackName(),
+            repositoryStackEntity.getStackVersion());
 
-          if (hostVersion == null || hostVersion.getState() != RepositoryVersionState.INSTALLED) {
-            prerequisiteCheck.getFailedOn().add(host.getHostName());
-          }
+        final HostVersionEntity hostVersion = hostVersionDaoProvider.get().findByClusterStackVersionAndHost(
+            clusterName, repositoryStackId, repositoryVersion.getVersion(), host.getHostName());
 
+        if (hostVersion == null || hostVersion.getState() != RepositoryVersionState.INSTALLED) {
+          prerequisiteCheck.getFailedOn().add(host.getHostName());
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/ambari/blob/59dd207c/ambari-server/src/test/java/org/apache/ambari/server/checks/HostsHeartbeatCheckTest.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/checks/HostsHeartbeatCheckTest.java b/ambari-server/src/test/java/org/apache/ambari/server/checks/HostsHeartbeatCheckTest.java
index 5f6cae9..847027c 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/checks/HostsHeartbeatCheckTest.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/checks/HostsHeartbeatCheckTest.java
@@ -17,8 +17,8 @@
  */
 package org.apache.ambari.server.checks;
 
-import java.util.HashMap;
-import java.util.Map;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.ambari.server.configuration.Configuration;
 import org.apache.ambari.server.controller.PrereqCheckRequest;
@@ -74,7 +74,7 @@ public class HostsHeartbeatCheckTest {
     Mockito.when(cluster.getClusterId()).thenReturn(1L);
     Mockito.when(cluster.getCurrentStackVersion()).thenReturn(new StackId("HDP", "2.2"));
     Mockito.when(clusters.getCluster("cluster")).thenReturn(cluster);
-    final Map<String, Host> hosts = new HashMap<String, Host>();
+    final List<Host> hosts = new ArrayList<>();
     final Host host1 = Mockito.mock(Host.class);
     final Host host2 = Mockito.mock(Host.class);
     final Host host3 = Mockito.mock(Host.class);
@@ -90,17 +90,30 @@ public class HostsHeartbeatCheckTest {
     Mockito.when(status1.getHealthStatus()).thenReturn(HealthStatus.HEALTHY);
     Mockito.when(status2.getHealthStatus()).thenReturn(HealthStatus.HEALTHY);
     Mockito.when(status3.getHealthStatus()).thenReturn(HealthStatus.UNKNOWN);
-    hosts.put("host1", host1);
-    hosts.put("host2", host2);
-    hosts.put("host3", host3);
-    Mockito.when(clusters.getHostsForCluster("cluster")).thenReturn(hosts);
+    hosts.add(host1);
+    hosts.add(host2);
+    hosts.add(host3);
+    Mockito.when(cluster.getHosts()).thenReturn(hosts);
 
     PrerequisiteCheck check = new PrerequisiteCheck(null, null);
     hostHeartbeatCheck.perform(check, new PrereqCheckRequest("cluster"));
     Assert.assertEquals(PrereqCheckStatus.FAIL, check.getStatus());
 
+    // put the unhealthy host into MM to now produce a warning
+    check = new PrerequisiteCheck(null, null);
+    Mockito.when(host3.getMaintenanceState(1L)).thenReturn(MaintenanceState.ON);
+    hostHeartbeatCheck.perform(check, new PrereqCheckRequest("cluster"));
+    Assert.assertEquals(PrereqCheckStatus.WARNING, check.getStatus());
+
+    // make its status healthy, but keep it in MM to still produce a warning
+    check = new PrerequisiteCheck(null, null);
     Mockito.when(status3.getHealthStatus()).thenReturn(HealthStatus.HEALTHY);
+    hostHeartbeatCheck.perform(check, new PrereqCheckRequest("cluster"));
+    Assert.assertEquals(PrereqCheckStatus.WARNING, check.getStatus());
+
+    // take it out of MM to allow the check to pass
     check = new PrerequisiteCheck(null, null);
+    Mockito.when(host3.getMaintenanceState(1L)).thenReturn(MaintenanceState.OFF);
     hostHeartbeatCheck.perform(check, new PrereqCheckRequest("cluster"));
     Assert.assertEquals(PrereqCheckStatus.PASS, check.getStatus());