You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jo...@apache.org on 2015/08/31 16:47:13 UTC

ambari git commit: AMBARI-12924 - Upgrade Orchestration To Skip Unhealthy Hosts (jonathanhurley)

Repository: ambari
Updated Branches:
  refs/heads/trunk f0109402d -> 016cbb6e9


AMBARI-12924 - Upgrade Orchestration To Skip Unhealthy Hosts (jonathanhurley)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/016cbb6e
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/016cbb6e
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/016cbb6e

Branch: refs/heads/trunk
Commit: 016cbb6e9f4fbcaa4a1387a9bb474086c6b0f7f8
Parents: f010940
Author: Jonathan Hurley <jh...@hortonworks.com>
Authored: Fri Aug 28 15:19:58 2015 -0400
Committer: Jonathan Hurley <jh...@hortonworks.com>
Committed: Mon Aug 31 10:47:06 2015 -0400

----------------------------------------------------------------------
 .../apache/ambari/server/stack/HostsType.java   |  7 +++
 .../ambari/server/stack/MasterHostResolver.java | 59 ++++++++++++--------
 .../state/stack/upgrade/ClusterGrouping.java    | 12 +++-
 .../ambari/server/state/UpgradeHelperTest.java  | 44 +++++++++++++++
 4 files changed, 96 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/016cbb6e/ambari-server/src/main/java/org/apache/ambari/server/stack/HostsType.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/stack/HostsType.java b/ambari-server/src/main/java/org/apache/ambari/server/stack/HostsType.java
index 55313d5..9c953f2 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/stack/HostsType.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/stack/HostsType.java
@@ -49,6 +49,13 @@ public class HostsType {
    */
   public LinkedHashSet<String> hosts = new LinkedHashSet<String>();
 
+  /**
+   * Unhealthy hosts are those which are explicitly put into maintenance mode.
+   * If there is a host which is not heartbeating (or is generally unhealthy)
+   * but not in maintenance mode, then the prerequisite upgrade checks will let
+   * the administrator know that it must be put into maintenance mode before an
+   * upgrade can begin.
+   */
   public List<ServiceComponentHost> unhealthy = new ArrayList<ServiceComponentHost>();
 
 }

http://git-wip-us.apache.org/repos/asf/ambari/blob/016cbb6e/ambari-server/src/main/java/org/apache/ambari/server/stack/MasterHostResolver.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/stack/MasterHostResolver.java b/ambari-server/src/main/java/org/apache/ambari/server/stack/MasterHostResolver.java
index ef75d38..62613ff 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/stack/MasterHostResolver.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/stack/MasterHostResolver.java
@@ -31,7 +31,8 @@ import java.util.Set;
 import org.apache.ambari.server.AmbariException;
 import org.apache.ambari.server.state.Cluster;
 import org.apache.ambari.server.state.ConfigHelper;
-import org.apache.ambari.server.state.HostState;
+import org.apache.ambari.server.state.Host;
+import org.apache.ambari.server.state.MaintenanceState;
 import org.apache.ambari.server.state.ServiceComponent;
 import org.apache.ambari.server.state.ServiceComponentHost;
 import org.apache.ambari.server.utils.HTTPUtils;
@@ -134,7 +135,7 @@ public class MasterHostResolver {
         case HDFS:
           if (componentName.equalsIgnoreCase("NAMENODE")) {
             if (componentHosts.size() != 2) {
-              return filterSameVersion(hostsType, serviceName, componentName);
+              return filterHosts(hostsType, serviceName, componentName);
             }
 
             Map<Status, String> pair = getNameNodePair();
@@ -163,43 +164,55 @@ public class MasterHostResolver {
       LOG.error("Unable to get master and hosts for Component " + componentName + ". Error: " + err.getMessage(), err);
     }
 
-    hostsType = filterSameVersion(hostsType, serviceName, componentName);
+    hostsType = filterHosts(hostsType, serviceName, componentName);
 
     return hostsType;
   }
 
   /**
-   * Compares the versions of a HostComponent to the version for the resolver.
-   * If version is unspecified for the object, the {@link HostsType} object is
-   * returned without change.
+   * Filters the supplied list of hosts in the following ways:
+   * <ul>
+   * <li>Compares the version of each host component to the version for the
+   * resolver. Only hosts whose version does not match are retained.</li>
+   * <li>Removes unhealthy hosts in maintenance mode from the list of healthy
+   * hosts</li>
+   * </ul>
    *
-   * @param hostsType the hosts to resolve
-   * @param service   the service name
-   * @param component the component name
-   * @return the modified hosts instance with filtered and unhealthy hosts filled
+   * @param hostsType
+   *          the hosts to resolve
+   * @param service
+   *          the service name
+   * @param component
+   *          the component name
+   * @return the modified hosts instance with filtered and unhealthy hosts
+   *         filled
    */
-  private HostsType filterSameVersion(HostsType hostsType, String service, String component) {
-
+  private HostsType filterHosts(HostsType hostsType, String service, String component) {
     try {
       org.apache.ambari.server.state.Service svc = m_cluster.getService(service);
       ServiceComponent sc = svc.getServiceComponent(component);
 
       // !!! not really a fan of passing these around
-      List<ServiceComponentHost> unhealthy = new ArrayList<ServiceComponentHost>();
-      LinkedHashSet<String> toUpgrade = new LinkedHashSet<String>();
-
-      for (String host : hostsType.hosts) {
-        ServiceComponentHost sch = sc.getServiceComponentHost(host);
-
-        if (HostState.HEALTHY != sch.getHostState() && !sc.isMasterComponent()) {
-          unhealthy.add(sch);
+      List<ServiceComponentHost> unhealthyHosts = new ArrayList<ServiceComponentHost>();
+      LinkedHashSet<String> upgradeHosts = new LinkedHashSet<String>();
+
+      for (String hostName : hostsType.hosts) {
+        ServiceComponentHost sch = sc.getServiceComponentHost(hostName);
+        Host host = sch.getHost();
+        MaintenanceState maintenanceState = host.getMaintenanceState(sch.getClusterId());
+
+        // !!! FIXME: only rely on maintenance state once the upgrade endpoint
+        // is using the pre-req endpoint for determining if an upgrade is
+        // possible
+        if (maintenanceState != MaintenanceState.OFF && !sc.isMasterComponent()) {
+          unhealthyHosts.add(sch);
         } else if (null == m_version || null == sch.getVersion() || !sch.getVersion().equals(m_version)) {
-          toUpgrade.add(host);
+          upgradeHosts.add(hostName);
         }
       }
 
-      hostsType.unhealthy = unhealthy;
-      hostsType.hosts = toUpgrade;
+      hostsType.unhealthy = unhealthyHosts;
+      hostsType.hosts = upgradeHosts;
 
       return hostsType;
     } catch (AmbariException e) {

http://git-wip-us.apache.org/repos/asf/ambari/blob/016cbb6e/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/ClusterGrouping.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/ClusterGrouping.java b/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/ClusterGrouping.java
index ad84210..cf58511 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/ClusterGrouping.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/ClusterGrouping.java
@@ -35,7 +35,9 @@ import javax.xml.bind.annotation.XmlTransient;
 import javax.xml.bind.annotation.XmlType;
 
 import org.apache.ambari.server.stack.HostsType;
+import org.apache.ambari.server.state.Cluster;
 import org.apache.ambari.server.state.Host;
+import org.apache.ambari.server.state.MaintenanceState;
 import org.apache.ambari.server.state.UpgradeContext;
 import org.apache.ambari.server.state.stack.UpgradePack.ProcessingComponent;
 
@@ -224,11 +226,15 @@ public class ClusterGrouping extends Grouping {
             new TaskWrapper(service, component, realHosts, et));
       }
     } else if (null == service && null == component) {
-      // no service, no component goes to all hosts
-
+      // no service and no component will distribute the task to all healthy
+      // hosts not in maintenance mode
+      Cluster cluster = ctx.getCluster();
       Set<String> hostNames = new HashSet<String>();
       for (Host host : ctx.getCluster().getHosts()) {
-        hostNames.add(host.getHostName());
+        MaintenanceState maintenanceState = host.getMaintenanceState(cluster.getClusterId());
+        if (maintenanceState == MaintenanceState.OFF) {
+          hostNames.add(host.getHostName());
+        }
       }
 
       return new StageWrapper(

http://git-wip-us.apache.org/repos/asf/ambari/blob/016cbb6e/ambari-server/src/test/java/org/apache/ambari/server/state/UpgradeHelperTest.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/state/UpgradeHelperTest.java b/ambari-server/src/test/java/org/apache/ambari/server/state/UpgradeHelperTest.java
index 6267f53..7077f4c 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/state/UpgradeHelperTest.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/state/UpgradeHelperTest.java
@@ -194,6 +194,50 @@ public class UpgradeHelperTest {
   }
 
   /**
+   * Tests that hosts in maintenance mode (MM) are not included in the upgrade.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testUpgradeOrchestrationWithHostsInMM() throws Exception {
+    Map<String, UpgradePack> upgrades = ambariMetaInfo.getUpgradePacks("foo", "bar");
+    assertTrue(upgrades.isEmpty());
+
+    upgrades = ambariMetaInfo.getUpgradePacks("HDP", "2.1.1");
+
+    ServiceInfo si = ambariMetaInfo.getService("HDP", "2.1.1", "ZOOKEEPER");
+    si.setDisplayName("Zk");
+
+    ComponentInfo ci = si.getComponentByName("ZOOKEEPER_SERVER");
+    ci.setDisplayName("ZooKeeper1 Server2");
+
+    assertTrue(upgrades.containsKey("upgrade_test"));
+    UpgradePack upgrade = upgrades.get("upgrade_test");
+    assertNotNull(upgrade);
+
+    // turn on MM for the first host
+    Cluster cluster = makeCluster();
+    Host hostInMaintenanceMode = cluster.getHosts().iterator().next();
+    hostInMaintenanceMode.setMaintenanceState(cluster.getClusterId(), MaintenanceState.ON);
+
+    // use a "real" master host resolver here so that we can actually test MM
+    MasterHostResolver masterHostResolver = new MasterHostResolver(null, cluster, "");
+
+    UpgradeContext context = new UpgradeContext(masterHostResolver, HDP_21, HDP_21,
+        UPGRADE_VERSION, Direction.UPGRADE);
+
+    List<UpgradeGroupHolder> groups = m_upgradeHelper.createSequence(upgrade, context);
+    assertEquals(6, groups.size());
+
+    for (UpgradeGroupHolder group : groups) {
+      for (StageWrapper stageWrapper : group.items) {
+        Set<String> hosts = stageWrapper.getHosts();
+        assertFalse(hosts.contains(hostInMaintenanceMode.getHostName()));
+      }
+    }
+  }
+
+  /**
    * Verify that a Rolling Upgrades restarts the NameNodes in the following order: standby, active.
    * @throws Exception
    */