Posted to commits@ambari.apache.org by am...@apache.org on 2019/06/07 13:11:54 UTC

[ambari] branch branch-2.7 updated: [AMBARI-25244] : Rack based parallel restart for Rolling Upgrade (Backport to branch-2.7) (#2997)

This is an automated email from the ASF dual-hosted git repository.

amagyar pushed a commit to branch branch-2.7
in repository https://gitbox.apache.org/repos/asf/ambari.git


The following commit(s) were added to refs/heads/branch-2.7 by this push:
     new 146ede0  [AMBARI-25244] : Rack based parallel restart for Rolling Upgrade (Backport to branch-2.7) (#2997)
146ede0 is described below

commit 146ede00d696c3f9afc2e203845d327e9a8057f6
Author: virajjasani <34...@users.noreply.github.com>
AuthorDate: Fri Jun 7 18:41:48 2019 +0530

    [AMBARI-25244] : Rack based parallel restart for Rolling Upgrade (Backport to branch-2.7) (#2997)
    
    * [AMBARI-25244] : Rack based parallel restart for Rolling Upgrade
    
    * minor change
    
    * minor change
    
    * minor changes
    
    * minor changes
---
 ambari-server/pom.xml                              |   5 +
 ambari-server/src/examples/rack_hosts.yaml         | 116 +++++++++++++++++++++
 .../server/state/stack/upgrade/Grouping.java       |  98 ++++++++++++++++-
 3 files changed, 215 insertions(+), 4 deletions(-)
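
In short, the Grouping.java change below consults an optional rack-to-hosts
mapping when expanding a task into host batches: hosts are first grouped by
rack, and each rack's hosts are then split into batches of at most the
configured parallelism, so no batch spans racks. A minimal standalone sketch
of that batching idea (illustrative class and host names; plain JDK in place
of Ambari's SetUtils.split, which from its usage here appears to split a set
into subsets of a maximum size):

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.LinkedHashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    public class RackBatchSketch {
      static List<Set<String>> rackAwareBatches(Map<String, String> hostToRack, int parallelism) {
        // Invert host -> rack into rack -> hosts.
        Map<String, Set<String>> rackToHosts = new HashMap<>();
        hostToRack.forEach((host, rack) ->
            rackToHosts.computeIfAbsent(rack, r -> new HashSet<>()).add(host));
        // Split each rack's hosts into batches of at most `parallelism`,
        // then concatenate the batches across racks.
        List<Set<String>> batches = new ArrayList<>();
        for (Set<String> rackHosts : rackToHosts.values()) {
          Set<String> batch = new LinkedHashSet<>();
          for (String host : rackHosts) {
            batch.add(host);
            if (batch.size() == parallelism) {
              batches.add(batch);
              batch = new LinkedHashSet<>();
            }
          }
          if (!batch.isEmpty()) {
            batches.add(batch);
          }
        }
        return batches;
      }

      public static void main(String[] args) {
        Map<String, String> hostToRack = new HashMap<>();
        hostToRack.put("host1.abc.xyz.com", "racka-1");
        hostToRack.put("host2.abc.xyz.com", "racka-1");
        hostToRack.put("host3.abc.xyz.com", "rackb-22");
        // With parallelism 2, the two racka-1 hosts share a batch and the
        // rackb-22 host lands in its own batch; no batch mixes racks.
        System.out.println(rackAwareBatches(hostToRack, 2));
      }
    }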

diff --git a/ambari-server/pom.xml b/ambari-server/pom.xml
index f70abe5..6f36cbc 100644
--- a/ambari-server/pom.xml
+++ b/ambari-server/pom.xml
@@ -1634,6 +1634,11 @@
       <version>1.10.1</version>
     </dependency>
     <dependency>
+      <groupId>com.esotericsoftware.yamlbeans</groupId>
+      <artifactId>yamlbeans</artifactId>
+      <version>1.13</version>
+    </dependency>
+    <dependency>
       <groupId>org.apache.ambari</groupId>
       <artifactId>ambari-metrics-common</artifactId>
       <version>${project.version}</version>
diff --git a/ambari-server/src/examples/rack_hosts.yaml b/ambari-server/src/examples/rack_hosts.yaml
new file mode 100644
index 0000000..ef7a4a3
--- /dev/null
+++ b/ambari-server/src/examples/rack_hosts.yaml
@@ -0,0 +1,116 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+racks:
+  racka-1:
+    hostGroups:
+    - hosts:
+      - cluster12b-slave1-237.abc.xyz.com
+      - cluster12b-slave1-274.abc.xyz.com
+      - cluster12b-slave1-278.abc.xyz.com
+      - cluster12b-slave1-282.abc.xyz.com
+      - cluster12b-slave1-363.abc.xyz.com
+      - cluster12b-slave1-378.abc.xyz.com
+      - cluster12b-slave1-380.abc.xyz.com
+      - cluster12b-slave1-430.abc.xyz.com
+    - hosts:
+      - cluster12a-slave5-1.abc.xyz.com
+      - cluster12a-slave5-2.abc.xyz.com
+      - cluster12a-slave5-3.abc.xyz.com
+      - cluster12a-slave5-4.abc.xyz.com
+      - cluster12a-master5-1.abc.xyz.com
+      - cluster12a-master5-2.abc.xyz.com
+      - cluster12a-master5-3.abc.xyz.com
+      - cluster12b-slave1-141.abc.xyz.com
+      - cluster12b-slave1-163.abc.xyz.com
+      - cluster12b-slave1-176.abc.xyz.com
+      - cluster12b-slave1-5.abc.xyz.com
+      - cluster12b-slave1-72.abc.xyz.com
+      - cluster12b-master2-2.abc.xyz.com
+  rackb-22:
+    hosts:
+    - cluster12a-slave39-1.abc.xyz.com
+    - cluster12a-slave39-2.abc.xyz.com
+    - cluster12a-slave39-3.abc.xyz.com
+    - cluster12a-slave39-4.abc.xyz.com
+    - cluster12a-slave39-5.abc.xyz.com
+    - cluster12a-slave39-6.abc.xyz.com
+    - cluster12b-slave1-162.abc.xyz.com
+    - cluster12b-slave1-242.abc.xyz.com
+    - cluster12b-slave1-336.abc.xyz.com
+    - cluster12b-slave1-360.abc.xyz.com
+    - cluster12b-slave1-376.abc.xyz.com
+    - cluster12b-master1-2.abc.xyz.com
+  rackc-3:
+    hostGroups:
+    - hosts:
+      - cluster12b-slave1-339.abc.xyz.com
+    - hosts:
+      - cluster12a-slave19-1.abc.xyz.com
+      - cluster12a-slave19-2.abc.xyz.com
+      - cluster12a-slave19-3.abc.xyz.com
+      - cluster12a-slave19-4.abc.xyz.com
+      - cluster12b-slave1-120.abc.xyz.com
+      - cluster12b-slave1-165.abc.xyz.com
+      - cluster12b-slave1-232.abc.xyz.com
+      - cluster12b-slave1-281.abc.xyz.com
+      - cluster12b-slave1-29.abc.xyz.com
+      - cluster12b-slave1-314.abc.xyz.com
+      - cluster12b-slave1-328.abc.xyz.com
+      - cluster12b-slave1-334.abc.xyz.com
+      - cluster12b-slave1-36.abc.xyz.com
+  rackd-11:
+    hosts:
+    - cluster12a-slave50-1.abc.xyz.com
+    - cluster12a-slave50-2.abc.xyz.com
+    - cluster12a-slave50-3.abc.xyz.com
+    - cluster12a-slave50-4.abc.xyz.com
+  racke-122:
+    hosts:
+    - cluster12a-slave57-1.abc.xyz.com
+    - cluster12a-slave57-2.abc.xyz.com
+    - cluster12a-slave57-3.abc.xyz.com
+    - cluster12b-slave1-171.abc.xyz.com
+    - cluster12b-slave1-178.abc.xyz.com
+    - cluster12b-slave1-213.abc.xyz.com
+    - cluster12b-slave1-269.abc.xyz.com
+    - cluster12b-slave1-28.abc.xyz.com
+    - cluster12b-slave1-293.abc.xyz.com
+    - cluster12b-slave1-298.abc.xyz.com
+    - cluster12b-slave1-423.abc.xyz.com
+    - cluster12b-slave1-437.abc.xyz.com
+    - cluster12b-slave1-56.abc.xyz.com
+  racka-98:
+    hostGroups:
+    - hosts:
+      - cluster12b-slave1-356.abc.xyz.com
+      - cluster12b-slave1-459.abc.xyz.com
+      - cluster12b-slave1-460.abc.xyz.com
+    - hosts:
+      - cluster12a-slave43-1.abc.xyz.com
+      - cluster12a-slave43-2.abc.xyz.com
+      - cluster12b-slave1-1.abc.xyz.com
+      - cluster12b-slave1-11.abc.xyz.com
+      - cluster12b-slave1-160.abc.xyz.com
+      - cluster12b-slave1-173.abc.xyz.com
+      - cluster12b-slave1-229.abc.xyz.com
+      - cluster12b-slave1-249.abc.xyz.com
+      - cluster12b-slave1-38.abc.xyz.com
+      - cluster12b-slave1-59.abc.xyz.com
+      - cluster12b-slave1-62.abc.xyz.com
+      - cluster12b-slave1-76.abc.xyz.com
+      - cluster12b-slave1-78.abc.xyz.com
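
The example file above exercises both shapes the parser accepts: a rack may
list its hosts directly under "hosts", or nest them under "hostGroups", where
each group carries its own "hosts" list. A skeletal version of the schema
(placeholder names, not a real topology):

    racks:
      <rack-name>:
        hosts:              # flat form: hosts listed directly under the rack
        - <host-fqdn>
      <another-rack-name>:
        hostGroups:         # grouped form: one or more host lists per rack
        - hosts:
          - <host-fqdn>

The feature is opt-in: per the Grouping.java change below, it only activates
when the cluster-env property rack_yaml_file_path points at such a file
readable on the Ambari server; otherwise batching falls back to the plain
SetUtils.split behavior.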
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/Grouping.java b/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/Grouping.java
index 5f667ba..666894c 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/Grouping.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/Grouping.java
@@ -17,6 +17,8 @@
  */
 package org.apache.ambari.server.state.stack.upgrade;
 
+import java.io.FileReader;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -40,6 +42,7 @@ import org.apache.ambari.server.state.stack.UpgradePack.ProcessingComponent;
 import org.apache.ambari.server.utils.SetUtils;
 import org.apache.commons.lang.StringUtils;
 
+import com.esotericsoftware.yamlbeans.YamlReader;
 import com.google.common.base.Objects;
 
 /**
@@ -50,6 +53,10 @@ import com.google.common.base.Objects;
     StartGrouping.class, StopGrouping.class, HostOrderGrouping.class })
 public class Grouping {
 
+  private static final String RACKS_YAML_KEY_NAME = "racks";
+  private static final String HOSTS_YAML_KEY_NAME = "hosts";
+  private static final String HOST_GROUPS_YAML_KEY_NAME = "hostGroups";
+
   @XmlAttribute(name="name")
   public String name;
 
@@ -215,7 +222,8 @@ public class Grouping {
      * @param pc Processing Component
      * @param params Params to add to the stage.
      */
-    private void addTasksToStageInBatches(List<TaskWrapper> tasks, String verb, UpgradeContext ctx, String service, ProcessingComponent pc, Map<String, String> params) {
+    private void addTasksToStageInBatches(List<TaskWrapper> tasks, String verb, UpgradeContext ctx, String service,
+                                          ProcessingComponent pc, Map<String, String> params) {
       if (tasks == null || tasks.isEmpty() || tasks.get(0).getTasks() == null || tasks.get(0).getTasks().isEmpty()) {
         return;
       }
@@ -225,7 +233,7 @@ public class Grouping {
 
       // Expand some of the TaskWrappers into multiple based on the batch size.
       for (TaskWrapper tw : tasks) {
-        List<Set<String>> hostSets;
+        List<Set<String>> hostSets = null;
         if (m_grouping.parallelScheduler != null) {
           int taskParallelism = m_grouping.parallelScheduler.maxDegreeOfParallelism;
           String maxDegreeFromClusterEnv = ctx.getResolver()
@@ -236,11 +244,28 @@ public class Grouping {
           if (taskParallelism == Integer.MAX_VALUE) {
             taskParallelism = ctx.getDefaultMaxDegreeOfParallelism();
           }
-          hostSets = SetUtils.split(tw.getHosts(), taskParallelism);
+          final String rackYamlFile =
+                  ctx.getResolver().getValueFromDesiredConfigurations(ConfigHelper.CLUSTER_ENV, "rack_yaml_file_path");
+          if (StringUtils.isNotEmpty(rackYamlFile)) {
+            // If the path of a rack-to-hosts mapping yaml file is present in the cluster-env property
+            // rack_yaml_file_path, host sets are formed per rack: for the given parallelism value, hosts on the
+            // same rack are placed in the same batch, which limits the blast radius of a single rack failure.
+            Map<String, Set<String>> hostsByRack = organizeHostsByRack(tw.getHosts(), rackYamlFile);
+            List<Set<String>> hostSetsForRack;
+            for (String rack : hostsByRack.keySet()) {
+              hostSetsForRack = SetUtils.split(hostsByRack.get(rack), taskParallelism);
+              if (hostSets == null) {
+                hostSets = hostSetsForRack;
+              } else {
+                hostSets.addAll(hostSetsForRack);
+              }
+            }
+          } else {
+            hostSets = SetUtils.split(tw.getHosts(), taskParallelism);
+          }
         } else {
           hostSets = SetUtils.split(tw.getHosts(), 1);
         }
-
         int numBatchesNeeded = hostSets.size();
         int batchNum = 0;
         for (Set<String> hostSubset : hostSets) {
@@ -259,6 +284,71 @@ public class Grouping {
     }
 
     /**
+     * Utility method to organize and return the rack-to-hosts mapping for the given rack yaml file.
+     *
+     * @param hosts        all hosts that are part of the current group
+     * @param rackYamlFile file path of the yaml containing the rack-to-hosts mapping,
+     *                     e.g. ambari-server/src/examples/rack_hosts.yaml
+     * @return map of rack name to the set of hosts located on that rack
+     */
+    private Map<String, Set<String>> organizeHostsByRack(Set<String> hosts, String rackYamlFile) {
+      try {
+        Map<String, String> hostToRackMap = getHostToRackMap(rackYamlFile);
+        Map<String, Set<String>> rackToHostsMap = new HashMap<>();
+        for (String host : hosts) {
+          if (hostToRackMap.containsKey(host)) {
+            String rack = hostToRackMap.get(host);
+            if (!rackToHostsMap.containsKey(rack)) {
+              rackToHostsMap.put(rack, new HashSet<>());
+            }
+            rackToHostsMap.get(rack).add(host);
+          } else {
+            throw new RuntimeException(String.format("Rack mapping is not present for host name: %s", host));
+          }
+        }
+        return rackToHostsMap;
+      } catch (Exception e) {
+        throw new RuntimeException(
+                String.format("Failed to generate Rack to Hosts mapping. filePath: %s", rackYamlFile), e);
+      }
+    }
+
+    private static Map<String, String> getHostToRackMap(String rackYamlFile)
+            throws IOException {
+      YamlReader yamlReader = new YamlReader(new FileReader(rackYamlFile));
+      Map rackHostsMap;
+      try {
+        rackHostsMap = (Map) yamlReader.read();
+      } finally {
+        yamlReader.close();
+      }
+      Map racks = (Map) rackHostsMap.get(RACKS_YAML_KEY_NAME);
+      Map<String, String> hostToRackMap = new HashMap<>();
+      for (Map.Entry entry : (Set<Map.Entry>) racks.entrySet()) {
+        Map rackInfoMap = (Map) entry.getValue();
+        String rackName = (String) entry.getKey();
+        if (rackInfoMap.containsKey(HOSTS_YAML_KEY_NAME)) {
+          List<String> hostList = (List<String>) rackInfoMap.get(HOSTS_YAML_KEY_NAME);
+          for (String host : hostList) {
+            hostToRackMap.put(host, rackName);
+          }
+        }
+        if (rackInfoMap.containsKey(HOST_GROUPS_YAML_KEY_NAME)) {
+          List<Map> hostGroups = (List<Map>) rackInfoMap.get(HOST_GROUPS_YAML_KEY_NAME);
+          for (Map hostGroup : hostGroups) {
+            if (hostGroup.containsKey(HOSTS_YAML_KEY_NAME)) {
+              List<String> hostList = (List<String>) hostGroup.get(HOSTS_YAML_KEY_NAME);
+              for (String host : hostList) {
+                hostToRackMap.put(host, rackName);
+              }
+            }
+          }
+        }
+      }
+      return hostToRackMap;
+    }
+
+    /**
      * Determine if service checks need to be ran after the stages.
      * @param upgradeContext the upgrade context
      * @return Return the stages, which may potentially be followed by service checks.
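
To make the new parsing concrete, the following standalone sketch (a
hypothetical class, not part of this commit) feeds a rack_hosts.yaml-style
document through yamlbeans and flattens it into the same host-to-rack map
that getHostToRackMap builds, covering both the flat "hosts" form and the
"hostGroups" form:

    import java.io.StringReader;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    import com.esotericsoftware.yamlbeans.YamlReader;

    public class RackYamlSketch {
      public static void main(String[] args) throws Exception {
        String yaml =
            "racks:\n"
          + "  racka-1:\n"
          + "    hosts:\n"
          + "    - host1.abc.xyz.com\n"
          + "  rackc-3:\n"
          + "    hostGroups:\n"
          + "    - hosts:\n"
          + "      - host2.abc.xyz.com\n";
        YamlReader reader = new YamlReader(new StringReader(yaml));
        Map<?, ?> root = (Map<?, ?>) reader.read();
        reader.close();

        Map<String, String> hostToRack = new HashMap<>();
        Map<?, ?> racks = (Map<?, ?>) root.get("racks");
        for (Map.Entry<?, ?> rack : racks.entrySet()) {
          String rackName = (String) rack.getKey();
          Map<?, ?> rackInfo = (Map<?, ?>) rack.getValue();
          List<?> hosts = (List<?>) rackInfo.get("hosts");       // flat form
          if (hosts != null) {
            for (Object host : hosts) {
              hostToRack.put((String) host, rackName);
            }
          }
          List<?> groups = (List<?>) rackInfo.get("hostGroups"); // grouped form
          if (groups != null) {
            for (Object group : groups) {
              for (Object host : (List<?>) ((Map<?, ?>) group).get("hosts")) {
                hostToRack.put((String) host, rackName);
              }
            }
          }
        }
        // Prints {host1.abc.xyz.com=racka-1, host2.abc.xyz.com=rackc-3}
        // (map ordering may vary).
        System.out.println(hostToRack);
      }
    }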