You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by am...@apache.org on 2019/06/07 13:11:54 UTC
[ambari] branch branch-2.7 updated: [AMBARI-25244] : Rack based
parallel restart for Rolling Upgrade (Backport to branch-2.7) (#2997)
This is an automated email from the ASF dual-hosted git repository.
amagyar pushed a commit to branch branch-2.7
in repository https://gitbox.apache.org/repos/asf/ambari.git
The following commit(s) were added to refs/heads/branch-2.7 by this push:
new 146ede0 [AMBARI-25244] : Rack based parallel restart for Rolling Upgrade (Backport to branch-2.7) (#2997)
146ede0 is described below
commit 146ede00d696c3f9afc2e203845d327e9a8057f6
Author: virajjasani <34...@users.noreply.github.com>
AuthorDate: Fri Jun 7 18:41:48 2019 +0530
[AMBARI-25244] : Rack based parallel restart for Rolling Upgrade (Backport to branch-2.7) (#2997)
* [AMBARI-25244] : Rack based parallel restart for Rolling Upgrade
* minor change
* minor change
* minor changes
* minor changes
---
ambari-server/pom.xml | 5 +
ambari-server/src/examples/rack_hosts.yaml | 116 +++++++++++++++++++++
.../server/state/stack/upgrade/Grouping.java | 98 ++++++++++++++++-
3 files changed, 215 insertions(+), 4 deletions(-)
diff --git a/ambari-server/pom.xml b/ambari-server/pom.xml
index f70abe5..6f36cbc 100644
--- a/ambari-server/pom.xml
+++ b/ambari-server/pom.xml
@@ -1634,6 +1634,11 @@
<version>1.10.1</version>
</dependency>
<dependency>
+ <groupId>com.esotericsoftware.yamlbeans</groupId>
+ <artifactId>yamlbeans</artifactId>
+ <version>1.13</version>
+ </dependency>
+ <dependency>
<groupId>org.apache.ambari</groupId>
<artifactId>ambari-metrics-common</artifactId>
<version>${project.version}</version>
diff --git a/ambari-server/src/examples/rack_hosts.yaml b/ambari-server/src/examples/rack_hosts.yaml
new file mode 100644
index 0000000..ef7a4a3
--- /dev/null
+++ b/ambari-server/src/examples/rack_hosts.yaml
@@ -0,0 +1,116 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+racks:
+ racka-1:
+ hostGroups:
+ - hosts:
+ - cluster12b-slave1-237.abc.xyz.com
+ - cluster12b-slave1-274.abc.xyz.com
+ - cluster12b-slave1-278.abc.xyz.com
+ - cluster12b-slave1-282.abc.xyz.com
+ - cluster12b-slave1-363.abc.xyz.com
+ - cluster12b-slave1-378.abc.xyz.com
+ - cluster12b-slave1-380.abc.xyz.com
+ - cluster12b-slave1-430.abc.xyz.com
+ - hosts:
+ - cluster12a-slave5-1.abc.xyz.com
+ - cluster12a-slave5-2.abc.xyz.com
+ - cluster12a-slave5-3.abc.xyz.com
+ - cluster12a-slave5-4.abc.xyz.com
+ - cluster12a-master5-1.abc.xyz.com
+ - cluster12a-master5-2.abc.xyz.com
+ - cluster12a-master5-3.abc.xyz.com
+ - cluster12b-slave1-141.abc.xyz.com
+ - cluster12b-slave1-163.abc.xyz.com
+ - cluster12b-slave1-176.abc.xyz.com
+ - cluster12b-slave1-5.abc.xyz.com
+ - cluster12b-slave1-72.abc.xyz.com
+ - cluster12b-master2-2.abc.xyz.com
+ rackb-22:
+ hosts:
+ - cluster12a-slave39-1.abc.xyz.com
+ - cluster12a-slave39-2.abc.xyz.com
+ - cluster12a-slave39-3.abc.xyz.com
+ - cluster12a-slave39-4.abc.xyz.com
+ - cluster12a-slave39-5.abc.xyz.com
+ - cluster12a-slave39-6.abc.xyz.com
+ - cluster12b-slave1-162.abc.xyz.com
+ - cluster12b-slave1-242.abc.xyz.com
+ - cluster12b-slave1-336.abc.xyz.com
+ - cluster12b-slave1-360.abc.xyz.com
+ - cluster12b-slave1-376.abc.xyz.com
+ - cluster12b-master1-2.abc.xyz.com
+ rackc-3:
+ hostGroups:
+ - hosts:
+ - cluster12b-slave1-339.abc.xyz.com
+ - hosts:
+ - cluster12a-slave19-1.abc.xyz.com
+ - cluster12a-slave19-2.abc.xyz.com
+ - cluster12a-slave19-3.abc.xyz.com
+ - cluster12a-slave19-4.abc.xyz.com
+ - cluster12b-slave1-120.abc.xyz.com
+ - cluster12b-slave1-165.abc.xyz.com
+ - cluster12b-slave1-232.abc.xyz.com
+ - cluster12b-slave1-281.abc.xyz.com
+ - cluster12b-slave1-29.abc.xyz.com
+ - cluster12b-slave1-314.abc.xyz.com
+ - cluster12b-slave1-328.abc.xyz.com
+ - cluster12b-slave1-334.abc.xyz.com
+ - cluster12b-slave1-36.abc.xyz.com
+ rackd-11:
+ hosts:
+ - cluster12a-slave50-1.abc.xyz.com
+ - cluster12a-slave50-2.abc.xyz.com
+ - cluster12a-slave50-3.abc.xyz.com
+ - cluster12a-slave50-4.abc.xyz.com
+ racke-122:
+ hosts:
+ - cluster12a-slave57-1.abc.xyz.com
+ - cluster12a-slave57-2.abc.xyz.com
+ - cluster12a-slave57-3.abc.xyz.com
+ - cluster12b-slave1-171.abc.xyz.com
+ - cluster12b-slave1-178.abc.xyz.com
+ - cluster12b-slave1-213.abc.xyz.com
+ - cluster12b-slave1-269.abc.xyz.com
+ - cluster12b-slave1-28.abc.xyz.com
+ - cluster12b-slave1-293.abc.xyz.com
+ - cluster12b-slave1-298.abc.xyz.com
+ - cluster12b-slave1-423.abc.xyz.com
+ - cluster12b-slave1-437.abc.xyz.com
+ - cluster12b-slave1-56.abc.xyz.com
+ racka-98:
+ hostGroups:
+ - hosts:
+ - cluster12b-slave1-356.abc.xyz.com
+ - cluster12b-slave1-459.abc.xyz.com
+ - cluster12b-slave1-460.abc.xyz.com
+ - hosts:
+ - cluster12a-slave43-1.abc.xyz.com
+ - cluster12a-slave43-2.abc.xyz.com
+ - cluster12b-slave1-1.abc.xyz.com
+ - cluster12b-slave1-11.abc.xyz.com
+ - cluster12b-slave1-160.abc.xyz.com
+ - cluster12b-slave1-173.abc.xyz.com
+ - cluster12b-slave1-229.abc.xyz.com
+ - cluster12b-slave1-249.abc.xyz.com
+ - cluster12b-slave1-38.abc.xyz.com
+ - cluster12b-slave1-59.abc.xyz.com
+ - cluster12b-slave1-62.abc.xyz.com
+ - cluster12b-slave1-76.abc.xyz.com
+ - cluster12b-slave1-78.abc.xyz.com
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/Grouping.java b/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/Grouping.java
index 5f667ba..666894c 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/Grouping.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/Grouping.java
@@ -17,6 +17,8 @@
*/
package org.apache.ambari.server.state.stack.upgrade;
+import java.io.FileReader;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
@@ -40,6 +42,7 @@ import org.apache.ambari.server.state.stack.UpgradePack.ProcessingComponent;
import org.apache.ambari.server.utils.SetUtils;
import org.apache.commons.lang.StringUtils;
+import com.esotericsoftware.yamlbeans.YamlReader;
import com.google.common.base.Objects;
/**
@@ -50,6 +53,10 @@ import com.google.common.base.Objects;
StartGrouping.class, StopGrouping.class, HostOrderGrouping.class })
public class Grouping {
+ private static final String RACKS_YAML_KEY_NAME = "racks";
+ private static final String HOSTS_YAML_KEY_NAME = "hosts";
+ private static final String HOST_GROUPS_YAML_KEY_NAME = "hostGroups";
+
@XmlAttribute(name="name")
public String name;
@@ -215,7 +222,8 @@ public class Grouping {
* @param pc Processing Component
* @param params Params to add to the stage.
*/
- private void addTasksToStageInBatches(List<TaskWrapper> tasks, String verb, UpgradeContext ctx, String service, ProcessingComponent pc, Map<String, String> params) {
+ private void addTasksToStageInBatches(List<TaskWrapper> tasks, String verb, UpgradeContext ctx, String service,
+ ProcessingComponent pc, Map<String, String> params) {
if (tasks == null || tasks.isEmpty() || tasks.get(0).getTasks() == null || tasks.get(0).getTasks().isEmpty()) {
return;
}
@@ -225,7 +233,7 @@ public class Grouping {
// Expand some of the TaskWrappers into multiple based on the batch size.
for (TaskWrapper tw : tasks) {
- List<Set<String>> hostSets;
+ List<Set<String>> hostSets = null;
if (m_grouping.parallelScheduler != null) {
int taskParallelism = m_grouping.parallelScheduler.maxDegreeOfParallelism;
String maxDegreeFromClusterEnv = ctx.getResolver()
@@ -236,11 +244,28 @@ public class Grouping {
if (taskParallelism == Integer.MAX_VALUE) {
taskParallelism = ctx.getDefaultMaxDegreeOfParallelism();
}
- hostSets = SetUtils.split(tw.getHosts(), taskParallelism);
+ final String rackYamlFile =
+ ctx.getResolver().getValueFromDesiredConfigurations(ConfigHelper.CLUSTER_ENV, "rack_yaml_file_path");
+ if (StringUtils.isNotEmpty(rackYamlFile)) {
+ // If rack to hosts mapping yaml file path is present in cluster-env property: rack_yaml_file_path,
+ // host sets will be formed based on rack i.e. based on parallel value, hosts present on same rack will
+ // be part of the same batch. This is useful when we want to avoid possibility of single rack failure
+ Map<String, Set<String>> hostsByRack = organizeHostsByRack(tw.getHosts(), rackYamlFile);
+ List<Set<String>> hostSetsForRack;
+ for (String rack : hostsByRack.keySet()) {
+ hostSetsForRack = SetUtils.split(hostsByRack.get(rack), taskParallelism);
+ if (hostSets == null) {
+ hostSets = hostSetsForRack;
+ } else {
+ hostSets.addAll(hostSetsForRack);
+ }
+ }
+ } else {
+ hostSets = SetUtils.split(tw.getHosts(), taskParallelism);
+ }
} else {
hostSets = SetUtils.split(tw.getHosts(), 1);
}
-
int numBatchesNeeded = hostSets.size();
int batchNum = 0;
for (Set<String> hostSubset : hostSets) {
@@ -259,6 +284,71 @@ public class Grouping {
}
/**
+ * Utility method to organize and return the rack-to-hosts mapping for the given rack YAML file.
+ *
+ * @param hosts all hosts that are part of the current group
+ * @param rackYamlFile file path of the YAML file containing the rack-to-hosts mapping,
+ * e.g. ambari-server/src/examples/rack_hosts.yaml
+ * @return map of rack name to the hosts of the current group that are mapped to that rack
+ */
+ private Map<String, Set<String>> organizeHostsByRack(Set<String> hosts, String rackYamlFile) {
+ try {
+ Map<String, String> hostToRackMap = getHostToRackMap(rackYamlFile);
+ Map<String, Set<String>> rackToHostsMap = new HashMap<>();
+ for (String host : hosts) {
+ if (hostToRackMap.containsKey(host)) {
+ String rack = hostToRackMap.get(host);
+ if (!rackToHostsMap.containsKey(rack)) {
+ rackToHostsMap.put(rack, new HashSet<>());
+ }
+ rackToHostsMap.get(rack).add(host);
+ } else {
+ throw new RuntimeException(String.format("Rack mapping is not present for host name: %s", host));
+ }
+ }
+ return rackToHostsMap;
+ } catch (Exception e) {
+ throw new RuntimeException(
+ String.format("Failed to generate Rack to Hosts mapping. filePath: %s", rackYamlFile), e);
+ }
+ }
+
+ /**
+  * Reads the rack yaml file and flattens it into a host name to rack name lookup.
+  * <p>
+  * The file must contain a top-level {@code racks} mapping whose keys are rack names. Each rack
+  * may list its hosts directly under {@code hosts}, under one or more {@code hostGroups} entries
+  * (each with its own {@code hosts} list), or both. A rack without any host entries contributes
+  * nothing to the result.
+  *
+  * @param rackYamlFile file path of the yaml file containing the rack to hosts mapping
+  * @return map of host name to the name of the rack that lists it
+  * @throws IOException if the file cannot be read or parsed, or if it does not contain a
+  *         {@code racks} mapping
+  */
+ private static Map<String, String> getHostToRackMap(String rackYamlFile)
+     throws IOException {
+   Object parsedYaml;
+   // try-with-resources guarantees the file handle is released even when the
+   // YamlReader cannot be constructed or read() fails.
+   try (FileReader fileReader = new FileReader(rackYamlFile)) {
+     YamlReader yamlReader = new YamlReader(fileReader);
+     try {
+       parsedYaml = yamlReader.read();
+     } finally {
+       yamlReader.close();
+     }
+   }
+   // Validate the document shape up front so a bad file yields a descriptive error
+   // instead of a bare NullPointerException / ClassCastException further down.
+   if (!(parsedYaml instanceof Map)) {
+     throw new IOException(
+         String.format("Rack yaml file %s is empty or its root is not a mapping", rackYamlFile));
+   }
+   Object racks = ((Map) parsedYaml).get(RACKS_YAML_KEY_NAME);
+   if (!(racks instanceof Map)) {
+     throw new IOException(String.format(
+         "Rack yaml file %s does not contain a '%s' mapping", rackYamlFile, RACKS_YAML_KEY_NAME));
+   }
+   Map<String, String> hostToRackMap = new HashMap<>();
+   for (Map.Entry entry : (Set<Map.Entry>) ((Map) racks).entrySet()) {
+     String rackName = (String) entry.getKey();
+     if (!(entry.getValue() instanceof Map)) {
+       // Rack declared without a body (e.g. "rack-1:"): it has no hosts to register.
+       continue;
+     }
+     Map rackInfoMap = (Map) entry.getValue();
+     // Hosts listed directly under the rack.
+     addHostsForRack(rackInfoMap.get(HOSTS_YAML_KEY_NAME), rackName, hostToRackMap);
+     // Hosts listed under the rack's host groups.
+     Object hostGroups = rackInfoMap.get(HOST_GROUPS_YAML_KEY_NAME);
+     if (hostGroups instanceof List) {
+       for (Object hostGroup : (List) hostGroups) {
+         if (hostGroup instanceof Map) {
+           addHostsForRack(((Map) hostGroup).get(HOSTS_YAML_KEY_NAME), rackName, hostToRackMap);
+         }
+       }
+     }
+   }
+   return hostToRackMap;
+ }
+
+ /**
+  * Registers every host of a yaml {@code hosts} value under the given rack.
+  *
+  * @param hostNames value read from a {@code hosts} key; ignored unless it is a list
+  *                  (an absent or empty key yields {@code null})
+  * @param rackName name of the rack the hosts belong to
+  * @param hostToRackMap lookup being populated
+  */
+ private static void addHostsForRack(Object hostNames, String rackName, Map<String, String> hostToRackMap) {
+   if (hostNames instanceof List) {
+     for (String host : (List<String>) hostNames) {
+       hostToRackMap.put(host, rackName);
+     }
+   }
+ }
+
+ /**
* Determine if service checks need to be ran after the stages.
* @param upgradeContext the upgrade context
* @return Return the stages, which may potentially be followed by service checks.