You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by sm...@apache.org on 2016/02/10 19:01:07 UTC
ambari git commit: AMBARI-14865: Auto start - Maintenance mode of components should be respected when handling agent registration.
Repository: ambari
Updated Branches:
refs/heads/trunk 46bbbf9c2 -> ac5ca8fd1
AMBARI-14865: Auto start - Maintenance mode of components should be respected when handling agent registration.
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/ac5ca8fd
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/ac5ca8fd
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/ac5ca8fd
Branch: refs/heads/trunk
Commit: ac5ca8fd1cd51360f7394c1fd85f8a062a4fb8e2
Parents: 46bbbf9
Author: Nahappan Somasundaram <ns...@hortonworks.com>
Authored: Mon Feb 1 14:22:15 2016 -0800
Committer: Nahappan Somasundaram <ns...@hortonworks.com>
Committed: Wed Feb 10 09:27:47 2016 -0800
----------------------------------------------------------------------
.../main/python/ambari_agent/RecoveryManager.py | 41 +++------
.../test/python/ambari_agent/TestActionQueue.py | 2 +-
.../python/ambari_agent/TestRecoveryManager.py | 88 +++++++++-----------
.../ambari/server/agent/HeartBeatHandler.java | 39 ++++++++-
.../ambari/server/agent/RecoveryConfig.java | 18 +---
.../server/agent/TestHeartbeatHandler.java | 78 ++++++++++++++++-
6 files changed, 167 insertions(+), 99 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/ac5ca8fd/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py b/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
index b5795b2..ed537ca 100644
--- a/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
+++ b/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
@@ -88,7 +88,6 @@ class RecoveryManager:
self.allowed_desired_states = [self.STARTED, self.INSTALLED]
self.allowed_current_states = [self.INIT, self.INSTALLED]
self.enabled_components = []
- self.disabled_components = []
self.statuses = {}
self.__status_lock = threading.RLock()
self.__command_lock = threading.RLock()
@@ -107,7 +106,7 @@ class RecoveryManager:
self.actions = {}
- self.update_config(6, 60, 5, 12, recovery_enabled, auto_start_only, "", "")
+ self.update_config(6, 60, 5, 12, recovery_enabled, auto_start_only, "")
pass
@@ -213,19 +212,12 @@ class RecoveryManager:
pass
"""
- Whether specific components are enabled/disabled for recovery. Being enabled takes
- precedence over being disabled. When specific components are enabled then only
- those components are enabled. When specific components are disabled then all of
- the other components are enabled.
+ Whether specific components are enabled for recovery.
"""
def configured_for_recovery(self, component):
- if len(self.disabled_components) == 0 and len(self.enabled_components) == 0:
- return True
- if len(self.disabled_components) > 0 and component not in self.disabled_components \
- and len(self.enabled_components) == 0:
- return True
if len(self.enabled_components) > 0 and component in self.enabled_components:
return True
+
return False
def requires_recovery(self, component):
@@ -547,8 +539,8 @@ class RecoveryManager:
"maxCount" : 10,
"windowInMinutes" : 60,
"retryGap" : 0,
- "disabledComponents" : "a,b",
- "enabledComponents" : "c,d"}
+ "components" : "a,b"
+ }
"""
recovery_enabled = False
@@ -558,7 +550,6 @@ class RecoveryManager:
retry_gap = 5
max_lifetime_count = 12
enabled_components = ""
- disabled_components = ""
if reg_resp and "recoveryConfig" in reg_resp:
@@ -578,17 +569,16 @@ class RecoveryManager:
if 'maxLifetimeCount' in config:
max_lifetime_count = self._read_int_(config['maxLifetimeCount'], max_lifetime_count)
- if 'enabledComponents' in config:
- enabled_components = config['enabledComponents']
- if 'disabledComponents' in config:
- disabled_components = config['disabledComponents']
+ if 'components' in config:
+ enabled_components = config['components']
+
self.update_config(max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled, auto_start_only,
- enabled_components, disabled_components)
+ enabled_components)
pass
def update_config(self, max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled,
- auto_start_only, enabled_components, disabled_components):
+ auto_start_only, enabled_components):
"""
Update recovery configuration, recovery is disabled if configuration values
are not correct
@@ -619,7 +609,6 @@ class RecoveryManager:
self.retry_gap_in_sec = retry_gap * 60
self.auto_start_only = auto_start_only
self.max_lifetime_count = max_lifetime_count
- self.disabled_components = []
self.enabled_components = []
self.allowed_desired_states = [self.STARTED, self.INSTALLED]
@@ -635,19 +624,13 @@ class RecoveryManager:
if len(component.strip()) > 0:
self.enabled_components.append(component.strip())
- if disabled_components is not None and len(disabled_components) > 0:
- components = disabled_components.split(",")
- for component in components:
- if len(component.strip()) > 0:
- self.disabled_components.append(component.strip())
-
self.recovery_enabled = recovery_enabled
if self.recovery_enabled:
logger.info(
"==> Auto recovery is enabled with maximum %s in %s minutes with gap of %s minutes between and"
- " lifetime max being %s. Enabled components - %s and Disabled components - %s",
+ " lifetime max being %s. Enabled components - %s",
self.max_count, self.window_in_min, self.retry_gap, self.max_lifetime_count,
- ', '.join(self.enabled_components), ', '.join(self.disabled_components))
+ ', '.join(self.enabled_components))
pass
http://git-wip-us.apache.org/repos/asf/ambari/blob/ac5ca8fd/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py b/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
index 715a60c..f7e2894 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
@@ -338,7 +338,7 @@ class TestActionQueue(TestCase):
config.set('agent', 'tolerate_download_failures', "true")
dummy_controller = MagicMock()
dummy_controller.recovery_manager = RecoveryManager(tempfile.mktemp())
- dummy_controller.recovery_manager.update_config(5, 5, 1, 11, True, False, "", "")
+ dummy_controller.recovery_manager.update_config(5, 5, 1, 11, True, False, "")
actionQueue = ActionQueue(config, dummy_controller)
unfreeze_flag = threading.Event()
http://git-wip-us.apache.org/repos/asf/ambari/blob/ac5ca8fd/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py b/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
index 0321a20..ef81704 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
@@ -141,6 +141,7 @@ class _TestRecoveryManager(TestCase):
mock_uds.reset_mock()
+ rm.update_config(12, 5, 1, 15, True, False, "NODEMANAGER")
rm.process_execution_commands([self.exec_command1, self.exec_command2, self.exec_command3])
mock_uds.assert_has_calls([call("NODEMANAGER", "INSTALLED")], [call("NODEMANAGER", "STARTED")])
@@ -173,32 +174,29 @@ class _TestRecoveryManager(TestCase):
rm = RecoveryManager(tempfile.mktemp(), True, False)
self.assertTrue(rm.enabled())
- rm.update_config(0, 60, 5, 12, True, False, "", "")
+ config = rm.update_config(0, 60, 5, 12, True, False, "")
self.assertFalse(rm.enabled())
- rm.update_config(6, 60, 5, 12, True, False, "", "")
+ rm.update_config(6, 60, 5, 12, True, False, "")
self.assertTrue(rm.enabled())
- rm.update_config(6, 0, 5, 12, True, False, "", "")
+ rm.update_config(6, 0, 5, 12, True, False, "")
self.assertFalse(rm.enabled())
- rm.update_config(6, 60, 0, 12, True, False, "", "")
+ rm.update_config(6, 60, 0, 12, True, False, "")
self.assertFalse(rm.enabled())
- rm.update_config(6, 60, 1, 12, True, False, None, None)
+ rm.update_config(6, 60, 1, 12, True, False, None)
self.assertTrue(rm.enabled())
- rm.update_config(6, 60, 61, 12, True, False, "", None)
+ rm.update_config(6, 60, 61, 12, True, False, None)
self.assertFalse(rm.enabled())
- rm.update_config(6, 60, 5, 0, True, False, None, "")
- self.assertFalse(rm.enabled())
-
- rm.update_config(6, 60, 5, 4, True, False, "", "")
+ rm.update_config(6, 60, 5, 4, True, False, "")
self.assertFalse(rm.enabled())
# maximum 2 in 2 minutes and at least 1 minute wait
- rm.update_config(2, 5, 1, 4, True, False, "", "")
+ rm.update_config(2, 5, 1, 4, True, False, "")
self.assertTrue(rm.enabled())
# T = 1000-2
@@ -224,7 +222,7 @@ class _TestRecoveryManager(TestCase):
self.assertFalse(rm.may_execute("NODEMANAGER")) # too soon
# maximum 2 in 2 minutes and no min wait
- rm.update_config(2, 5, 1, 5, True, True, "", "")
+ rm.update_config(2, 5, 1, 5, True, True, "")
# T = 1500-3
self.assertTrue(rm.execute("NODEMANAGER2"))
@@ -244,7 +242,7 @@ class _TestRecoveryManager(TestCase):
def test_recovery_required(self):
rm = RecoveryManager(tempfile.mktemp(), True, False)
-
+ rm.update_config(12, 5, 1, 15, True, False, "NODEMANAGER")
rm.update_current_status("NODEMANAGER", "INSTALLED")
rm.update_desired_status("NODEMANAGER", "INSTALLED")
self.assertFalse(rm.requires_recovery("NODEMANAGER"))
@@ -292,13 +290,13 @@ class _TestRecoveryManager(TestCase):
def test_recovery_required2(self):
rm = RecoveryManager(tempfile.mktemp(), True, True)
- rm.update_config(15, 5, 1, 16, True, False, "", "")
+ rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER")
rm.update_current_status("NODEMANAGER", "INSTALLED")
rm.update_desired_status("NODEMANAGER", "STARTED")
self.assertTrue(rm.requires_recovery("NODEMANAGER"))
rm = RecoveryManager(tempfile.mktemp(), True, True)
- rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER", "")
+ rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER")
rm.update_current_status("NODEMANAGER", "INSTALLED")
rm.update_desired_status("NODEMANAGER", "STARTED")
self.assertTrue(rm.requires_recovery("NODEMANAGER"))
@@ -308,26 +306,16 @@ class _TestRecoveryManager(TestCase):
self.assertFalse(rm.requires_recovery("DATANODE"))
rm = RecoveryManager(tempfile.mktemp(), True, True)
- rm.update_config(15, 5, 1, 16, True, False, "", "NODEMANAGER")
+ rm.update_config(15, 5, 1, 16, True, False, "")
rm.update_current_status("NODEMANAGER", "INSTALLED")
rm.update_desired_status("NODEMANAGER", "STARTED")
self.assertFalse(rm.requires_recovery("NODEMANAGER"))
rm.update_current_status("DATANODE", "INSTALLED")
rm.update_desired_status("DATANODE", "STARTED")
- self.assertTrue(rm.requires_recovery("DATANODE"))
-
- rm.update_config(15, 5, 1, 16, True, False, "", "NODEMANAGER")
- rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER", "")
- rm.update_current_status("NODEMANAGER", "INSTALLED")
- rm.update_desired_status("NODEMANAGER", "STARTED")
- self.assertTrue(rm.requires_recovery("NODEMANAGER"))
-
- rm.update_current_status("DATANODE", "INSTALLED")
- rm.update_desired_status("DATANODE", "STARTED")
self.assertFalse(rm.requires_recovery("DATANODE"))
- rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER", "NODEMANAGER")
+ rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER")
rm.update_current_status("NODEMANAGER", "INSTALLED")
rm.update_desired_status("NODEMANAGER", "STARTED")
self.assertTrue(rm.requires_recovery("NODEMANAGER"))
@@ -392,12 +380,12 @@ class _TestRecoveryManager(TestCase):
4200, 4201, 4202,
4300, 4301, 4302]
rm = RecoveryManager(tempfile.mktemp(), True)
- rm.update_config(15, 5, 1, 16, True, False, "", "")
+ rm.update_config(15, 5, 1, 16, True, False, "")
command1 = copy.deepcopy(self.command)
rm.store_or_update_command(command1)
-
+ rm.update_config(12, 5, 1, 15, True, False, "NODEMANAGER")
rm.update_current_status("NODEMANAGER", "INSTALLED")
rm.update_desired_status("NODEMANAGER", "STARTED")
self.assertEqual("INSTALLED", rm.get_current_status("NODEMANAGER"))
@@ -423,14 +411,14 @@ class _TestRecoveryManager(TestCase):
self.assertEqual(1, len(commands))
self.assertEqual("INSTALL", commands[0]["roleCommand"])
- rm.update_config(2, 5, 1, 5, True, True, "", "")
+ rm.update_config(2, 5, 1, 5, True, True, "")
rm.update_current_status("NODEMANAGER", "INIT")
rm.update_desired_status("NODEMANAGER", "INSTALLED")
commands = rm.get_recovery_commands()
self.assertEqual(0, len(commands))
- rm.update_config(12, 5, 1, 15, True, False, "", "")
+ rm.update_config(12, 5, 1, 15, True, False, "NODEMANAGER")
rm.update_current_status("NODEMANAGER", "INIT")
rm.update_desired_status("NODEMANAGER", "INSTALLED")
@@ -471,25 +459,25 @@ class _TestRecoveryManager(TestCase):
def test_update_rm_config(self, mock_uc):
rm = RecoveryManager(tempfile.mktemp())
rm.update_configuration_from_registration(None)
- mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "", "")])
+ mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "")])
mock_uc.reset_mock()
rm.update_configuration_from_registration({})
- mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "", "")])
+ mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "")])
mock_uc.reset_mock()
rm.update_configuration_from_registration(
{"recoveryConfig": {
"type" : "DEFAULT"}}
)
- mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "", "")])
+ mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "")])
mock_uc.reset_mock()
rm.update_configuration_from_registration(
{"recoveryConfig": {
"type" : "FULL"}}
)
- mock_uc.assert_has_calls([call(6, 60, 5, 12, True, False, "", "")])
+ mock_uc.assert_has_calls([call(6, 60, 5, 12, True, False, "")])
mock_uc.reset_mock()
rm.update_configuration_from_registration(
@@ -497,7 +485,7 @@ class _TestRecoveryManager(TestCase):
"type" : "AUTO_START",
"max_count" : "med"}}
)
- mock_uc.assert_has_calls([call(6, 60, 5, 12, True, True, "", "")])
+ mock_uc.assert_has_calls([call(6, 60, 5, 12, True, True, "")])
mock_uc.reset_mock()
rm.update_configuration_from_registration(
@@ -507,10 +495,9 @@ class _TestRecoveryManager(TestCase):
"windowInMinutes" : 20,
"retryGap" : 2,
"maxLifetimeCount" : 5,
- "enabledComponents" : " A,B",
- "disabledComponents" : "C"}}
+ "components" : " A,B"}}
)
- mock_uc.assert_has_calls([call(5, 20, 2, 5, True, True, " A,B", "C")])
+ mock_uc.assert_has_calls([call(5, 20, 2, 5, True, True, " A,B")])
pass
@patch.object(RecoveryManager, "_now_")
@@ -522,7 +509,7 @@ class _TestRecoveryManager(TestCase):
rec_st = rm.get_recovery_status()
self.assertEquals(rec_st, {"summary": "DISABLED"})
- rm.update_config(2, 5, 1, 4, True, True, "", "")
+ rm.update_config(2, 5, 1, 4, True, True, "")
rec_st = rm.get_recovery_status()
self.assertEquals(rec_st, {"summary": "RECOVERABLE", "componentReports": []})
@@ -566,12 +553,12 @@ class _TestRecoveryManager(TestCase):
[1000, 1001, 1002, 1003, 1104, 1105, 1106, 1807, 1808, 1809, 1810, 1811, 1812]
rm = RecoveryManager(tempfile.mktemp(), True)
- rm.update_config(5, 5, 1, 11, True, False, "", "")
+ rm.update_config(5, 5, 1, 11, True, False, "")
command1 = copy.deepcopy(self.command)
rm.store_or_update_command(command1)
-
+ rm.update_config(12, 5, 1, 15, True, False, "NODEMANAGER")
rm.update_current_status("NODEMANAGER", "INSTALLED")
rm.update_desired_status("NODEMANAGER", "STARTED")
@@ -607,23 +594,24 @@ class _TestRecoveryManager(TestCase):
def test_configured_for_recovery(self):
rm = RecoveryManager(tempfile.mktemp(), True)
+ rm.update_config(12, 5, 1, 15, True, False, "A,B")
self.assertTrue(rm.configured_for_recovery("A"))
self.assertTrue(rm.configured_for_recovery("B"))
- rm.update_config(5, 5, 1, 11, True, False, "", "")
- self.assertTrue(rm.configured_for_recovery("A"))
- self.assertTrue(rm.configured_for_recovery("B"))
+ rm.update_config(5, 5, 1, 11, True, False, "")
+ self.assertFalse(rm.configured_for_recovery("A"))
+ self.assertFalse(rm.configured_for_recovery("B"))
- rm.update_config(5, 5, 1, 11, True, False, "A", "")
+ rm.update_config(5, 5, 1, 11, True, False, "A")
self.assertTrue(rm.configured_for_recovery("A"))
self.assertFalse(rm.configured_for_recovery("B"))
- rm.update_config(5, 5, 1, 11, True, False, "", "B,C")
+ rm.update_config(5, 5, 1, 11, True, False, "A")
self.assertTrue(rm.configured_for_recovery("A"))
self.assertFalse(rm.configured_for_recovery("B"))
self.assertFalse(rm.configured_for_recovery("C"))
- rm.update_config(5, 5, 1, 11, True, False, "A, D, F ", "B,C")
+ rm.update_config(5, 5, 1, 11, True, False, "A, D, F ")
self.assertTrue(rm.configured_for_recovery("A"))
self.assertFalse(rm.configured_for_recovery("B"))
self.assertFalse(rm.configured_for_recovery("C"))
@@ -637,7 +625,7 @@ class _TestRecoveryManager(TestCase):
[1000, 1071, 1372]
rm = RecoveryManager(tempfile.mktemp(), True)
- rm.update_config(2, 5, 1, 4, True, True, "", "")
+ rm.update_config(2, 5, 1, 4, True, True, "")
rm.execute("COMPONENT")
actions = rm.get_actions_copy()["COMPONENT"]
@@ -655,7 +643,7 @@ class _TestRecoveryManager(TestCase):
def test_is_action_info_stale(self, time_mock):
rm = RecoveryManager(tempfile.mktemp(), True)
- rm.update_config(5, 60, 5, 16, True, False, "", "")
+ rm.update_config(5, 60, 5, 16, True, False, "")
time_mock.return_value = 0
self.assertFalse(rm.is_action_info_stale("COMPONENT_NAME"))
http://git-wip-us.apache.org/repos/asf/ambari/blob/ac5ca8fd/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
index 210fe17..248ce4b 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
@@ -989,7 +989,44 @@ public class HeartBeatHandler {
if(response.getAgentConfig() != null) {
LOG.debug("Agent configuration map set to " + response.getAgentConfig());
}
- response.setRecoveryConfig(RecoveryConfig.getRecoveryConfig(config));
+
+ //
+ // Filter the enabled components by maintenance mode
+ //
+
+ // Build a map of component name => Service component host
+ // for easy look up of maintenance state by component name.
+ // As of now, a host can belong to only one cluster.
+ // Clusters::getClustersForHost(hostname) returns one item.
+ Map<String, ServiceComponentHost> schFromComponentName = new HashMap<>();
+
+ for (Cluster cl : clusterFsm.getClustersForHost(hostname)) {
+ List<ServiceComponentHost> scHosts = cl.getServiceComponentHosts(hostname);
+ for (ServiceComponentHost sch : scHosts) {
+ schFromComponentName.put(sch.getServiceComponentName(), sch);
+ }
+ }
+
+ // Keep only the components that have maintenance state set to OFF
+ List<String> enabledComponents = new ArrayList<>();
+ String[] confEnabledComponents = config.getEnabledComponents().split(",");
+
+ for (String componentName : confEnabledComponents) {
+ ServiceComponentHost sch = schFromComponentName.get(componentName);
+
+ // Append the component name only if it is
+ // in the host and not in maintenance mode.
+ if (sch != null && sch.getMaintenanceState() == MaintenanceState.OFF) {
+ enabledComponents.add(componentName);
+ }
+ }
+
+ // Overwrite the pre-constructed RecoveryConfig's list of
+ // enabled components with the filtered list
+ RecoveryConfig rc = RecoveryConfig.getRecoveryConfig(config);
+ rc.setEnabledComponents(StringUtils.join(enabledComponents, ','));
+ response.setRecoveryConfig(rc);
+
if(response.getRecoveryConfig() != null) {
LOG.info("Recovery configuration set to " + response.getRecoveryConfig().toString());
}
http://git-wip-us.apache.org/repos/asf/ambari/blob/ac5ca8fd/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java
index 3f558eb..3da8609 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java
@@ -48,21 +48,9 @@ public class RecoveryConfig {
@SerializedName("maxLifetimeCount")
private String maxLifetimeCount;
- @SerializedName("enabledComponents")
+ @SerializedName("components")
private String enabledComponents;
- @SerializedName("disabledComponents")
- private String disabledComponents;
-
-
- public String getDisabledComponents() {
- return disabledComponents;
- }
-
- public void setDisabledComponents(String disabledComponents) {
- this.disabledComponents = disabledComponents;
- }
-
public String getEnabledComponents() {
return enabledComponents;
}
@@ -118,7 +106,6 @@ public class RecoveryConfig {
rc.setRetryGap(conf.getNodeRecoveryRetryGap());
rc.setType(conf.getNodeRecoveryType());
rc.setWindowInMinutes(conf.getNodeRecoveryWindowInMin());
- rc.setDisabledComponents(conf.getDisabledComponents());
rc.setEnabledComponents(conf.getEnabledComponents());
return rc;
}
@@ -131,8 +118,7 @@ public class RecoveryConfig {
buffer.append(", windowInMinutes=").append(windowInMinutes);
buffer.append(", retryGap=").append(retryGap);
buffer.append(", maxLifetimeCount=").append(maxLifetimeCount);
- buffer.append(", disabledComponents=").append(disabledComponents);
- buffer.append(", enabledComponents=").append(enabledComponents);
+ buffer.append(", components=").append(enabledComponents);
buffer.append('}');
return buffer.toString();
}
http://git-wip-us.apache.org/repos/asf/ambari/blob/ac5ca8fd/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
index d3945dc..2a4cec8 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/agent/TestHeartbeatHandler.java
@@ -183,9 +183,11 @@ public class TestHeartbeatHandler {
public TemporaryFolder temporaryFolder = new TemporaryFolder();
+ private InMemoryDefaultTestModule module;
+
@Before
public void setup() throws Exception {
- InMemoryDefaultTestModule module = new InMemoryDefaultTestModule(){
+ module = new InMemoryDefaultTestModule(){
@Override
protected void configure() {
@@ -856,11 +858,23 @@ public class TestHeartbeatHandler {
Clusters fsm = clusters;
HeartBeatHandler handler = new HeartBeatHandler(fsm, new ActionQueue(), am,
injector);
- clusters.addHost(DummyHostname1);
+ Cluster cluster = getDummyCluster();
+ Service hdfs = cluster.addService(HDFS);
+ hdfs.persist();
+ hdfs.addServiceComponent(DATANODE).persist();
+ hdfs.getServiceComponent(DATANODE).addServiceComponentHost(DummyHostname1).persist();
+ hdfs.addServiceComponent(NAMENODE).persist();
+ hdfs.getServiceComponent(NAMENODE).addServiceComponentHost(DummyHostname1).persist();
+ hdfs.addServiceComponent(HDFS_CLIENT).persist();
+ hdfs.getServiceComponent(HDFS_CLIENT).addServiceComponentHost(DummyHostname1).persist();
+
Host hostObject = clusters.getHost(DummyHostname1);
hostObject.setIPv4("ipv4");
hostObject.setIPv6("ipv6");
+ // add recovery.enabled_components to ambari configuration
+ module.getProperties().put("recovery.enabled_components", "NAMENODE,DATANODE");
+
Register reg = new Register();
HostInfo hi = new HostInfo();
hi.setHostName(DummyHostname1);
@@ -877,6 +891,7 @@ public class TestHeartbeatHandler {
assertEquals(rc.getMaxLifetimeCount(), "10");
assertEquals(rc.getRetryGap(), "2");
assertEquals(rc.getWindowInMinutes(), "23");
+ assertEquals(rc.getEnabledComponents(), "NAMENODE,DATANODE");
rc = RecoveryConfig.getRecoveryConfig(new Configuration());
assertEquals(rc.getMaxCount(), "6");
@@ -884,6 +899,65 @@ public class TestHeartbeatHandler {
assertEquals(rc.getMaxLifetimeCount(), "12");
assertEquals(rc.getRetryGap(), "5");
assertEquals(rc.getWindowInMinutes(), "60");
+ assertEquals(rc.getEnabledComponents(), "");
+
+ // clean up
+ module.getProperties().remove("recovery.enabled_components");
+ }
+
+ //
+ // Same as testRegistrationRecoveryConfig but will test
+ // maintenance mode set to ON for a service component host
+ //
+ @Test
+ public void testRegistrationRecoveryConfigMaintenanceMode()
+ throws AmbariException, InvalidStateTransitionException {
+ ActionManager am = getMockActionManager();
+ replay(am);
+ Clusters fsm = clusters;
+ HeartBeatHandler handler = new HeartBeatHandler(fsm, new ActionQueue(), am,
+ injector);
+ Cluster cluster = getDummyCluster();
+ Service hdfs = cluster.addService(HDFS);
+ hdfs.persist();
+ hdfs.addServiceComponent(DATANODE).persist();
+ hdfs.getServiceComponent(DATANODE).addServiceComponentHost(DummyHostname1).persist();
+ hdfs.addServiceComponent(NAMENODE).persist();
+ hdfs.getServiceComponent(NAMENODE).addServiceComponentHost(DummyHostname1).persist();
+ hdfs.addServiceComponent(HDFS_CLIENT).persist();
+ hdfs.getServiceComponent(HDFS_CLIENT).addServiceComponentHost(DummyHostname1).persist();
+
+ Host hostObject = clusters.getHost(DummyHostname1);
+ hostObject.setIPv4("ipv4");
+ hostObject.setIPv6("ipv6");
+
+ // add recovery.enabled_components to ambari configuration
+ module.getProperties().put("recovery.enabled_components", "NAMENODE,DATANODE,HDFS_CLIENT");
+
+ // set maintenance mode on HDFS_CLIENT on host1 to true
+ ServiceComponentHost schHdfsClient = hdfs.getServiceComponent(HDFS_CLIENT).getServiceComponentHost(DummyHostname1);
+ schHdfsClient.setMaintenanceState(MaintenanceState.ON);
+
+ Register reg = new Register();
+ HostInfo hi = new HostInfo();
+ hi.setHostName(DummyHostname1);
+ hi.setOS(DummyOsType);
+ reg.setHostname(DummyHostname1);
+ reg.setCurrentPingPort(DummyCurrentPingPort);
+ reg.setHardwareProfile(hi);
+ reg.setAgentVersion(metaInfo.getServerVersion());
+ reg.setPrefix(Configuration.PREFIX_DIR);
+ RegistrationResponse rr = handler.handleRegistration(reg);
+ RecoveryConfig rc = rr.getRecoveryConfig();
+ assertEquals(rc.getMaxCount(), "4");
+ assertEquals(rc.getType(), "FULL");
+ assertEquals(rc.getMaxLifetimeCount(), "10");
+ assertEquals(rc.getRetryGap(), "2");
+ assertEquals(rc.getWindowInMinutes(), "23");
+ assertEquals(rc.getEnabledComponents(), "NAMENODE,DATANODE"); // HDFS_CLIENT is in maintenance mode
+
+ // clean up
+ module.getProperties().remove("recovery.enabled_components");
}
@Test