You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by sm...@apache.org on 2015/10/19 03:32:59 UTC
ambari git commit: AMBARI-13463. Auto start should allow selection of
components that can be auto-started (smohanty)
Repository: ambari
Updated Branches:
refs/heads/branch-2.1 30e3ac644 -> 799f8a9cc
AMBARI-13463. Auto start should allow selection of components that can be auto-started (smohanty)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/799f8a9c
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/799f8a9c
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/799f8a9c
Branch: refs/heads/branch-2.1
Commit: 799f8a9cceaedfe23498e8cc268a80e5102ba49a
Parents: 30e3ac6
Author: Sumit Mohanty <sm...@hortonworks.com>
Authored: Sun Oct 18 18:32:44 2015 -0700
Committer: Sumit Mohanty <sm...@hortonworks.com>
Committed: Sun Oct 18 18:32:44 2015 -0700
----------------------------------------------------------------------
.../src/main/python/ambari_agent/ActionQueue.py | 11 +-
.../main/python/ambari_agent/RecoveryManager.py | 70 +++++++++--
.../test/python/ambari_agent/TestActionQueue.py | 2 +-
.../python/ambari_agent/TestRecoveryManager.py | 123 +++++++++++++++----
.../ambari/server/agent/HeartBeatHandler.java | 2 +-
.../ambari/server/agent/RecoveryConfig.java | 26 ++++
.../server/configuration/Configuration.java | 30 ++++-
7 files changed, 221 insertions(+), 43 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/799f8a9c/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
index 6a64f99..b82afe8 100644
--- a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
+++ b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
@@ -326,7 +326,8 @@ class ActionQueue(threading.Thread):
# let recovery manager know the current state
if status == self.COMPLETED_STATUS:
- if self.controller.recovery_manager.enabled() and command.has_key('roleCommand'):
+ if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \
+ and self.controller.recovery_manager.configured_for_recovery(command['role']):
if command['roleCommand'] == self.ROLE_COMMAND_START:
self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
self.controller.recovery_manager.update_config_staleness(command['role'], False)
@@ -441,10 +442,14 @@ class ActionQueue(threading.Thread):
if component_status_result['exitcode'] == 0:
component_status = LiveStatus.LIVE_STATUS
- self.controller.recovery_manager.update_current_status(component, component_status)
+ if self.controller.recovery_manager.enabled() \
+ and self.controller.recovery_manager.configured_for_recovery(component):
+ self.controller.recovery_manager.update_current_status(component, component_status)
else:
component_status = LiveStatus.DEAD_STATUS
- self.controller.recovery_manager.update_current_status(component, component_status)
+ if self.controller.recovery_manager.enabled() \
+ and self.controller.recovery_manager.configured_for_recovery(component):
+ self.controller.recovery_manager.update_current_status(component, component_status)
request_execution_cmd = self.controller.recovery_manager.requires_recovery(component)
if component_status_result.has_key('structuredOut'):
http://git-wip-us.apache.org/repos/asf/ambari/blob/799f8a9c/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py b/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
index 595c4fc..cab81f5 100644
--- a/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
+++ b/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
@@ -85,6 +85,8 @@ class RecoveryManager:
self.id = int(time.time())
self.allowed_desired_states = [self.STARTED, self.INSTALLED]
self.allowed_current_states = [self.INIT, self.INSTALLED]
+ self.enabled_components = []
+ self.disabled_components = []
self.actions = {}
self.statuses = {}
self.__status_lock = threading.RLock()
@@ -93,7 +95,7 @@ class RecoveryManager:
self.active_command_count = 0
self.paused = False
- self.update_config(6, 60, 5, 12, recovery_enabled, auto_start_only)
+ self.update_config(6, 60, 5, 12, recovery_enabled, auto_start_only, "", "")
pass
@@ -186,6 +188,21 @@ class RecoveryManager:
self.remove_command(component)
pass
+ """
+ Whether specific components are enabled/disabled for recovery. Being enabled takes
+ precedence over being disabled. When specific components are enabled then only
+ those components are enabled. When specific components are disabled then all of
+ the other components are enabled.
+ """
+ def configured_for_recovery(self, component):
+ if len(self.disabled_components) == 0 and len(self.enabled_components) == 0:
+ return True
+ if len(self.disabled_components) > 0 and component not in self.disabled_components \
+ and len(self.enabled_components) == 0:
+ return True
+ if len(self.enabled_components) > 0 and component in self.enabled_components:
+ return True
+ return False
def requires_recovery(self, component):
"""
@@ -197,6 +214,9 @@ class RecoveryManager:
if not self.enabled():
return False
+ if not self.configured_for_recovery(component):
+ return False
+
if component not in self.statuses:
return False
@@ -433,7 +453,9 @@ class RecoveryManager:
"type" : "DEFAULT|AUTO_START|FULL",
"maxCount" : 10,
"windowInMinutes" : 60,
- "retryGap" : 0 }
+ "retryGap" : 0,
+ "disabledComponents" : "a,b",
+ "enabledComponents" : "c,d"}
"""
recovery_enabled = False
@@ -442,8 +464,12 @@ class RecoveryManager:
window_in_min = 60
retry_gap = 5
max_lifetime_count = 12
+ enabled_components = ""
+ disabled_components = ""
+
if reg_resp and "recoveryConfig" in reg_resp:
+ logger.info("RecoverConfig = " + pprint.pformat(reg_resp["recoveryConfig"]))
config = reg_resp["recoveryConfig"]
if "type" in config:
if config["type"] in ["AUTO_START", "FULL"]:
@@ -458,11 +484,18 @@ class RecoveryManager:
retry_gap = self._read_int_(config["retryGap"], retry_gap)
if 'maxLifetimeCount' in config:
max_lifetime_count = self._read_int_(config['maxLifetimeCount'], max_lifetime_count)
- self.update_config(max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled, auto_start_only)
+
+ if 'enabledComponents' in config:
+ enabled_components = config['enabledComponents']
+ if 'disabledComponents' in config:
+ disabled_components = config['disabledComponents']
+ self.update_config(max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled, auto_start_only,
+ enabled_components, disabled_components)
pass
- def update_config(self, max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled, auto_start_only):
+ def update_config(self, max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled,
+ auto_start_only, enabled_components, disabled_components):
"""
Update recovery configuration, recovery is disabled if configuration values
are not correct
@@ -493,6 +526,8 @@ class RecoveryManager:
self.retry_gap_in_sec = retry_gap * 60
self.auto_start_only = auto_start_only
self.max_lifetime_count = max_lifetime_count
+ self.disabled_components = []
+ self.enabled_components = []
self.allowed_desired_states = [self.STARTED, self.INSTALLED]
self.allowed_current_states = [self.INIT, self.INSTALLED, self.STARTED]
@@ -501,11 +536,25 @@ class RecoveryManager:
self.allowed_desired_states = [self.STARTED]
self.allowed_current_states = [self.INSTALLED]
+ if enabled_components is not None and len(enabled_components) > 0:
+ components = enabled_components.split(",")
+ for component in components:
+ if len(component.strip()) > 0:
+ self.enabled_components.append(component.strip())
+
+ if disabled_components is not None and len(disabled_components) > 0:
+ components = disabled_components.split(",")
+ for component in components:
+ if len(component.strip()) > 0:
+ self.disabled_components.append(component.strip())
+
self.recovery_enabled = recovery_enabled
if self.recovery_enabled:
logger.info(
- "==> Auto recovery is enabled with maximum %s in %s minutes with gap of %s minutes between and lifetime max being %s.",
- self.max_count, self.window_in_min, self.retry_gap, self.max_lifetime_count)
+ "==> Auto recovery is enabled with maximum %s in %s minutes with gap of %s minutes between and"
+ " lifetime max being %s. Enabled components - %s and Disabled components - %s",
+ self.max_count, self.window_in_min, self.retry_gap, self.max_lifetime_count,
+ ', '.join(self.enabled_components), ', '.join(self.disabled_components))
pass
@@ -536,16 +585,19 @@ class RecoveryManager:
for command in commands:
if self.COMMAND_TYPE in command and command[self.COMMAND_TYPE] == ActionQueue.EXECUTION_COMMAND:
if self.ROLE in command:
- if command[self.ROLE_COMMAND] in (ActionQueue.ROLE_COMMAND_INSTALL, ActionQueue.ROLE_COMMAND_STOP):
+ if command[self.ROLE_COMMAND] in (ActionQueue.ROLE_COMMAND_INSTALL, ActionQueue.ROLE_COMMAND_STOP) \
+ and self.configured_for_recovery(command[self.ROLE]):
self.update_desired_status(command[self.ROLE], LiveStatus.DEAD_STATUS)
logger.info("Received EXECUTION_COMMAND (STOP/INSTALL), desired state of " + command[self.ROLE] + " to " +
self.get_desired_status(command[self.ROLE]) )
- elif command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_START:
+ elif command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_START \
+ and self.configured_for_recovery(command[self.ROLE]):
self.update_desired_status(command[self.ROLE], LiveStatus.LIVE_STATUS)
logger.info("Received EXECUTION_COMMAND (START), desired state of " + command[self.ROLE] + " to " +
self.get_desired_status(command[self.ROLE]) )
elif command[self.HOST_LEVEL_PARAMS].has_key('custom_command') and \
- command[self.HOST_LEVEL_PARAMS]['custom_command'] == ActionQueue.CUSTOM_COMMAND_RESTART:
+ command[self.HOST_LEVEL_PARAMS]['custom_command'] == ActionQueue.CUSTOM_COMMAND_RESTART \
+ and self.configured_for_recovery(command[self.ROLE]):
self.update_desired_status(command[self.ROLE], LiveStatus.LIVE_STATUS)
logger.info("Received EXECUTION_COMMAND (RESTART), desired state of " + command[self.ROLE] + " to " +
self.get_desired_status(command[self.ROLE]) )
http://git-wip-us.apache.org/repos/asf/ambari/blob/799f8a9c/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py b/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
index 4e130f9..ac2ab92 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
@@ -342,7 +342,7 @@ class TestActionQueue(TestCase):
config.set('agent', 'tolerate_download_failures', "true")
dummy_controller = MagicMock()
dummy_controller.recovery_manager = RecoveryManager()
- dummy_controller.recovery_manager.update_config(5, 5, 1, 11, True, False)
+ dummy_controller.recovery_manager.update_config(5, 5, 1, 11, True, False, "", "")
actionQueue = ActionQueue(config, dummy_controller)
unfreeze_flag = threading.Event()
http://git-wip-us.apache.org/repos/asf/ambari/blob/799f8a9c/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py b/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
index 1669a2c..e6115e3 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
@@ -173,32 +173,32 @@ class TestRecoveryManager(TestCase):
rm = RecoveryManager(True, False)
self.assertTrue(rm.enabled())
- rm.update_config(0, 60, 5, 12, True, False)
+ rm.update_config(0, 60, 5, 12, True, False, "", "")
self.assertFalse(rm.enabled())
- rm.update_config(6, 60, 5, 12, True, False)
+ rm.update_config(6, 60, 5, 12, True, False, "", "")
self.assertTrue(rm.enabled())
- rm.update_config(6, 0, 5, 12, True, False)
+ rm.update_config(6, 0, 5, 12, True, False, "", "")
self.assertFalse(rm.enabled())
- rm.update_config(6, 60, 0, 12, True, False)
+ rm.update_config(6, 60, 0, 12, True, False, "", "")
self.assertFalse(rm.enabled())
- rm.update_config(6, 60, 1, 12, True, False)
+ rm.update_config(6, 60, 1, 12, True, False, None, None)
self.assertTrue(rm.enabled())
- rm.update_config(6, 60, 61, 12, True, False)
+ rm.update_config(6, 60, 61, 12, True, False, "", None)
self.assertFalse(rm.enabled())
- rm.update_config(6, 60, 5, 0, True, False)
+ rm.update_config(6, 60, 5, 0, True, False, None, "")
self.assertFalse(rm.enabled())
- rm.update_config(6, 60, 5, 4, True, False)
+ rm.update_config(6, 60, 5, 4, True, False, "", "")
self.assertFalse(rm.enabled())
# maximum 2 in 2 minutes and at least 1 minute wait
- rm.update_config(2, 5, 1, 4, True, False)
+ rm.update_config(2, 5, 1, 4, True, False, "", "")
self.assertTrue(rm.enabled())
# T = 1000-2
@@ -224,7 +224,7 @@ class TestRecoveryManager(TestCase):
self.assertFalse(rm.may_execute("NODEMANAGER")) # too soon
# maximum 2 in 2 minutes and no min wait
- rm.update_config(2, 5, 1, 5, True, True)
+ rm.update_config(2, 5, 1, 5, True, True, "", "")
# T = 1500-3
self.assertTrue(rm.execute("NODEMANAGER2"))
@@ -286,6 +286,55 @@ class TestRecoveryManager(TestCase):
rm.update_current_status("NODEMANAGER", "INSTALLED")
rm.update_desired_status("NODEMANAGER", "START")
self.assertFalse(rm.requires_recovery("NODEMANAGER"))
+
+ pass
+
+ def test_recovery_required2(self):
+
+ rm = RecoveryManager(True, True)
+ rm.update_config(15, 5, 1, 16, True, False, "", "")
+ rm.update_current_status("NODEMANAGER", "INSTALLED")
+ rm.update_desired_status("NODEMANAGER", "STARTED")
+ self.assertTrue(rm.requires_recovery("NODEMANAGER"))
+
+ rm = RecoveryManager(True, True)
+ rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER", "")
+ rm.update_current_status("NODEMANAGER", "INSTALLED")
+ rm.update_desired_status("NODEMANAGER", "STARTED")
+ self.assertTrue(rm.requires_recovery("NODEMANAGER"))
+
+ rm.update_current_status("DATANODE", "INSTALLED")
+ rm.update_desired_status("DATANODE", "STARTED")
+ self.assertFalse(rm.requires_recovery("DATANODE"))
+
+ rm = RecoveryManager(True, True)
+ rm.update_config(15, 5, 1, 16, True, False, "", "NODEMANAGER")
+ rm.update_current_status("NODEMANAGER", "INSTALLED")
+ rm.update_desired_status("NODEMANAGER", "STARTED")
+ self.assertFalse(rm.requires_recovery("NODEMANAGER"))
+
+ rm.update_current_status("DATANODE", "INSTALLED")
+ rm.update_desired_status("DATANODE", "STARTED")
+ self.assertTrue(rm.requires_recovery("DATANODE"))
+
+ rm.update_config(15, 5, 1, 16, True, False, "", "NODEMANAGER")
+ rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER", "")
+ rm.update_current_status("NODEMANAGER", "INSTALLED")
+ rm.update_desired_status("NODEMANAGER", "STARTED")
+ self.assertTrue(rm.requires_recovery("NODEMANAGER"))
+
+ rm.update_current_status("DATANODE", "INSTALLED")
+ rm.update_desired_status("DATANODE", "STARTED")
+ self.assertFalse(rm.requires_recovery("DATANODE"))
+
+ rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER", "NODEMANAGER")
+ rm.update_current_status("NODEMANAGER", "INSTALLED")
+ rm.update_desired_status("NODEMANAGER", "STARTED")
+ self.assertTrue(rm.requires_recovery("NODEMANAGER"))
+
+ rm.update_current_status("DATANODE", "INSTALLED")
+ rm.update_desired_status("DATANODE", "STARTED")
+ self.assertFalse(rm.requires_recovery("DATANODE"))
pass
@patch('time.time', MagicMock(side_effects=[1]))
@@ -343,7 +392,7 @@ class TestRecoveryManager(TestCase):
4200, 4201, 4202,
4300, 4301, 4302]
rm = RecoveryManager(True)
- rm.update_config(15, 5, 1, 16, True, False)
+ rm.update_config(15, 5, 1, 16, True, False, "", "")
command1 = copy.deepcopy(self.command)
@@ -374,14 +423,14 @@ class TestRecoveryManager(TestCase):
self.assertEqual(1, len(commands))
self.assertEqual("INSTALL", commands[0]["roleCommand"])
- rm.update_config(2, 5, 1, 5, True, True)
+ rm.update_config(2, 5, 1, 5, True, True, "", "")
rm.update_current_status("NODEMANAGER", "INIT")
rm.update_desired_status("NODEMANAGER", "INSTALLED")
commands = rm.get_recovery_commands()
self.assertEqual(0, len(commands))
- rm.update_config(12, 5, 1, 15, True, False)
+ rm.update_config(12, 5, 1, 15, True, False, "", "")
rm.update_current_status("NODEMANAGER", "INIT")
rm.update_desired_status("NODEMANAGER", "INSTALLED")
@@ -422,25 +471,25 @@ class TestRecoveryManager(TestCase):
def test_update_rm_config(self, mock_uc):
rm = RecoveryManager()
rm.update_configuration_from_registration(None)
- mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True)])
+ mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "", "")])
mock_uc.reset_mock()
rm.update_configuration_from_registration({})
- mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True)])
+ mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "", "")])
mock_uc.reset_mock()
rm.update_configuration_from_registration(
{"recoveryConfig": {
"type" : "DEFAULT"}}
)
- mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True)])
+ mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "", "")])
mock_uc.reset_mock()
rm.update_configuration_from_registration(
{"recoveryConfig": {
"type" : "FULL"}}
)
- mock_uc.assert_has_calls([call(6, 60, 5, 12, True, False)])
+ mock_uc.assert_has_calls([call(6, 60, 5, 12, True, False, "", "")])
mock_uc.reset_mock()
rm.update_configuration_from_registration(
@@ -448,7 +497,7 @@ class TestRecoveryManager(TestCase):
"type" : "AUTO_START",
"max_count" : "med"}}
)
- mock_uc.assert_has_calls([call(6, 60, 5, 12, True, True)])
+ mock_uc.assert_has_calls([call(6, 60, 5, 12, True, True, "", "")])
mock_uc.reset_mock()
rm.update_configuration_from_registration(
@@ -457,9 +506,11 @@ class TestRecoveryManager(TestCase):
"maxCount" : "5",
"windowInMinutes" : 20,
"retryGap" : 2,
- "maxLifetimeCount" : 5}}
+ "maxLifetimeCount" : 5,
+ "enabledComponents" : " A,B",
+ "disabledComponents" : "C"}}
)
- mock_uc.assert_has_calls([call(5, 20, 2, 5, True, True)])
+ mock_uc.assert_has_calls([call(5, 20, 2, 5, True, True, " A,B", "C")])
pass
@patch.object(RecoveryManager, "_now_")
@@ -471,7 +522,7 @@ class TestRecoveryManager(TestCase):
rec_st = rm.get_recovery_status()
self.assertEquals(rec_st, {"summary": "DISABLED"})
- rm.update_config(2, 5, 1, 4, True, True)
+ rm.update_config(2, 5, 1, 4, True, True, "", "")
rec_st = rm.get_recovery_status()
self.assertEquals(rec_st, {"summary": "RECOVERABLE", "componentReports": []})
@@ -515,7 +566,7 @@ class TestRecoveryManager(TestCase):
[1000, 1001, 1002, 1003, 1104, 1105, 1106, 1807, 1808, 1809, 1810, 1811, 1812]
rm = RecoveryManager(True)
- rm.update_config(5, 5, 1, 11, True, False)
+ rm.update_config(5, 5, 1, 11, True, False, "", "")
command1 = copy.deepcopy(self.command)
@@ -552,4 +603,30 @@ class TestRecoveryManager(TestCase):
rm.stop_execution_command()
self.assertTrue(rm.has_active_command())
rm.stop_execution_command()
- self.assertFalse(rm.has_active_command())
\ No newline at end of file
+ self.assertFalse(rm.has_active_command())
+
+ def test_configured_for_recovery(self):
+ rm = RecoveryManager(True)
+ self.assertTrue(rm.configured_for_recovery("A"))
+ self.assertTrue(rm.configured_for_recovery("B"))
+
+ rm.update_config(5, 5, 1, 11, True, False, "", "")
+ self.assertTrue(rm.configured_for_recovery("A"))
+ self.assertTrue(rm.configured_for_recovery("B"))
+
+ rm.update_config(5, 5, 1, 11, True, False, "A", "")
+ self.assertTrue(rm.configured_for_recovery("A"))
+ self.assertFalse(rm.configured_for_recovery("B"))
+
+ rm.update_config(5, 5, 1, 11, True, False, "", "B,C")
+ self.assertTrue(rm.configured_for_recovery("A"))
+ self.assertFalse(rm.configured_for_recovery("B"))
+ self.assertFalse(rm.configured_for_recovery("C"))
+
+ rm.update_config(5, 5, 1, 11, True, False, "A, D, F ", "B,C")
+ self.assertTrue(rm.configured_for_recovery("A"))
+ self.assertFalse(rm.configured_for_recovery("B"))
+ self.assertFalse(rm.configured_for_recovery("C"))
+ self.assertTrue(rm.configured_for_recovery("D"))
+ self.assertFalse(rm.configured_for_recovery("E"))
+ self.assertTrue(rm.configured_for_recovery("F"))
http://git-wip-us.apache.org/repos/asf/ambari/blob/799f8a9c/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
index 6f34b62..7fcdac9 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
@@ -965,7 +965,7 @@ public class HeartBeatHandler {
}
response.setRecoveryConfig(RecoveryConfig.getRecoveryConfig(config));
if(response.getRecoveryConfig() != null) {
- LOG.debug("Recovery configuration set to " + response.getRecoveryConfig().toString());
+ LOG.info("Recovery configuration set to " + response.getRecoveryConfig().toString());
}
Long requestId = 0L;
http://git-wip-us.apache.org/repos/asf/ambari/blob/799f8a9c/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java
index 9ebfb49..3f558eb 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java
@@ -48,6 +48,28 @@ public class RecoveryConfig {
@SerializedName("maxLifetimeCount")
private String maxLifetimeCount;
+ @SerializedName("enabledComponents")
+ private String enabledComponents;
+
+ @SerializedName("disabledComponents")
+ private String disabledComponents;
+
+
+ public String getDisabledComponents() {
+ return disabledComponents;
+ }
+
+ public void setDisabledComponents(String disabledComponents) {
+ this.disabledComponents = disabledComponents;
+ }
+
+ public String getEnabledComponents() {
+ return enabledComponents;
+ }
+
+ public void setEnabledComponents(String enabledComponents) {
+ this.enabledComponents = enabledComponents;
+ }
public String getType() {
return type;
@@ -96,6 +118,8 @@ public class RecoveryConfig {
rc.setRetryGap(conf.getNodeRecoveryRetryGap());
rc.setType(conf.getNodeRecoveryType());
rc.setWindowInMinutes(conf.getNodeRecoveryWindowInMin());
+ rc.setDisabledComponents(conf.getDisabledComponents());
+ rc.setEnabledComponents(conf.getEnabledComponents());
return rc;
}
@@ -107,6 +131,8 @@ public class RecoveryConfig {
buffer.append(", windowInMinutes=").append(windowInMinutes);
buffer.append(", retryGap=").append(retryGap);
buffer.append(", maxLifetimeCount=").append(maxLifetimeCount);
+ buffer.append(", disabledComponents=").append(disabledComponents);
+ buffer.append(", enabledComponents=").append(enabledComponents);
buffer.append('}');
return buffer.toString();
}
http://git-wip-us.apache.org/repos/asf/ambari/blob/799f8a9c/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java b/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
index 23f9803fe..702e12d 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
@@ -300,6 +300,8 @@ public class Configuration {
public static final String RECOVERY_WINDOW_IN_MIN_DEFAULT = "60";
public static final String RECOVERY_RETRY_GAP_KEY = "recovery.retry_interval";
public static final String RECOVERY_RETRY_GAP_DEFAULT = "5";
+ public static final String RECOVERY_DISABLED_COMPONENTS_KEY = "recovery.disabled_components";
+ public static final String RECOVERY_ENABLED_COMPONENTS_KEY = "recovery.enabled_components";
/**
* Allow proxy calls to these hosts and ports only
@@ -1672,17 +1674,17 @@ public class Configuration {
public Integer getRequestReadTimeout() {
return Integer.parseInt(properties.getProperty(REQUEST_READ_TIMEOUT,
- REQUEST_READ_TIMEOUT_DEFAULT));
+ REQUEST_READ_TIMEOUT_DEFAULT));
}
public Integer getRequestConnectTimeout() {
return Integer.parseInt(properties.getProperty(REQUEST_CONNECT_TIMEOUT,
- REQUEST_CONNECT_TIMEOUT_DEFAULT));
+ REQUEST_CONNECT_TIMEOUT_DEFAULT));
}
public String getExecutionSchedulerConnections() {
return properties.getProperty(EXECUTION_SCHEDULER_CONNECTIONS,
- DEFAULT_SCHEDULER_MAX_CONNECTIONS);
+ DEFAULT_SCHEDULER_MAX_CONNECTIONS);
}
public Long getExecutionSchedulerMisfireToleration() {
@@ -1708,7 +1710,7 @@ public class Configuration {
public String getCustomActionDefinitionPath() {
return properties.getProperty(CUSTOM_ACTION_DEFINITION_KEY,
- CUSTOM_ACTION_DEFINITION_DEF_VALUE);
+ CUSTOM_ACTION_DEFINITION_DEF_VALUE);
}
public int getAgentPackageParallelCommandsLimit() {
@@ -1757,7 +1759,7 @@ public class Configuration {
*/
public int getClientThreadPoolSize() {
return Integer.parseInt(properties.getProperty(
- CLIENT_THREADPOOL_SIZE_KEY, String.valueOf(CLIENT_THREADPOOL_SIZE_DEFAULT)));
+ CLIENT_THREADPOOL_SIZE_KEY, String.valueOf(CLIENT_THREADPOOL_SIZE_DEFAULT)));
}
/**
@@ -1795,7 +1797,7 @@ public class Configuration {
*/
public long getViewExtractionThreadPoolTimeout() {
return Long.parseLong(properties.getProperty(
- VIEW_EXTRACTION_THREADPOOL_TIMEOUT_KEY, String.valueOf(VIEW_EXTRACTION_THREADPOOL_TIMEOUT_DEFAULT)));
+ VIEW_EXTRACTION_THREADPOOL_TIMEOUT_KEY, String.valueOf(VIEW_EXTRACTION_THREADPOOL_TIMEOUT_DEFAULT)));
}
/**
@@ -1865,6 +1867,22 @@ public class Configuration {
}
/**
+ * Get the components for which recovery is disabled
+ * @return
+ */
+ public String getDisabledComponents() {
+ return properties.getProperty(RECOVERY_DISABLED_COMPONENTS_KEY, "");
+ }
+
+ /**
+ * Get the components for which recovery is enabled
+ * @return
+ */
+ public String getEnabledComponents() {
+ return properties.getProperty(RECOVERY_ENABLED_COMPONENTS_KEY, "");
+ }
+
+ /**
* Get the configured retry gap between tries per host component
* @return
*/