You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@slider.apache.org by sm...@apache.org on 2014/10/12 02:57:59 UTC
[11/50] git commit: SLIDER-341. Add a window based failure count for
auto-start to limit indefinite attempt
SLIDER-341. Add a window based failure count for auto-start to limit indefinite attempt
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/961e1704
Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/961e1704
Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/961e1704
Branch: refs/heads/feature/SLIDER-481_allow_dedicated_handling_of_exports
Commit: 961e17043f589c4096dce896d64a582dd000dd4e
Parents: d8b36ca
Author: Sumit Mohanty <sm...@hortonworks.com>
Authored: Mon Oct 6 20:47:44 2014 -0700
Committer: Sumit Mohanty <sm...@hortonworks.com>
Committed: Mon Oct 6 20:47:44 2014 -0700
----------------------------------------------------------------------
slider-agent/conf/agent.ini | 1 +
.../src/main/python/agent/AgentConfig.py | 14 +++++
.../src/main/python/agent/Controller.py | 35 ++++++++++-
.../src/test/python/agent/TestController.py | 63 ++++++++++++++++++++
slider-agent/src/test/python/agent/TestMain.py | 37 ++++++++++++
5 files changed, 148 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/961e1704/slider-agent/conf/agent.ini
----------------------------------------------------------------------
diff --git a/slider-agent/conf/agent.ini b/slider-agent/conf/agent.ini
index 7b9d57d..48113e3 100644
--- a/slider-agent/conf/agent.ini
+++ b/slider-agent/conf/agent.ini
@@ -43,6 +43,7 @@ log_level=INFO
[command]
max_retries=2
sleep_between_retries=1
+auto_restart=5,5
[security]
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/961e1704/slider-agent/src/main/python/agent/AgentConfig.py
----------------------------------------------------------------------
diff --git a/slider-agent/src/main/python/agent/AgentConfig.py b/slider-agent/src/main/python/agent/AgentConfig.py
index e45ba23..86925b1 100644
--- a/slider-agent/src/main/python/agent/AgentConfig.py
+++ b/slider-agent/src/main/python/agent/AgentConfig.py
@@ -61,6 +61,7 @@ log_level=INFO
[command]
max_retries=2
sleep_between_retries=1
+auto_restart=5,5
[security]
keysdir=security/keys
@@ -109,6 +110,8 @@ class AgentConfig:
# agent version file
VERSION_FILE = "version_file"
+ AUTO_RESTART = "auto_restart"
+
FOLDER_MAPPING = {
APP_PACKAGE_DIR: "WORK",
APP_INSTALL_DIR: "WORK",
@@ -164,6 +167,17 @@ class AgentConfig:
return ""
return command
+  # return max, window - max failures within window minutes
+  def getErrorWindow(self):
+    """
+    Parse the auto_restart option of the command section.
+
+    The value is expected to be "<max>,<window>": exactly two comma
+    separated fields, each made up of digits only (no sign, no whitespace).
+
+    Returns the tuple (max, window) as ints, or (0, 0) when the value is
+    None or does not match that form.
+    """
+    raw_value = config.get(AgentConfig.COMMAND_SECTION, AgentConfig.AUTO_RESTART)
+    if raw_value is None:
+      return (0, 0)
+
+    fields = raw_value.split(',')
+    if len(fields) != 2:
+      return (0, 0)
+
+    max_text, window_text = fields
+    # isdigit() rejects empty strings, signs and padded values such as " 30"
+    if not (max_text.isdigit() and window_text.isdigit()):
+      return (0, 0)
+    return (int(max_text), int(window_text))
+
def set(self, category, name, value):
global config
return config.set(category, name, value)
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/961e1704/slider-agent/src/main/python/agent/Controller.py
----------------------------------------------------------------------
diff --git a/slider-agent/src/main/python/agent/Controller.py b/slider-agent/src/main/python/agent/Controller.py
index 11db21c..77f932c 100644
--- a/slider-agent/src/main/python/agent/Controller.py
+++ b/slider-agent/src/main/python/agent/Controller.py
@@ -27,6 +27,7 @@ import time
import threading
import urllib2
import pprint
+import math
from random import randint
from AgentConfig import AgentConfig
@@ -86,7 +87,8 @@ class Controller(threading.Thread):
self.statusCommand = None
self.failureCount = 0
self.heartBeatRetryCount = 0
- self.autoRestart = False
+ self.autoRestartFailures = 0
+ self.autoRestartTrackingSince = 0
def __del__(self):
@@ -275,7 +277,7 @@ class Controller(threading.Thread):
stored_command = self.actionQueue.customServiceOrchestrator.stored_command
if len(stored_command) > 0:
auto_start_command = self.create_start_command(stored_command)
- if auto_start_command:
+ if auto_start_command and self.shouldAutoRestart():
logger.info("Automatically adding a start command.")
logger.debug("Auto start command: " + pprint.pformat(auto_start_command))
self.updateStateBasedOnCommand([auto_start_command], False)
@@ -486,6 +488,35 @@ class Controller(threading.Thread):
return {'exitstatus': 1, 'log': err_msg}
+  # Basic window that only counts failures till the window duration expires
+  def shouldAutoRestart(self):
+    """
+    Decide whether another automatic start may be issued, counting this
+    call as one failure.
+
+    The limit is read from self.config.getErrorWindow() as
+    (max failures, window in minutes). A non-positive value for either one
+    disables the limit. The first counted failure opens a window. Once more
+    than `window` whole minutes have passed since the window opened, the
+    next failure opens a fresh window with a count of 1.
+
+    Returns True when the restart is allowed and False when the number of
+    failures counted in the current window exceeds the maximum.
+    """
+    max_failures, window = self.config.getErrorWindow()
+    if max_failures <= 0 or window <= 0:
+      return True
+
+    seconds_now = time.time()
+    if self.autoRestartTrackingSince == 0:
+      # 0 means no window has been opened yet
+      self.autoRestartTrackingSince = seconds_now
+      self.autoRestartFailures = 1
+      return True
+
+    minutes = math.floor((seconds_now - self.autoRestartTrackingSince) / 60)
+
+    # Expiry is checked before the limit: a failure that arrives after the
+    # window has elapsed belongs to a new window and must neither be counted
+    # against the stale one nor be rejected because of it.
+    if minutes > window:
+      logger.info("Resetting window as number of minutes passed is " + str(minutes))
+      self.autoRestartTrackingSince = seconds_now
+      self.autoRestartFailures = 1
+      return True
+
+    self.autoRestartFailures += 1
+    if self.autoRestartFailures > max_failures:
+      logger.info("Auto restart not allowed due to " + str(self.autoRestartFailures) + " failures in " + str(minutes) +
+                  " minutes. Max restarts allowed is " + str(max_failures) + " in " + str(window) + " minutes.")
+      return False
+    return True
+
+ pass
+
+
def main(argv=None):
# Allow Ctrl-C
signal.signal(signal.SIGINT, signal.SIG_DFL)
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/961e1704/slider-agent/src/test/python/agent/TestController.py
----------------------------------------------------------------------
diff --git a/slider-agent/src/test/python/agent/TestController.py b/slider-agent/src/test/python/agent/TestController.py
index 401d69a..02b0d0e 100644
--- a/slider-agent/src/test/python/agent/TestController.py
+++ b/slider-agent/src/test/python/agent/TestController.py
@@ -25,6 +25,7 @@ import unittest, threading
from agent import Controller, ActionQueue
from agent import hostname
import sys
+import time
from Controller import AGENT_AUTO_RESTART_EXIT_CODE
from Controller import State
from AgentConfig import AgentConfig
@@ -255,6 +256,68 @@ class TestController(unittest.TestCase):
self.assertTrue(os_exit_mock.call_args[0][0] == AGENT_AUTO_RESTART_EXIT_CODE)
+  @patch("time.time")
+  def test_failure_window_reset(self, mock_time):
+    """
+    With auto_restart set to '2,1', two failures whose mocked timestamps are
+    far more than one minute apart must both be allowed.
+
+    This method needs a name of its own: a later method in this class is
+    called test_failure_window, and reusing that name here would let the
+    later definition replace this one so that it never runs.
+    """
+    config = AgentConfig("", "")
+    original_config = config.get(AgentConfig.COMMAND_SECTION, AgentConfig.AUTO_RESTART)
+    config.set(AgentConfig.COMMAND_SECTION, AgentConfig.AUTO_RESTART, '2,1')
+    ## side_effect values are consumed differently when the tests run from the
+    ## command line and when they run from an IDE, so the list has a spare item
+    mock_time.side_effect = [200, 500, 500]
+    controller5 = Controller.Controller(config)
+
+    try:
+      self.assertTrue(controller5.shouldAutoRestart())
+      self.assertTrue(controller5.shouldAutoRestart())
+    finally:
+      # put the shared configuration back for the other tests
+      config.set(AgentConfig.COMMAND_SECTION, AgentConfig.AUTO_RESTART, original_config)
+
+
+  @patch("time.time")
+  def test_failure_window(self, mock_time):
+    """
+    With auto_restart set to '3,1' and mocked timestamps ten seconds apart,
+    the first three failures are allowed and the fourth is refused.
+    """
+    agent_config = AgentConfig("", "")
+    saved_value = agent_config.get(AgentConfig.COMMAND_SECTION, AgentConfig.AUTO_RESTART)
+    agent_config.set(AgentConfig.COMMAND_SECTION, AgentConfig.AUTO_RESTART, '3,1')
+    ## side_effect values are consumed differently when the tests run from the
+    ## command line and when they run from an IDE, so the list has spare items
+    mock_time.side_effect = [200, 210, 220, 230, 240, 250]
+    controller = Controller.Controller(agent_config)
+
+    try:
+      for _ in range(3):
+        self.assertTrue(controller.shouldAutoRestart())
+      self.assertFalse(controller.shouldAutoRestart())
+    finally:
+      # put the shared configuration back for the other tests
+      agent_config.set(AgentConfig.COMMAND_SECTION, AgentConfig.AUTO_RESTART, saved_value)
+
+
+  def test_failure_window2(self):
+    """
+    Drive shouldAutoRestart through a mocked getErrorWindow: non-positive
+    limits always allow a restart, and a (1, 1) limit allows only the first
+    failure while real time stays inside the window.
+    """
+    mock_config = MagicMock()
+    mock_config.getErrorWindow.return_value = (0, 0)
+    controller = Controller.Controller(mock_config)
+
+    # any non-positive max or window switches the limit off
+    for disabled_limit in [(0, 0), (0, 1), (1, 0), (-1, -1)]:
+      mock_config.getErrorWindow.return_value = disabled_limit
+      self.assertTrue(controller.shouldAutoRestart())
+
+    mock_config.getErrorWindow.return_value = (1, 1)
+    first_failure = controller.shouldAutoRestart()
+    self.assertTrue(first_failure)
+
+    #second failure within a minute
+    second_failure = controller.shouldAutoRestart()
+    self.assertFalse(second_failure)
+
+    #do not reset unless window expires
+    third_failure = controller.shouldAutoRestart()
+    self.assertFalse(third_failure)
+
+
@patch("urllib2.urlopen")
def test_sendRequest(self, requestMock):
http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/961e1704/slider-agent/src/test/python/agent/TestMain.py
----------------------------------------------------------------------
diff --git a/slider-agent/src/test/python/agent/TestMain.py b/slider-agent/src/test/python/agent/TestMain.py
index e73a05a..7c0036b 100644
--- a/slider-agent/src/test/python/agent/TestMain.py
+++ b/slider-agent/src/test/python/agent/TestMain.py
@@ -312,6 +312,43 @@ class TestMain(unittest.TestCase):
AgentConfig_set_mock.assert_any_call("server", "zk_reg_path", "/registry/org-apache-slider/cl1")
+  def test_config1(self):
+    """
+    Check getErrorWindow against the default auto_restart value and against
+    a table of well-formed and malformed values.
+
+    The option is restored afterwards because AgentConfig.set writes to
+    configuration shared at module level, so a value left behind could be
+    seen by other tests.
+    """
+    config = AgentConfig("", "")
+    original_value = config.get(AgentConfig.COMMAND_SECTION, AgentConfig.AUTO_RESTART)
+
+    # (raw auto_restart value, expected (max, window))
+    cases = [
+      ('', (0, 0)),         # empty
+      ('33', (0, 0)),       # only one field
+      ('-4,-6', (0, 0)),    # negative numbers are rejected
+      ('wd,er', (0, 0)),    # not numeric
+      ('2,20', (2, 20)),    # well formed
+      (' 2, 30', (0, 0)),   # surrounding whitespace is rejected
+    ]
+
+    try:
+      # default value, before anything is overridden
+      (max, window) = config.getErrorWindow()
+      self.assertEqual(max, 5)
+      self.assertEqual(window, 5)
+
+      for raw_value, expected in cases:
+        config.set(AgentConfig.COMMAND_SECTION, AgentConfig.AUTO_RESTART, raw_value)
+        (max, window) = config.getErrorWindow()
+        self.assertEqual((max, window), expected,
+                         "auto_restart=%r gave %r" % (raw_value, (max, window)))
+    finally:
+      config.set(AgentConfig.COMMAND_SECTION, AgentConfig.AUTO_RESTART, original_value)
+
+
if __name__ == "__main__":
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG)
unittest.main()
\ No newline at end of file