Posted to commits@ambari.apache.org by jo...@apache.org on 2014/09/29 14:10:04 UTC

git commit: AMBARI-7534 - Alerts: Disabling an Alert Should Propagate to Agent (jonathanhurley)

Repository: ambari
Updated Branches:
  refs/heads/branch-alerts-dev 73243741b -> 10f1f73b8


AMBARI-7534 - Alerts: Disabling an Alert Should Propagate to Agent (jonathanhurley)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/10f1f73b
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/10f1f73b
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/10f1f73b

Branch: refs/heads/branch-alerts-dev
Commit: 10f1f73b810e2c84d5818314ff9d1743c644a3bf
Parents: 7324374
Author: Jonathan Hurley <jh...@hortonworks.com>
Authored: Sat Sep 27 09:33:43 2014 -0400
Committer: Jonathan Hurley <jh...@hortonworks.com>
Committed: Sat Sep 27 09:33:43 2014 -0400

----------------------------------------------------------------------
 .../ambari_agent/AlertSchedulerHandler.py       | 27 ++++++--
 .../python/ambari_agent/alerts/base_alert.py    | 12 +++-
 .../src/test/python/ambari_agent/TestAlerts.py  | 68 +++++++++++++++++++-
 .../ambari_agent/dummy_files/definitions.json   |  1 +
 4 files changed, 101 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
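
In short, the agent now checks a definition's enabled flag before handing it
to the scheduler, and the flag is carried in the alert metadata so it survives
the trip from server to agent. The following standalone sketch illustrates
that guard in a simplified form; the class name is a stand-in, not one of the
actual Ambari agent classes shown in the diff below.

  import logging

  logger = logging.getLogger(__name__)

  class SchedulerSketch(object):
    '''Illustrative stand-in for the agent-side alert scheduler.'''

    def __init__(self):
      # jobs are keyed by definition UUID so they can be found on reschedule
      self.jobs = {}

    def schedule_definition(self, definition):
      # NOOP when the definition is disabled, mirroring the new agent behavior
      if not definition.is_enabled():
        logger.info("Alert %s with UUID %s is disabled and will not be scheduled",
            definition.get_name(), definition.get_uuid())
        return

      self.jobs[definition.get_uuid()] = definition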


http://git-wip-us.apache.org/repos/asf/ambari/blob/10f1f73b/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py b/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py
index 8dcce50..c645cba 100644
--- a/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py
+++ b/ambari-agent/src/main/python/ambari_agent/AlertSchedulerHandler.py
@@ -125,7 +125,7 @@ class AlertSchedulerHandler():
       uuid_valid = False
       
       for definition in definitions:
-        definition_uuid = definition.definition_uuid()
+        definition_uuid = definition.get_uuid()
         if scheduled_job.name == definition_uuid:
           uuid_valid = True
           break
@@ -141,7 +141,7 @@ class AlertSchedulerHandler():
     for definition in definitions:
       definition_scheduled = False
       for scheduled_job in scheduled_jobs:
-        definition_uuid = definition.definition_uuid()
+        definition_uuid = definition.get_uuid()
         if definition_uuid == scheduled_job.name:
           definition_scheduled = True
           break
@@ -264,7 +264,16 @@ class AlertSchedulerHandler():
     '''
     Schedule a definition (callable). Scheduled jobs are given the UUID
     as their name so that they can be identified later on.
+    <p/>
+    This function can be called with a definition that is disabled; it will
+    simply NOOP.
     '''
+    # NOOP if the definition is disabled; don't schedule it
+    if definition.is_enabled() == False:
+      logger.info("The alert {0} with UUID {1} is disabled and will not be scheduled".format(
+          definition.get_name(),definition.get_uuid()))
+      return
+    
     job = None
 
     if self.__in_minutes:
@@ -277,10 +286,20 @@ class AlertSchedulerHandler():
     # although the documentation states that Job(kwargs) takes a name 
     # key/value pair, it does not actually set the name; do it manually
     if job is not None:
-      job.name = definition.definition_uuid()
+      job.name = definition.get_uuid()
       
     logger.info("Scheduling {0} with UUID {1}".format(
-      definition.definition_name(), definition.definition_uuid()))
+      definition.get_name(), definition.get_uuid()))
+  
+  def get_job_count(self):
+    '''
+    Gets the number of jobs currently scheduled. This is mainly used for
+    test verification of scheduling
+    '''
+    if self.__scheduler is None:
+      return 0
+    
+    return len(self.__scheduler.get_jobs())   
 
 def main():
   args = list(sys.argv)
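
The new get_job_count() helper exists mainly so tests can assert on scheduler
state. A minimal usage sketch, assuming the handler is importable as
ambari_agent.AlertSchedulerHandler and using the same dummy-file paths as the
unit tests below:

  import os
  from ambari_agent.AlertSchedulerHandler import AlertSchedulerHandler

  test_file_path = os.path.join('ambari_agent', 'dummy_files')
  test_stack_path = os.path.join('ambari_agent', 'dummy_files')

  ash = AlertSchedulerHandler(test_file_path, test_stack_path)
  ash.start()

  # with a single enabled definition in definitions.json, one job is scheduled
  print(ash.get_job_count())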

http://git-wip-us.apache.org/repos/asf/ambari/blob/10f1f73b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
index 10dcff8..a3eb404 100644
--- a/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
+++ b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
@@ -48,14 +48,21 @@ class BaseAlert(object):
       return 1 if interval < 1 else interval
 
 
-  def definition_name(self):
+  def is_enabled(self):
+    '''
+    gets whether the definition is enabled
+    '''
+    return self.alert_meta['enabled']
+  
+
+  def get_name(self):
     '''
     gets the unique name of the alert definition
     '''
     return self.alert_meta['name']
 
 
-  def definition_uuid(self):
+  def get_uuid(self):
     '''
     gets the unique hash of the alert definition
     '''
@@ -101,6 +108,7 @@ class BaseAlert(object):
     data['component'] = self._find_value('componentName')
     data['timestamp'] = long(time.time() * 1000)
     data['uuid'] = self._find_value('uuid')
+    data['enabled'] = self._find_value('enabled')
 
     if logger.isEnabledFor(logging.DEBUG):
       logger.debug("debug alert text: {0}".format(data['text']))

http://git-wip-us.apache.org/repos/asf/ambari/blob/10f1f73b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
index 662d8ee..f9c2ab4 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
@@ -34,9 +34,11 @@ class TestAlerts(TestCase):
   def setUp(self):
     pass
 
+
   def tearDown(self):
     sys.stdout = sys.__stdout__
 
+
   @patch.object(Scheduler, "add_interval_job")
   @patch.object(Scheduler, "start")
   def test_start(self, aps_add_interval_job_mock, aps_start_mock):
@@ -49,6 +51,7 @@ class TestAlerts(TestCase):
     self.assertTrue(aps_add_interval_job_mock.called)
     self.assertTrue(aps_start_mock.called)
 
+
   def test_port_alert(self):
     json = { "name": "namenode_process",
       "service": "HDFS",
@@ -56,6 +59,7 @@ class TestAlerts(TestCase):
       "label": "NameNode process",
       "interval": 6,
       "scope": "host",
+      "enabled": True,
       "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
       "source": {
         "type": "PORT",
@@ -80,6 +84,7 @@ class TestAlerts(TestCase):
 
     res = pa.collect()
 
+
   def test_port_alert_no_sub(self):
     json = { "name": "namenode_process",
       "service": "HDFS",
@@ -87,6 +92,7 @@ class TestAlerts(TestCase):
       "label": "NameNode process",
       "interval": 6,
       "scope": "host",
+      "enabled": True,
       "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
       "source": {
         "type": "PORT",
@@ -109,6 +115,7 @@ class TestAlerts(TestCase):
 
     res = pa.collect()
 
+
   def test_script_alert(self):
     json = {
       "name": "namenode_process",
@@ -117,6 +124,7 @@ class TestAlerts(TestCase):
       "label": "NameNode process",
       "interval": 6,
       "scope": "host",
+      "enabled": True,
       "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
       "source": {
         "type": "SCRIPT",
@@ -145,7 +153,8 @@ class TestAlerts(TestCase):
 
     self.assertEquals('WARNING', collector.alerts()[0]['state'])
     self.assertEquals('all is not well', collector.alerts()[0]['text'])
-   
+
+
   @patch.object(MetricAlert, "_load_jmx")
   def test_metric_alert(self, ma_load_jmx_mock):
     json = {
@@ -155,6 +164,7 @@ class TestAlerts(TestCase):
       "label": "NameNode process",
       "interval": 6,
       "scope": "host",
+      "enabled": True,
       "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
       "source": {
         "type": "METRIC",
@@ -201,13 +211,17 @@ class TestAlerts(TestCase):
     self.assertEquals('OK', collector.alerts()[0]['state'])
     self.assertEquals('ok_arr: 1 3 None', collector.alerts()[0]['text'])
     
+
   def test_reschedule(self):
     test_file_path = os.path.join('ambari_agent', 'dummy_files')
     test_stack_path = os.path.join('ambari_agent', 'dummy_files')
 
     ash = AlertSchedulerHandler(test_file_path, test_stack_path)
     ash.start()
+    
+    self.assertEquals(1, ash.get_job_count())
     ash.reschedule()
+    self.assertEquals(1, ash.get_job_count())
         
   
   def test_alert_collector_purge(self):
@@ -217,6 +231,7 @@ class TestAlerts(TestCase):
       "label": "NameNode process",
       "interval": 6,
       "scope": "host",
+      "enabled": True,
       "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
       "source": {
         "type": "PORT",
@@ -247,3 +262,54 @@ class TestAlerts(TestCase):
     collector.remove_by_uuid('c1f73191-4481-4435-8dae-fd380e4c0be1')
     self.assertEquals(0,len(collector.alerts()))
     
+
+  def test_disabled_definitions(self):
+    test_file_path = os.path.join('ambari_agent', 'dummy_files')
+    test_stack_path = os.path.join('ambari_agent', 'dummy_files')
+
+    ash = AlertSchedulerHandler(test_file_path, test_stack_path)
+    ash.start()
+
+    self.assertEquals(1, ash.get_job_count())
+
+    json = { "name": "namenode_process",
+      "service": "HDFS",
+      "component": "NAMENODE",
+      "label": "NameNode process",
+      "interval": 6,
+      "scope": "host",
+      "enabled": True,
+      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
+      "source": {
+        "type": "PORT",
+        "uri": "{{hdfs-site/my-key}}",
+        "default_port": 50070,
+        "reporting": {
+          "ok": {
+            "text": "TCP OK - {0:.4f} response time on port {1}"
+          },
+          "critical": {
+            "text": "Could not load process info: {0}"
+          }
+        }
+      }
+    }
+
+    pa = PortAlert(json, json['source'])
+    ash.schedule_definition(pa)
+    
+    self.assertEquals(2, ash.get_job_count())
+    
+    json['enabled'] = False
+    pa = PortAlert(json, json['source'])
+    ash.schedule_definition(pa)
+    
+    # verify disabled alert not scheduled
+    self.assertEquals(2, ash.get_job_count())
+    
+    json['enabled'] = True
+    pa = PortAlert(json, json['source'])
+    ash.schedule_definition(pa)
+    
+    # verify enabled alert was scheduled
+    self.assertEquals(3, ash.get_job_count())    
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/10f1f73b/ambari-agent/src/test/python/ambari_agent/dummy_files/definitions.json
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/dummy_files/definitions.json b/ambari-agent/src/test/python/ambari_agent/dummy_files/definitions.json
index 30973c2..8e01833 100644
--- a/ambari-agent/src/test/python/ambari_agent/dummy_files/definitions.json
+++ b/ambari-agent/src/test/python/ambari_agent/dummy_files/definitions.json
@@ -17,6 +17,7 @@
         "interval": 6,
         "scope": "host",
         "uuid": "3f82ae27-fa6a-465b-b77d-67963ac55d2f",
+        "enabled": true,
         "source": {
           "type": "PORT",
           "uri": "{{hdfs-site/dfs.namenode.http-address}}",