You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jo...@apache.org on 2014/11/03 14:06:31 UTC

git commit: AMBARI-8104 - Add alerts for ZKFC down (jonathanhurley)

Repository: ambari
Updated Branches:
  refs/heads/trunk 3ee81bce1 -> 38853c94f


AMBARI-8104 - Add alerts for ZKFC down (jonathanhurley)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/38853c94
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/38853c94
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/38853c94

Branch: refs/heads/trunk
Commit: 38853c94f8e1c3968eb28049bb2876fe64c2b8fb
Parents: 3ee81bc
Author: Jonathan Hurley <jh...@hortonworks.com>
Authored: Sat Nov 1 23:53:55 2014 -0400
Committer: Jonathan Hurley <jh...@hortonworks.com>
Committed: Mon Nov 3 08:06:17 2014 -0500

----------------------------------------------------------------------
 .../python/ambari_agent/alerts/base_alert.py    |  6 ++-
 .../python/ambari_agent/alerts/port_alert.py    | 47 +++++++++++++++-----
 .../src/test/python/ambari_agent/TestAlerts.py  | 46 +++++++++++++++++--
 .../stacks/HDP/2.0.6/services/HDFS/alerts.json  | 22 +++++++++
 4 files changed, 105 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/38853c94/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
index 5152570..d93ec48 100644
--- a/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
+++ b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
@@ -89,7 +89,11 @@ class BaseAlert(object):
     
     try:
       res = self._collect()
-      res_base_text = self.alert_source_meta['reporting'][res[0].lower()]['text']
+      reporting_state = res[0].lower()
+
+      if reporting_state in self.alert_source_meta['reporting']:
+        res_base_text = self.alert_source_meta['reporting'][reporting_state]['text']
+
     except Exception as e:
       message = "Unable to run alert {0}".format(str(self.alert_meta['name']))
       

http://git-wip-us.apache.org/repos/asf/ambari/blob/38853c94/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py
index aa75e34..c8e7b3a 100644
--- a/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py
+++ b/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py
@@ -30,34 +30,57 @@ class PortAlert(BaseAlert):
 
   def __init__(self, alert_meta, alert_source_meta):
     super(PortAlert, self).__init__(alert_meta, alert_source_meta)
-    
-    # can be parameterized
-    self.uri = self._find_lookup_property(alert_source_meta['uri'])
-    self.port = alert_source_meta['default_port']
+
+    self.uri = None
+    self.default_port = None
+
+    # can be parameterized or static
+    if 'uri' in alert_source_meta:
+      self.uri = self._find_lookup_property(alert_source_meta['uri'])
+
+    # always static
+    if 'default_port' in alert_source_meta:
+      self.default_port = alert_source_meta['default_port']
     
   def _collect(self):
-    urivalue = self._lookup_property_value(self.uri)
+    # if not parameterized, this will return the static value
+    uri_value = self._lookup_property_value(self.uri)
+    if uri_value is None:
+      uri_value = self.host_name
+
+    # in some cases, a single property is a comma-separated list like
+    # host1:8080,host2:8081,host3:8083
+    uri_value_array = uri_value.split(',')
+    if len(uri_value_array) > 1:
+      for item in uri_value_array:
+        if item.startswith(self.host_name):
+          uri_value = item
 
-    port = self.port
-    host = BaseAlert.get_host_from_url(urivalue)
+    host = BaseAlert.get_host_from_url(uri_value)
     if host is None:
       host = self.host_name
 
     try:
-      port = int(get_port_from_url(urivalue))
+      port = int(get_port_from_url(uri_value))
     except:
-      # if port not found,  default port already set to port
-      pass
-    
+      if self.default_port is None:
+        label = 'Unable to determine port from URI {0}'.format(uri_value)
+        return (self.RESULT_UNKNOWN, [label])
+
+      port = self.default_port
+
+
     if logger.isEnabledFor(logging.DEBUG):
       logger.debug("checking {0} listening on port {1}".format(host, str(port)))
     
     try:
       s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
       s.settimeout(1.5)
+
       t = time.time()
       s.connect((host, port))
       millis = time.time() - t
+
       return (self.RESULT_OK, [millis/1000, port])
     except Exception as e:
       return (self.RESULT_CRITICAL, [str(e), host, port])
@@ -66,5 +89,5 @@ class PortAlert(BaseAlert):
         try:
           s.close()
         except:
+          # no need to log a close failure
           pass
-

http://git-wip-us.apache.org/repos/asf/ambari/blob/38853c94/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
index 88584db..1f8d0c0 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
@@ -18,8 +18,10 @@ See the License for the specific language governing permissions and
 limitations under the License.
 '''
 
-import os
+import socket
 import sys
+import os
+
 from ambari_agent.AlertSchedulerHandler import AlertSchedulerHandler
 from ambari_agent.alerts.collector import AlertCollector
 from ambari_agent.alerts.metric_alert import MetricAlert
@@ -85,8 +87,46 @@ class TestAlerts(TestCase):
     pa = PortAlert(json, json['source'])
     pa.set_helpers(collector, {'hdfs-site/my-key': 'value1'})
     self.assertEquals(6, pa.interval())
+    pa.collect()
 
-    res = pa.collect()
+
+  @patch.object(socket.socket,"connect")
+  def test_port_alert_complex_uri(self, socket_connect_mock):
+    json = { "name": "namenode_process",
+      "service": "HDFS",
+      "component": "NAMENODE",
+      "label": "NameNode process",
+      "interval": 6,
+      "scope": "host",
+      "enabled": True,
+      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
+      "source": {
+        "type": "PORT",
+        "uri": "{{hdfs-site/my-key}}",
+        "default_port": 50070,
+        "reporting": {
+          "ok": {
+            "text": "TCP OK - {0:.4f} response time on port {1}"
+          },
+          "critical": {
+            "text": "Could not load process info: {0}"
+          }
+        }
+      }
+    }
+
+    collector = AlertCollector()
+
+    pa = PortAlert(json, json['source'])
+
+    # use a URI that has commas to verify that we properly parse it
+    pa.set_helpers(collector, {'hdfs-site/my-key': 'c6401.ambari.apache.org:2181,c6402.ambari.apache.org:2181,c6403.ambari.apache.org:2181'})
+    pa.host_name = 'c6402.ambari.apache.org'
+    self.assertEquals(6, pa.interval())
+
+    pa.collect()
+    self.assertEquals('OK', collector.alerts()[0]['state'])
+    self.assertTrue('response time on port 2181' in collector.alerts()[0]['text'])
 
 
   def test_port_alert_no_sub(self):
@@ -117,7 +157,7 @@ class TestAlerts(TestCase):
     pa.set_helpers(AlertCollector(), '')
     self.assertEquals('http://c6401.ambari.apache.org', pa.uri)
 
-    res = pa.collect()
+    pa.collect()
 
 
   def test_script_alert(self):

http://git-wip-us.apache.org/repos/asf/ambari/blob/38853c94/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/alerts.json b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/alerts.json
index 0a6a455..a409230 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/alerts.json
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/alerts.json
@@ -441,6 +441,28 @@
           }
         }
       }    
+    ],
+    "ZKFC": [
+      {
+        "name": "hdfs_zookeeper_failover_controller_process",
+        "label": "ZooKeeper Failover Controller Process",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",        
+          "uri": "{{core-site/ha.zookeeper.quorum}}",
+          "default_port": 2181,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }
+        }
+      }
     ]
   }
 }
\ No newline at end of file