You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jo...@apache.org on 2014/11/03 14:06:31 UTC
git commit: AMBARI-8104 - Add alerts for ZKFC down (jonathanhurley)
Repository: ambari
Updated Branches:
refs/heads/trunk 3ee81bce1 -> 38853c94f
AMBARI-8104 - Add alerts for ZKFC down (jonathanhurley)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/38853c94
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/38853c94
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/38853c94
Branch: refs/heads/trunk
Commit: 38853c94f8e1c3968eb28049bb2876fe64c2b8fb
Parents: 3ee81bc
Author: Jonathan Hurley <jh...@hortonworks.com>
Authored: Sat Nov 1 23:53:55 2014 -0400
Committer: Jonathan Hurley <jh...@hortonworks.com>
Committed: Mon Nov 3 08:06:17 2014 -0500
----------------------------------------------------------------------
.../python/ambari_agent/alerts/base_alert.py | 6 ++-
.../python/ambari_agent/alerts/port_alert.py | 47 +++++++++++++++-----
.../src/test/python/ambari_agent/TestAlerts.py | 46 +++++++++++++++++--
.../stacks/HDP/2.0.6/services/HDFS/alerts.json | 22 +++++++++
4 files changed, 105 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/38853c94/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
index 5152570..d93ec48 100644
--- a/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
+++ b/ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
@@ -89,7 +89,11 @@ class BaseAlert(object):
try:
res = self._collect()
- res_base_text = self.alert_source_meta['reporting'][res[0].lower()]['text']
+ reporting_state = res[0].lower()
+
+ if reporting_state in self.alert_source_meta['reporting']:
+ res_base_text = self.alert_source_meta['reporting'][reporting_state]['text']
+
except Exception as e:
message = "Unable to run alert {0}".format(str(self.alert_meta['name']))
http://git-wip-us.apache.org/repos/asf/ambari/blob/38853c94/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py
index aa75e34..c8e7b3a 100644
--- a/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py
+++ b/ambari-agent/src/main/python/ambari_agent/alerts/port_alert.py
@@ -30,34 +30,57 @@ class PortAlert(BaseAlert):
def __init__(self, alert_meta, alert_source_meta):
super(PortAlert, self).__init__(alert_meta, alert_source_meta)
-
- # can be parameterized
- self.uri = self._find_lookup_property(alert_source_meta['uri'])
- self.port = alert_source_meta['default_port']
+
+ self.uri = None
+ self.default_port = None
+
+ # can be parameterized or static
+ if 'uri' in alert_source_meta:
+ self.uri = self._find_lookup_property(alert_source_meta['uri'])
+
+ # always static
+ if 'default_port' in alert_source_meta:
+ self.default_port = alert_source_meta['default_port']
def _collect(self):
- urivalue = self._lookup_property_value(self.uri)
+ # if not parameterized, this will return the static value
+ uri_value = self._lookup_property_value(self.uri)
+ if uri_value is None:
+ uri_value = self.host_name
+
+ # in some cases, a single property is a comma-separated list like
+ # host1:8080,host2:8081,host3:8083
+ uri_value_array = uri_value.split(',')
+ if len(uri_value_array) > 1:
+ for item in uri_value_array:
+ if item.startswith(self.host_name):
+ uri_value = item
- port = self.port
- host = BaseAlert.get_host_from_url(urivalue)
+ host = BaseAlert.get_host_from_url(uri_value)
if host is None:
host = self.host_name
try:
- port = int(get_port_from_url(urivalue))
+ port = int(get_port_from_url(uri_value))
except:
- # if port not found, default port already set to port
- pass
-
+ if self.default_port is None:
+ label = 'Unable to determine port from URI {0}'.format(uri_value)
+ return (self.RESULT_UNKNOWN, [label])
+
+ port = self.default_port
+
+
if logger.isEnabledFor(logging.DEBUG):
logger.debug("checking {0} listening on port {1}".format(host, str(port)))
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.settimeout(1.5)
+
t = time.time()
s.connect((host, port))
millis = time.time() - t
+
return (self.RESULT_OK, [millis/1000, port])
except Exception as e:
return (self.RESULT_CRITICAL, [str(e), host, port])
@@ -66,5 +89,5 @@ class PortAlert(BaseAlert):
try:
s.close()
except:
+ # no need to log a close failure
pass
-
http://git-wip-us.apache.org/repos/asf/ambari/blob/38853c94/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
index 88584db..1f8d0c0 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
@@ -18,8 +18,10 @@ See the License for the specific language governing permissions and
limitations under the License.
'''
-import os
+import socket
import sys
+import os
+
from ambari_agent.AlertSchedulerHandler import AlertSchedulerHandler
from ambari_agent.alerts.collector import AlertCollector
from ambari_agent.alerts.metric_alert import MetricAlert
@@ -85,8 +87,46 @@ class TestAlerts(TestCase):
pa = PortAlert(json, json['source'])
pa.set_helpers(collector, {'hdfs-site/my-key': 'value1'})
self.assertEquals(6, pa.interval())
+ pa.collect()
- res = pa.collect()
+
+ @patch.object(socket.socket,"connect")
+ def test_port_alert_complex_uri(self, socket_connect_mock):
+ json = { "name": "namenode_process",
+ "service": "HDFS",
+ "component": "NAMENODE",
+ "label": "NameNode process",
+ "interval": 6,
+ "scope": "host",
+ "enabled": True,
+ "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
+ "source": {
+ "type": "PORT",
+ "uri": "{{hdfs-site/my-key}}",
+ "default_port": 50070,
+ "reporting": {
+ "ok": {
+ "text": "TCP OK - {0:.4f} response time on port {1}"
+ },
+ "critical": {
+ "text": "Could not load process info: {0}"
+ }
+ }
+ }
+ }
+
+ collector = AlertCollector()
+
+ pa = PortAlert(json, json['source'])
+
+ # use a URI that has commas to verify that we properly parse it
+ pa.set_helpers(collector, {'hdfs-site/my-key': 'c6401.ambari.apache.org:2181,c6402.ambari.apache.org:2181,c6403.ambari.apache.org:2181'})
+ pa.host_name = 'c6402.ambari.apache.org'
+ self.assertEquals(6, pa.interval())
+
+ pa.collect()
+ self.assertEquals('OK', collector.alerts()[0]['state'])
+ self.assertTrue('response time on port 2181' in collector.alerts()[0]['text'])
def test_port_alert_no_sub(self):
@@ -117,7 +157,7 @@ class TestAlerts(TestCase):
pa.set_helpers(AlertCollector(), '')
self.assertEquals('http://c6401.ambari.apache.org', pa.uri)
- res = pa.collect()
+ pa.collect()
def test_script_alert(self):
http://git-wip-us.apache.org/repos/asf/ambari/blob/38853c94/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/alerts.json b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/alerts.json
index 0a6a455..a409230 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/alerts.json
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/alerts.json
@@ -441,6 +441,28 @@
}
}
}
+ ],
+ "ZKFC": [
+ {
+ "name": "hdfs_zookeeper_failover_controller_process",
+ "label": "ZooKeeper Failover Controller Process",
+ "interval": 1,
+ "scope": "ANY",
+ "enabled": true,
+ "source": {
+ "type": "PORT",
+ "uri": "{{core-site/ha.zookeeper.quorum}}",
+ "default_port": 2181,
+ "reporting": {
+ "ok": {
+ "text": "TCP OK - {0:.4f} response on port {1}"
+ },
+ "critical": {
+ "text": "Connection failed: {0} on host {1}:{2}"
+ }
+ }
+ }
+ }
]
}
}
\ No newline at end of file