You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by nc...@apache.org on 2016/09/28 17:27:59 UTC

[02/19] ambari git commit: AMBARI-18464. Provide Warnings When ulimit Is High To Prevent Heartbeat Lost Issues (aonishuk)

AMBARI-18464. Provide Warnings When ulimit Is High To Prevent Heartbeat Lost Issues (aonishuk)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/ee4e63a9
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/ee4e63a9
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/ee4e63a9

Branch: refs/heads/branch-dev-patch-upgrade
Commit: ee4e63a9a75131119f3e157eeb1f8f1462a1798f
Parents: 8192601
Author: Andrew Onishuk <ao...@hortonworks.com>
Authored: Mon Sep 26 19:09:52 2016 +0300
Committer: Andrew Onishuk <ao...@hortonworks.com>
Committed: Mon Sep 26 19:09:52 2016 +0300

----------------------------------------------------------------------
 ambari-server/src/main/resources/alerts.json    | 31 ++++++++
 .../main/resources/host_scripts/alert_ulimit.py | 83 ++++++++++++++++++++
 .../test/python/host_scripts/TestAlertUlimit.py | 44 +++++++++++
 3 files changed, 158 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/ee4e63a9/ambari-server/src/main/resources/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/alerts.json b/ambari-server/src/main/resources/alerts.json
index 9cffff5..2559b3a 100644
--- a/ambari-server/src/main/resources/alerts.json
+++ b/ambari-server/src/main/resources/alerts.json
@@ -149,7 +149,38 @@
             }
           ]
         }
+      },
+      {
+        "name": "ambari_agent_ulimit",
+        "label": "Ulimit for open files",
+        "description": "This host-level alert is triggered if value of ulimit for open files (-n) goes above specific thresholds. The default threshold values are 200000 for WARNING and 800000 for CRITICAL.",
+        "interval": 1,
+        "scope": "HOST",
+        "enabled": true,
+        "source": {
+          "type": "SCRIPT",
+          "path": "alert_ulimit.py",
+          "parameters": [
+            {
+              "name": "ulimit.warning.threshold",
+              "display_name": "Warning",
+              "value": 200000,
+              "type": "NUMERIC",
+              "description": "The threshold of ulimit for open files (-n) for WARNING alert.",
+              "threshold": "WARNING"
+            },
+            {
+              "name": "ulimit.critical.threshold",
+              "display_name": "Critical",
+              "value": 800000,
+              "type": "NUMERIC",
+              "description": "The threshold of ulimit for open files (-n) for CRITICAL alert.",
+              "threshold": "CRITICAL"
+            }
+          ]
+        }
       }
+
     ]
   }
 }

http://git-wip-us.apache.org/repos/asf/ambari/blob/ee4e63a9/ambari-server/src/main/resources/host_scripts/alert_ulimit.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/host_scripts/alert_ulimit.py b/ambari-server/src/main/resources/host_scripts/alert_ulimit.py
new file mode 100644
index 0000000..8c57b84
--- /dev/null
+++ b/ambari-server/src/main/resources/host_scripts/alert_ulimit.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import resource
+
+WARNING_KEY = "ulimit.warning.threshold"
+CRITICAL_KEY = "ulimit.critical.threshold"
+
+DEFAULT_WARNING_KEY = 200000
+DEFAULT_CRITICAL_KEY = 800000
+
+def get_tokens():
+  """
+  Returns the tokens, in the format {{site/property}}, that will be used to build
+  the dictionary passed into execute. This alert needs none, so it returns None.
+  """
+  return None
+
+def execute(configurations={}, parameters={}, host_name=None):
+  """
+  Performs advanced ulimit checks under Linux.
+
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
+  host_name (string): the name of this host where the alert is running
+
+  """
+
+  # try:
+  soft_ulimit, hard_ulimiit = resource.getrlimit(resource.RLIMIT_NOFILE)
+  return_code, label = _get_warnings_for_partition(parameters, soft_ulimit)
+  # except Exception as e:
+  #   return 'CRITICAL', ["Unable to determine ulimit for open files (-n)"]
+
+  return return_code, [label]
+
+def _get_warnings_for_partition(parameters, soft_ulimit):
+
+  # start with hard coded defaults
+  warning_count = DEFAULT_WARNING_KEY
+  critical_count = DEFAULT_CRITICAL_KEY
+
+  if WARNING_KEY in parameters:
+    warning_count = int(parameters[WARNING_KEY])
+
+  if CRITICAL_KEY in parameters:
+    critical_count = int(parameters[CRITICAL_KEY])
+
+  if soft_ulimit is None or soft_ulimit == "":
+    return 'CRITICAL', ['Unable to determine ulimit for open files (-n)']
+
+  return_code = "OK"
+  label = "Ulimit for open files (-n) is {0}".format(soft_ulimit)
+
+  if soft_ulimit >= critical_count:
+    label = "Ulimit for open files (-n) is {0} which is higher or equal than critical value of {1}".format(soft_ulimit, critical_count)
+    return_code = 'CRITICAL'
+  elif soft_ulimit >= warning_count:
+    label = "Ulimit for open files (-n) is {0} which is higher or equal than warning value of {1}".format(soft_ulimit, warning_count)
+    return_code = 'WARNING'
+
+  return return_code, label
+

http://git-wip-us.apache.org/repos/asf/ambari/blob/ee4e63a9/ambari-server/src/test/python/host_scripts/TestAlertUlimit.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/host_scripts/TestAlertUlimit.py b/ambari-server/src/test/python/host_scripts/TestAlertUlimit.py
new file mode 100644
index 0000000..09bf4e6
--- /dev/null
+++ b/ambari-server/src/test/python/host_scripts/TestAlertUlimit.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+import alert_ulimit
+from mock.mock import patch, MagicMock
+from unittest import TestCase
+
+
+class TestAlertUlimit(TestCase):
+
+  @patch('resource.getrlimit')
+  def test_ulimits(self, ulimit_mock):
+
+    # OK
+    ulimit_mock.return_value = 1024, 1024
+    res = alert_ulimit.execute()
+    self.assertEquals(res, ('OK', ['Ulimit for open files (-n) is 1024']))
+
+    # WARNING
+    ulimit_mock.return_value = 200000, 200000
+    res = alert_ulimit.execute()
+    self.assertEquals(res, ('WARNING', ['Ulimit for open files (-n) is 200000 which is higher or equal than warning value of 200000']))
+
+    # CRITICAL
+    ulimit_mock.return_value = 1000000, 1000000
+    res = alert_ulimit.execute()
+    self.assertEquals(res, ('CRITICAL', ['Ulimit for open files (-n) is 1000000 which is higher or equal than critical value of 800000']))
\ No newline at end of file