You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jo...@apache.org on 2015/04/29 18:21:00 UTC
[1/2] ambari git commit: AMBARI-10816 - Expose Customizable
Parameters For SCRIPT Alerts (jonathanhurley)
Repository: ambari
Updated Branches:
refs/heads/trunk bc55f2d4e -> 6727c1dc8
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
index 2066d46..19e3170 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
@@ -35,7 +35,8 @@ NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
def get_tokens():
"""
@@ -44,36 +45,43 @@ def get_tokens():
"""
return (HDFS_SITE_KEY, NAMESERVICE_KEY, NN_HTTP_ADDRESS_KEY,
NN_HTTPS_ADDRESS_KEY, DFS_POLICY_KEY)
-
-def execute(parameters=None, host_name=None):
+
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
- if parameters is None:
- return (RESULT_STATE_UNKNOWN, ['There were no parameters supplied to the script.'])
+ if configurations is None:
+ return (RESULT_STATE_UNKNOWN, ['There were no configurations supplied to the script.'])
# if not in HA mode, then SKIP
- if not NAMESERVICE_KEY in parameters:
+ if not NAMESERVICE_KEY in configurations:
return (RESULT_STATE_SKIPPED, ['NameNode HA is not enabled'])
# hdfs-site is required
- if not HDFS_SITE_KEY in parameters:
+ if not HDFS_SITE_KEY in configurations:
return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(HDFS_SITE_KEY)])
+ # parse script arguments
+ connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+ if CONNECTION_TIMEOUT_KEY in parameters:
+ connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
+
+
# determine whether or not SSL is enabled
is_ssl_enabled = False
- if DFS_POLICY_KEY in parameters:
- dfs_policy = parameters[DFS_POLICY_KEY]
+ if DFS_POLICY_KEY in configurations:
+ dfs_policy = configurations[DFS_POLICY_KEY]
if dfs_policy == "HTTPS_ONLY":
is_ssl_enabled = True
- name_service = parameters[NAMESERVICE_KEY]
- hdfs_site = parameters[HDFS_SITE_KEY]
+ name_service = configurations[NAMESERVICE_KEY]
+ hdfs_site = configurations[HDFS_SITE_KEY]
# look for dfs.ha.namenodes.foo
nn_unique_ids_key = 'dfs.ha.namenodes.' + name_service
@@ -105,7 +113,7 @@ def execute(parameters=None, host_name=None):
try:
jmx_uri = jmx_uri_fragment.format(value)
- state = get_value_from_jmx(jmx_uri,'State')
+ state = get_value_from_jmx(jmx_uri, 'State', connection_timeout)
if state == HDFS_NN_STATE_ACTIVE:
active_namenodes.append(value)
@@ -161,11 +169,11 @@ def execute(parameters=None, host_name=None):
return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
-def get_value_from_jmx(query, jmx_property):
+def get_value_from_jmx(query, jmx_property, connection_timeout):
response = None
-
+
try:
- response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
+ response = urllib2.urlopen(query, timeout=connection_timeout)
data = response.read()
data_dict = json.loads(data)
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py
index 2837226..35217fc 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py
@@ -40,15 +40,21 @@ SMOKEUSER_KEY = '{{cluster-env/smokeuser}}'
# The configured Kerberos executable search paths, if any
KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}'
-PERCENT_WARNING = 200
-PERCENT_CRITICAL = 200
-
THRIFT_PORT_DEFAULT = 10000
HIVE_SERVER_TRANSPORT_MODE_DEFAULT = 'binary'
HIVE_SERVER_PRINCIPAL_DEFAULT = 'hive/_HOST@EXAMPLE.COM'
HIVE_SERVER2_AUTHENTICATION_DEFAULT = 'NOSASL'
+
+# default keytab location
+SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY = 'default.smoke.keytab'
SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab'
+
+# default smoke principal
+SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY = 'default.smoke.principal'
SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari-qa@EXAMPLE.COM'
+
+# default smoke user
+SMOKEUSER_SCRIPT_PARAM_KEY = 'default.smoke.user'
SMOKEUSER_DEFAULT = 'ambari-qa'
def get_tokens():
@@ -59,62 +65,76 @@ def get_tokens():
return (HIVE_SERVER_THRIFT_PORT_KEY,SECURITY_ENABLED_KEY, SMOKEUSER_KEY,
HIVE_SERVER2_AUTHENTICATION_KEY,HIVE_SERVER_PRINCIPAL_KEY,
SMOKEUSER_KEYTAB_KEY,SMOKEUSER_PRINCIPAL_KEY,HIVE_SERVER_THRIFT_HTTP_PORT_KEY,
- HIVE_SERVER_TRANSPORT_MODE_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
+ HIVE_SERVER_TRANSPORT_MODE_KEY,KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
- if parameters is None:
- return ('UNKNOWN', ['There were no parameters supplied to the script.'])
+ if configurations is None:
+ return ('UNKNOWN', ['There were no configurations supplied to the script.'])
transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
- if HIVE_SERVER_TRANSPORT_MODE_KEY in parameters:
- transport_mode = parameters[HIVE_SERVER_TRANSPORT_MODE_KEY]
+ if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations:
+ transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY]
port = THRIFT_PORT_DEFAULT
- if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in parameters:
- port = int(parameters[HIVE_SERVER_THRIFT_PORT_KEY])
- elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in parameters:
- port = int(parameters[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])
+ if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in configurations:
+ port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY])
+ elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in configurations:
+ port = int(configurations[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])
security_enabled = False
- if SECURITY_ENABLED_KEY in parameters:
- security_enabled = str(parameters[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
+ if SECURITY_ENABLED_KEY in configurations:
+ security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT
- if HIVE_SERVER2_AUTHENTICATION_KEY in parameters:
- hive_server2_authentication = parameters[HIVE_SERVER2_AUTHENTICATION_KEY]
+ if HIVE_SERVER2_AUTHENTICATION_KEY in configurations:
+ hive_server2_authentication = configurations[HIVE_SERVER2_AUTHENTICATION_KEY]
+ # defaults
+ smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
- if SMOKEUSER_PRINCIPAL_KEY in parameters:
- smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_KEY]
-
smokeuser = SMOKEUSER_DEFAULT
- if SMOKEUSER_KEY in parameters:
- smokeuser = parameters[SMOKEUSER_KEY]
+
+ # check script params
+ if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
+ smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]
+
+ if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
+ smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]
+
+ if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
+ smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]
+
+
+ # check configurations last as they should always take precedence
+ if SMOKEUSER_PRINCIPAL_KEY in configurations:
+ smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]
+
+ if SMOKEUSER_KEY in configurations:
+ smokeuser = configurations[SMOKEUSER_KEY]
result_code = None
if security_enabled:
hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT
- if HIVE_SERVER_PRINCIPAL_KEY in parameters:
- hive_server_principal = parameters[HIVE_SERVER_PRINCIPAL_KEY]
-
- smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
+ if HIVE_SERVER_PRINCIPAL_KEY in configurations:
+ hive_server_principal = configurations[HIVE_SERVER_PRINCIPAL_KEY]
- if SMOKEUSER_KEYTAB_KEY in parameters:
- smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]
+ if SMOKEUSER_KEYTAB_KEY in configurations:
+ smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]
# Get the configured Kerberos executable search paths, if any
- if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
- kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
+ if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
+ kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
else:
kerberos_executable_search_paths = None
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py
index eaad265..c0dc18a 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py
@@ -24,7 +24,8 @@ from resource_management.libraries.functions import format
from resource_management.libraries.functions import get_kinit_path
from resource_management.libraries.functions import get_klist_path
from ambari_commons.os_check import OSConst, OSCheck
-from os import getpid, sep
+from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl
+import os
from urlparse import urlparse
RESULT_CODE_OK = 'OK'
@@ -39,6 +40,17 @@ SECURITY_ENABLED = '{{cluster-env/security_enabled}}'
OOZIE_PRINCIPAL = '{{oozie-site/oozie.authentication.kerberos.principal}}'
OOZIE_KEYTAB = '{{oozie-site/oozie.authentication.kerberos.keytab}}'
+class KerberosPropertiesNotFound(Exception): pass
+
+@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (OOZIE_URL_KEY,)
+
+@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
def get_tokens():
"""
Returns a tuple of tokens in the format {{site/property}} that will be used
@@ -46,78 +58,88 @@ def get_tokens():
"""
return (OOZIE_URL_KEY, OOZIE_PRINCIPAL, SECURITY_ENABLED, OOZIE_KEYTAB, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
-def execute(parameters=None, host_name=None):
+@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
+def get_check_command(oozie_url, host_name, configurations):
+ from resource_management.libraries.functions import reload_windows_env
+ reload_windows_env()
+ oozie_home = os.environ['OOZIE_HOME']
+ command = format("{oozie_home}\\bin\\oozie.cmd admin -oozie {oozie_url} -status")
+ return (command, None)
+
+@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
+def get_check_command(oozie_url, host_name, configurations):
+ security_enabled = False
+ if SECURITY_ENABLED in configurations:
+ security_enabled = str(configurations[SECURITY_ENABLED]).upper() == 'TRUE'
+ kerberos_env = None
+ if security_enabled:
+ if OOZIE_KEYTAB in configurations and OOZIE_PRINCIPAL in configurations:
+ oozie_keytab = configurations[OOZIE_KEYTAB]
+ oozie_principal = configurations[OOZIE_PRINCIPAL]
+
+ # substitute _HOST in kerberos principal with actual fqdn
+ oozie_principal = oozie_principal.replace('_HOST', host_name)
+ else:
+ raise KerberosPropertiesNotFound('The Oozie keytab and principal are required configurations when security is enabled.')
+
+ # Create the kerberos credentials cache (ccache) file and set it in the environment to use
+ # when executing the oozie admin command
+ env = Environment.get_instance()
+ ccache_file = "{0}{1}oozie_alert_cc_{2}".format(env.tmp_dir, os.sep, os.getpid())
+ kerberos_env = {'KRB5CCNAME': ccache_file}
+
+ # Get the configured Kerberos executable search paths, if any
+ if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
+ kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
+ else:
+ kerberos_executable_search_paths = None
+
+ klist_path_local = get_klist_path(kerberos_executable_search_paths)
+ klist_command = format("{klist_path_local} -s {ccache_file}")
+
+ # Determine if we need to kinit by testing to see if the relevant cache exists and has
+ # non-expired tickets. Tickets are marked to expire after 5 minutes to help reduce the number
+ # of kinits we do but recover quickly when keytabs are regenerated
+ return_code, _ = call(klist_command)
+ if return_code != 0:
+ kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
+ kinit_command = format("{kinit_path_local} -l 5m -kt {oozie_keytab} {oozie_principal}; ")
+
+ # kinit
+ Execute(kinit_command, environment=kerberos_env)
+ command = format("source /etc/oozie/conf/oozie-env.sh ; oozie admin -oozie {oozie_url} -status")
+ return (command, kerberos_env)
+
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
- if parameters is None:
- return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+ if configurations is None:
+ return (RESULT_CODE_UNKNOWN, ['There were no configurations supplied to the script.'])
- if not OOZIE_URL_KEY in parameters:
+ if not OOZIE_URL_KEY in configurations:
return (RESULT_CODE_UNKNOWN, ['The Oozie URL is a required parameter.'])
# use localhost on Windows, 0.0.0.0 on others; 0.0.0.0 means bind to all
# interfaces, which doesn't work on Windows
localhost_address = 'localhost' if OSCheck.get_os_family() == OSConst.WINSRV_FAMILY else '0.0.0.0'
- oozie_url = parameters[OOZIE_URL_KEY]
+ oozie_url = configurations[OOZIE_URL_KEY]
oozie_url = oozie_url.replace(urlparse(oozie_url).hostname,localhost_address)
- security_enabled = False
- if SECURITY_ENABLED in parameters:
- security_enabled = str(parameters[SECURITY_ENABLED]).upper() == 'TRUE'
-
- command = format("source /etc/oozie/conf/oozie-env.sh ; oozie admin -oozie {oozie_url} -status")
-
try:
- # kinit if security is enabled so that oozie-env.sh can make the web request
- kerberos_env = None
-
- if security_enabled:
- if OOZIE_KEYTAB in parameters and OOZIE_PRINCIPAL in parameters:
- oozie_keytab = parameters[OOZIE_KEYTAB]
- oozie_principal = parameters[OOZIE_PRINCIPAL]
-
- # substitute _HOST in kerberos principal with actual fqdn
- oozie_principal = oozie_principal.replace('_HOST', host_name)
- else:
- return (RESULT_CODE_UNKNOWN, ['The Oozie keytab and principal are required parameters when security is enabled.'])
-
- # Create the kerberos credentials cache (ccache) file and set it in the environment to use
- # when executing curl
- env = Environment.get_instance()
- ccache_file = "{0}{1}oozie_alert_cc_{2}".format(env.tmp_dir, sep, getpid())
- kerberos_env = {'KRB5CCNAME': ccache_file}
-
- # Get the configured Kerberos executable search paths, if any
- if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
- kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
- else:
- kerberos_executable_search_paths = None
-
- klist_path_local = get_klist_path(kerberos_executable_search_paths)
- klist_command = format("{klist_path_local} -s {ccache_file}")
-
- # Determine if we need to kinit by testing to see if the relevant cache exists and has
- # non-expired tickets. Tickets are marked to expire after 5 minutes to help reduce the number
- # it kinits we do but recover quickly when keytabs are regenerated
- return_code, _ = call(klist_command)
- if return_code != 0:
- kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
- kinit_command = format("{kinit_path_local} -l 5m -kt {oozie_keytab} {oozie_principal}; ")
-
- # kinit
- Execute(kinit_command, environment=kerberos_env)
-
+ command, env = get_check_command(oozie_url, host_name, configurations)
# execute the command
- Execute(command, environment=kerberos_env)
+ Execute(command, environment=env)
return (RESULT_CODE_OK, ["Successful connection to {0}".format(oozie_url)])
-
+ except KerberosPropertiesNotFound, ex:
+ return (RESULT_CODE_UNKNOWN, [str(ex)])
except Exception, ex:
return (RESULT_CODE_CRITICAL, [str(ex)])
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
index dd20be4..7ee375e 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
@@ -53,8 +53,10 @@ KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}
WEBHCAT_OK_RESPONSE = 'ok'
WEBHCAT_PORT_DEFAULT = 50111
-CURL_CONNECTION_TIMEOUT = '5'
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
+CURL_CONNECTION_TIMEOUT_DEFAULT = str(int(CONNECTION_TIMEOUT_DEFAULT))
+
def get_tokens():
"""
@@ -64,27 +66,36 @@ def get_tokens():
return (TEMPLETON_PORT_KEY, SECURITY_ENABLED_KEY, WEBHCAT_KEYTAB_KEY, WEBHCAT_PRINCIPAL_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
result_code = RESULT_CODE_UNKNOWN
- if parameters is None:
- return (result_code, ['There were no parameters supplied to the script.'])
+ if configurations is None:
+ return (result_code, ['There were no configurations supplied to the script.'])
webhcat_port = WEBHCAT_PORT_DEFAULT
- if TEMPLETON_PORT_KEY in parameters:
- webhcat_port = int(parameters[TEMPLETON_PORT_KEY])
+ if TEMPLETON_PORT_KEY in configurations:
+ webhcat_port = int(configurations[TEMPLETON_PORT_KEY])
security_enabled = False
- if SECURITY_ENABLED_KEY in parameters:
- security_enabled = parameters[SECURITY_ENABLED_KEY].lower() == 'true'
+ if SECURITY_ENABLED_KEY in configurations:
+ security_enabled = configurations[SECURITY_ENABLED_KEY].lower() == 'true'
+
+ # parse script arguments
+ connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+ curl_connection_timeout = CURL_CONNECTION_TIMEOUT_DEFAULT
+ if CONNECTION_TIMEOUT_KEY in parameters:
+ connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
+ curl_connection_timeout = str(int(connection_timeout))
+
# the alert will always run on the webhcat host
if host_name is None:
@@ -98,12 +109,12 @@ def execute(parameters=None, host_name=None):
json_response = {}
if security_enabled:
- if WEBHCAT_KEYTAB_KEY not in parameters or WEBHCAT_PRINCIPAL_KEY not in parameters:
- return (RESULT_CODE_UNKNOWN, [str(parameters)])
+ if WEBHCAT_KEYTAB_KEY not in configurations or WEBHCAT_PRINCIPAL_KEY not in configurations:
+ return (RESULT_CODE_UNKNOWN, [str(configurations)])
try:
- webhcat_keytab = parameters[WEBHCAT_KEYTAB_KEY]
- webhcat_principal = parameters[WEBHCAT_PRINCIPAL_KEY]
+ webhcat_keytab = configurations[WEBHCAT_KEYTAB_KEY]
+ webhcat_principal = configurations[WEBHCAT_PRINCIPAL_KEY]
# substitute _HOST in kerberos principal with actual fqdn
webhcat_principal = webhcat_principal.replace('_HOST', host_name)
@@ -115,8 +126,8 @@ def execute(parameters=None, host_name=None):
kerberos_env = {'KRB5CCNAME': ccache_file}
# Get the configured Kerberos executable search paths, if any
- if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
- kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
+ if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
+ kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
else:
kerberos_executable_search_paths = None
@@ -136,7 +147,7 @@ def execute(parameters=None, host_name=None):
# make a single curl call to get just the http code
curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL', '-w',
- '%{http_code}', '--connect-timeout', CURL_CONNECTION_TIMEOUT,
+ '%{http_code}', '--connect-timeout', curl_connection_timeout,
'-o', '/dev/null', query_url], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)
stdout, stderr = curl.communicate()
@@ -160,7 +171,7 @@ def execute(parameters=None, host_name=None):
# now that we have the http status and it was 200, get the content
start_time = time.time()
curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL',
- '--connect-timeout', CURL_CONNECTION_TIMEOUT, query_url, ],
+ '--connect-timeout', curl_connection_timeout, query_url, ],
stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)
stdout, stderr = curl.communicate()
@@ -178,7 +189,7 @@ def execute(parameters=None, host_name=None):
try:
# execute the query for the JSON that includes WebHCat status
start_time = time.time()
- url_response = urllib2.urlopen(query_url, timeout=CONNECTION_TIMEOUT)
+ url_response = urllib2.urlopen(query_url, timeout=connection_timeout)
total_time = time.time() - start_time
json_response = json.loads(url_response.read())
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json
index 94f0926..fa1e20a 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json
@@ -203,7 +203,18 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py"
+ "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py",
+ "parameters": [
+ {
+ "name": "connection.timeout",
+ "display_name": "Connection Timeout",
+ "value": 5.0,
+ "type": "NUMERIC",
+ "description": "The maximum time before this alert is considered to be CRITICAL",
+ "units": "seconds",
+ "threshold": "CRITICAL"
+ }
+ ]
}
}
],
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
index 516d858..1cdeb97 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
@@ -40,7 +40,8 @@ CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager hea
NODEMANAGER_DEFAULT_PORT = 8042
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
def get_tokens():
"""
@@ -51,32 +52,40 @@ def get_tokens():
YARN_HTTP_POLICY_KEY)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
result_code = RESULT_CODE_UNKNOWN
- if parameters is None:
- return (result_code, ['There were no parameters supplied to the script.'])
+ if configurations is None:
+ return (result_code, ['There were no configurations supplied to the script.'])
scheme = 'http'
http_uri = None
https_uri = None
http_policy = 'HTTP_ONLY'
- if NODEMANAGER_HTTP_ADDRESS_KEY in parameters:
- http_uri = parameters[NODEMANAGER_HTTP_ADDRESS_KEY]
+ if NODEMANAGER_HTTP_ADDRESS_KEY in configurations:
+ http_uri = configurations[NODEMANAGER_HTTP_ADDRESS_KEY]
- if NODEMANAGER_HTTPS_ADDRESS_KEY in parameters:
- https_uri = parameters[NODEMANAGER_HTTPS_ADDRESS_KEY]
+ if NODEMANAGER_HTTPS_ADDRESS_KEY in configurations:
+ https_uri = configurations[NODEMANAGER_HTTPS_ADDRESS_KEY]
+
+ if YARN_HTTP_POLICY_KEY in configurations:
+ http_policy = configurations[YARN_HTTP_POLICY_KEY]
+
+
+ # parse script arguments
+ connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+ if CONNECTION_TIMEOUT_KEY in parameters:
+ connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
- if YARN_HTTP_POLICY_KEY in parameters:
- http_policy = parameters[YARN_HTTP_POLICY_KEY]
# determine the right URI and whether to use SSL
uri = http_uri
@@ -108,7 +117,7 @@ def execute(parameters=None, host_name=None):
try:
# execute the query for the JSON that includes templeton status
- url_response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
+ url_response = urllib2.urlopen(query, timeout=connection_timeout)
except urllib2.HTTPError, httpError:
label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query,
str(httpError))
@@ -122,6 +131,7 @@ def execute(parameters=None, host_name=None):
try:
json_response = json.loads(url_response.read())
node_healthy = json_response['nodeInfo']['nodeHealthy']
+ node_healthy_report = json_response['nodeInfo']['healthReport']
# convert boolean to string
node_healthy = str(node_healthy)
@@ -138,6 +148,9 @@ def execute(parameters=None, host_name=None):
if node_healthy.lower() == 'true':
result_code = RESULT_CODE_OK
label = OK_MESSAGE
+ elif node_healthy.lower() == 'false':
+ result_code = RESULT_CODE_CRITICAL
+ label = node_healthy_report
else:
result_code = RESULT_CODE_CRITICAL
label = CRITICAL_NODEMANAGER_STATUS_MESSAGE.format(node_healthy)
[2/2] ambari git commit: AMBARI-10816 - Expose Customizable
Parameters For SCRIPT Alerts (jonathanhurley)
Posted by jo...@apache.org.
AMBARI-10816 - Expose Customizable Parameters For SCRIPT Alerts (jonathanhurley)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/6727c1dc
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/6727c1dc
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/6727c1dc
Branch: refs/heads/trunk
Commit: 6727c1dc8c32abbc4ea1a7e3c9652707bb4fd23d
Parents: bc55f2d
Author: Jonathan Hurley <jh...@hortonworks.com>
Authored: Wed Apr 29 11:51:57 2015 -0400
Committer: Jonathan Hurley <jh...@hortonworks.com>
Committed: Wed Apr 29 11:52:04 2015 -0400
----------------------------------------------------------------------
.../python/ambari_agent/alerts/script_alert.py | 22 ++-
.../src/test/python/ambari_agent/TestAlerts.py | 55 ++++++
.../ambari_agent/dummy_files/test_script.py | 19 +-
.../ambari/server/state/alert/ScriptSource.java | 197 ++++++++++++++++++-
ambari-server/src/main/resources/alerts.json | 31 ++-
.../alerts/alert_ambari_metrics_monitor.py | 13 +-
.../common-services/FLUME/1.4.0.2.0/alerts.json | 11 +-
.../package/alerts/alert_flume_agent_status.py | 23 ++-
.../common-services/HDFS/2.1.0.2.0/alerts.json | 44 ++++-
.../package/alerts/alert_checkpoint_time.py | 68 ++++---
.../package/alerts/alert_ha_namenode_health.py | 36 ++--
.../common-services/HIVE/0.12.0.2.0/alerts.json | 63 +++++-
.../package/alerts/alert_hive_metastore.py | 66 ++++---
.../package/alerts/alert_hive_thrift_port.py | 81 +++++---
.../package/alerts/alert_webhcat_server.py | 49 +++--
.../package/alerts/alert_check_oozie_server.py | 35 ++--
.../alerts/check_supervisor_process_win.py | 5 +-
.../common-services/YARN/2.1.0.2.0/alerts.json | 26 ++-
.../package/alerts/alert_nodemanager_health.py | 33 ++--
.../alerts/alert_nodemanagers_summary.py | 38 ++--
.../resources/host_scripts/alert_disk_space.py | 55 ++++--
.../BIGTOP/0.8/services/FLUME/alerts.json | 11 +-
.../package/files/alert_flume_agent_status.py | 25 ++-
.../stacks/BIGTOP/0.8/services/HDFS/alerts.json | 44 ++++-
.../HDFS/package/files/alert_checkpoint_time.py | 68 ++++---
.../package/files/alert_ha_namenode_health.py | 40 ++--
.../package/files/alert_hive_thrift_port.py | 82 +++++---
.../package/files/alert_check_oozie_server.py | 130 +++++++-----
.../package/files/alert_webhcat_server.py | 49 +++--
.../stacks/BIGTOP/0.8/services/YARN/alerts.json | 13 +-
.../package/files/alert_nodemanager_health.py | 37 ++--
31 files changed, 1096 insertions(+), 373 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py
index d79e0a7..b8b4daf 100644
--- a/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py
+++ b/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py
@@ -46,6 +46,7 @@ class ScriptAlert(BaseAlert):
self.common_services_dir = None
self.host_scripts_dir = None
self.path_to_script = None
+ self.parameters = {}
if 'path' in alert_source_meta:
self.path = alert_source_meta['path']
@@ -59,11 +60,24 @@ class ScriptAlert(BaseAlert):
if 'host_scripts_directory' in alert_source_meta:
self.host_scripts_dir = alert_source_meta['host_scripts_directory']
+ # convert a list of script parameters, like timeouts, into a dictionary
+ # so that the scripts can easily look up the data
+ if 'parameters' in alert_source_meta:
+ parameters = alert_source_meta['parameters']
+ for parameter in parameters:
+ if 'name' not in parameter or 'value' not in parameter:
+ continue
+
+ # create the dictionary value
+ parameter_name = parameter['name']
+ parameter_value = parameter['value']
+ self.parameters[parameter_name] = parameter_value
+
def _collect(self):
cmd_module = self._load_source()
if cmd_module is not None:
- parameters = {}
+ configurations = {}
try:
tokens = cmd_module.get_tokens()
@@ -73,7 +87,7 @@ class ScriptAlert(BaseAlert):
for token in tokens:
value = self._get_configuration_value(token)
if value is not None:
- parameters[token] = value
+ configurations[token] = value
except AttributeError:
# it's OK if the module doesn't have get_tokens() ; no tokens will
# be passed in so hopefully the script doesn't need any
@@ -85,9 +99,9 @@ class ScriptAlert(BaseAlert):
if matchObj:
basedir = matchObj.group(1)
with Environment(basedir, tmp_dir=self.config.get('agent', 'tmp_dir')) as env:
- return cmd_module.execute(parameters, self.host_name)
+ return cmd_module.execute(configurations, self.parameters, self.host_name)
else:
- return cmd_module.execute(parameters, self.host_name)
+ return cmd_module.execute(configurations, self.parameters, self.host_name)
else:
return (self.RESULT_UNKNOWN, ["Unable to execute script {0}".format(self.path)])
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
index a574422..af357bc 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
@@ -210,6 +210,40 @@ class TestAlerts(TestCase):
self.assertEquals('bar is rendered-bar, baz is rendered-baz', alerts[0]['text'])
+ def test_script_alert_with_parameters(self):
+ definition_json = self._get_script_alert_definition_with_parameters()
+
+ # normally set by AlertSchedulerHandler
+ definition_json['source']['stacks_directory'] = os.path.join('ambari_agent', 'dummy_files')
+ definition_json['source']['common_services_directory'] = os.path.join('ambari_agent', 'common-services')
+ definition_json['source']['host_scripts_directory'] = os.path.join('ambari_agent', 'host_scripts')
+
+ configuration = {'foo-site' :
+ { 'bar': 'rendered-bar', 'baz' : 'rendered-baz' }
+ }
+
+ collector = AlertCollector()
+ cluster_configuration = self.__get_cluster_configuration()
+ self.__update_cluster_configuration(cluster_configuration, configuration)
+
+ alert = ScriptAlert(definition_json, definition_json['source'], MagicMock())
+ alert.set_helpers(collector, cluster_configuration )
+ alert.set_cluster("c1", "c6401.ambari.apache.org")
+
+ self.assertEquals(definition_json['source']['path'], alert.path)
+ self.assertEquals(definition_json['source']['stacks_directory'], alert.stacks_dir)
+ self.assertEquals(definition_json['source']['common_services_directory'], alert.common_services_dir)
+ self.assertEquals(definition_json['source']['host_scripts_directory'], alert.host_scripts_dir)
+
+ alert.collect()
+
+ alerts = collector.alerts()
+ self.assertEquals(0, len(collector.alerts()))
+
+ self.assertEquals('OK', alerts[0]['state'])
+ self.assertEquals('Script parameter detected: foo bar baz', alerts[0]['text'])
+
+
@patch.object(MetricAlert, "_load_jmx")
def test_metric_alert(self, ma_load_jmx_mock):
definition_json = self._get_metric_alert_definition()
@@ -969,6 +1003,27 @@ class TestAlerts(TestCase):
}
}
+ def _get_script_alert_definition_with_parameters(self):
+ return {
+ "name": "namenode_process",
+ "service": "HDFS",
+ "component": "NAMENODE",
+ "label": "NameNode process",
+ "interval": 6,
+ "scope": "host",
+ "enabled": True,
+ "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
+ "source": {
+ "type": "SCRIPT",
+ "path": "test_script.py",
+ "parameters": [
+ {
+ "name": "script.parameter.foo",
+ "value": "foo bar baz"
+ }
+ ]
+ }
+ }
def _get_port_alert_definition(self):
return { "name": "namenode_process",
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-agent/src/test/python/ambari_agent/dummy_files/test_script.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/dummy_files/test_script.py b/ambari-agent/src/test/python/ambari_agent/dummy_files/test_script.py
index 3598d86..de4f9b9 100644
--- a/ambari-agent/src/test/python/ambari_agent/dummy_files/test_script.py
+++ b/ambari-agent/src/test/python/ambari_agent/dummy_files/test_script.py
@@ -26,18 +26,23 @@ def get_tokens():
return ('{{foo-site/bar}}','{{foo-site/baz}}')
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
'''
returns a tuple containing the result code and a pre-formatted result label
'''
- if parameters is not None:
- if '{{foo-site/bar}}' in parameters:
- bar = parameters['{{foo-site/bar}}']
+
+ # short circuit the script when a parameter is present
+ if "script.parameter.foo" in parameters:
+ return "OK", ["Script parameter detected: " + parameters["script.parameter.foo"]]
+
+ if configurations is not None:
+ if '{{foo-site/bar}}' in configurations:
+ bar = configurations['{{foo-site/bar}}']
- if '{{foo-site/baz}}' in parameters:
- baz = parameters['{{foo-site/baz}}']
+ if '{{foo-site/baz}}' in configurations:
+ baz = configurations['{{foo-site/baz}}']
- if '{{foo-site/skip}}' in parameters:
+ if '{{foo-site/skip}}' in configurations:
return ('SKIPPED', ['This alert is skipped and will not be in the collector'])
label = "bar is {0}, baz is {1}".format(bar, baz)
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/java/org/apache/ambari/server/state/alert/ScriptSource.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/state/alert/ScriptSource.java b/ambari-server/src/main/java/org/apache/ambari/server/state/alert/ScriptSource.java
index 4c84bcf..f67a135 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/state/alert/ScriptSource.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/state/alert/ScriptSource.java
@@ -17,6 +17,10 @@
*/
package org.apache.ambari.server.state.alert;
+import java.util.List;
+
+import org.apache.ambari.server.state.AlertState;
+
import com.google.gson.annotations.SerializedName;
/**
@@ -31,6 +35,12 @@ public class ScriptSource extends Source {
private String m_path = null;
/**
+ * A list of all of the script parameters, if any.
+ */
+ @SerializedName("parameters")
+ private List<ScriptParameter> m_parameters;
+
+ /**
* @return the path to the script file.
*/
public String getPath() {
@@ -38,7 +48,7 @@ public class ScriptSource extends Source {
}
/**
- *
+ * {@inheritDoc}
*/
@Override
public int hashCode() {
@@ -50,7 +60,7 @@ public class ScriptSource extends Source {
}
/**
- *
+ * {@inheritDoc}
*/
@Override
public boolean equals(Object obj) {
@@ -78,4 +88,187 @@ public class ScriptSource extends Source {
return true;
}
+
+ /**
+ * The {@link ScriptParameter} class represents a single parameter that can be
+ * passed into a script alert.
+ */
+ public static class ScriptParameter {
+ @SerializedName("name")
+ private String m_name;
+
+ @SerializedName("display_name")
+ private String m_displayName;
+
+ @SerializedName("units")
+ private String m_units;
+
+ @SerializedName("value")
+ private Object m_value;
+
+ @SerializedName("description")
+ private String m_description;
+
+ @SerializedName("type")
+ private ScriptParameterType m_type;
+
+ /**
+ * If this script parameter controls a threshold, then it's specified here,
+ * otherwise it's {@code null}.
+ */
+ @SerializedName("threshold")
+ private AlertState m_threshold;
+
+ /**
+ * Gets the unique name of the parameter.
+ *
+ * @return the name
+ */
+ public String getName() {
+ return m_name;
+ }
+
+ /**
+ * Gets the human readable name of the parameter.
+ *
+ * @return the displayName
+ */
+ public String getDisplayName() {
+ return m_displayName;
+ }
+
+ /**
+ * Gets the display units of the parameter.
+ *
+ * @return the units
+ */
+ public String getUnits() {
+ return m_units;
+ }
+
+ /**
+ * Gets the value of the parameter.
+ *
+ * @return the value
+ */
+ public Object getValue() {
+ return m_value;
+ }
+
+ /**
+ * Gets the description of the parameter.
+ *
+ * @return the description
+ */
+ public String getDescription() {
+ return m_description;
+ }
+
+ /**
+ * Gets the threshold that this parameter directly controls, or {@code null}
+ * for none.
+ *
+ * @return the threshold, or {@code null}.
+ */
+ public AlertState getThreshold() {
+ return m_threshold;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + ((m_description == null) ? 0 : m_description.hashCode());
+ result = prime * result + ((m_displayName == null) ? 0 : m_displayName.hashCode());
+ result = prime * result + ((m_name == null) ? 0 : m_name.hashCode());
+ result = prime * result + ((m_threshold == null) ? 0 : m_threshold.hashCode());
+ result = prime * result + ((m_type == null) ? 0 : m_type.hashCode());
+ result = prime * result + ((m_units == null) ? 0 : m_units.hashCode());
+ result = prime * result + ((m_value == null) ? 0 : m_value.hashCode());
+ return result;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ ScriptParameter other = (ScriptParameter) obj;
+ if (m_description == null) {
+ if (other.m_description != null) {
+ return false;
+ }
+ } else if (!m_description.equals(other.m_description)) {
+ return false;
+ }
+ if (m_displayName == null) {
+ if (other.m_displayName != null) {
+ return false;
+ }
+ } else if (!m_displayName.equals(other.m_displayName)) {
+ return false;
+ }
+ if (m_name == null) {
+ if (other.m_name != null) {
+ return false;
+ }
+ } else if (!m_name.equals(other.m_name)) {
+ return false;
+ }
+ if (m_threshold != other.m_threshold) {
+ return false;
+ }
+ if (m_type != other.m_type) {
+ return false;
+ }
+ if (m_units == null) {
+ if (other.m_units != null) {
+ return false;
+ }
+ } else if (!m_units.equals(other.m_units)) {
+ return false;
+ }
+ if (m_value == null) {
+ if (other.m_value != null) {
+ return false;
+ }
+ } else if (!m_value.equals(other.m_value)) {
+ return false;
+ }
+ return true;
+ }
+
+
+ /**
+ * The {@link ScriptParameterType} enum represents the value type.
+ */
+ public enum ScriptParameterType {
+ /**
+ * String
+ */
+ STRING,
+
+ /**
+ * Integers, longs, floats, etc.
+ */
+ NUMERIC,
+
+ /**
+ * A percent value, expressed as a float.
+ */
+ PERCENT
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/alerts.json b/ambari-server/src/main/resources/alerts.json
index 0d19f42..a7cef7f 100644
--- a/ambari-server/src/main/resources/alerts.json
+++ b/ambari-server/src/main/resources/alerts.json
@@ -38,7 +38,36 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "alert_disk_space.py"
+ "path": "alert_disk_space.py",
+ "parameters": [
+ {
+ "name": "minimum.free.space",
+ "display_name": "Minimum Free Space",
+ "value": 5000000000,
+ "type": "NUMERIC",
+ "description": "The overall amount of free disk space left before an alert is triggered.",
+ "units": "bytes",
+ "threshold": "WARNING"
+ },
+ {
+ "name": "percent.used.space.warning.threshold",
+ "display_name": "Warning",
+ "value": 0.5,
+ "type": "PERCENT",
+ "description": "The percent of disk space consumed before a warning is triggered.",
+ "units": "%",
+ "threshold": "WARNING"
+ },
+ {
+ "name": "percent.free.space.critical.threshold",
+ "display_name": "Critical",
+ "value": 0.8,
+ "type": "PERCENT",
+ "description": "The percent of disk space consumed before a critical alert is triggered.",
+ "units": "%",
+ "threshold": "CRITICAL"
+ }
+ ]
}
}
]
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/alerts/alert_ambari_metrics_monitor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/alerts/alert_ambari_metrics_monitor.py b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/alerts/alert_ambari_metrics_monitor.py
index 04a2e01..fa44a7f 100644
--- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/alerts/alert_ambari_metrics_monitor.py
+++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/alerts/alert_ambari_metrics_monitor.py
@@ -73,20 +73,21 @@ def is_monitor_process_live(pid_file):
return live
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
- if parameters is None:
- return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+ if configurations is None:
+ return (RESULT_CODE_UNKNOWN, ['There were no configurations supplied to the script.'])
- if set([AMS_MONITOR_PID_DIR]).issubset(parameters):
- AMS_MONITOR_PID_PATH = os.path.join(parameters[AMS_MONITOR_PID_DIR], 'ambari-metrics-monitor.pid')
+ if set([AMS_MONITOR_PID_DIR]).issubset(configurations):
+ AMS_MONITOR_PID_PATH = os.path.join(configurations[AMS_MONITOR_PID_DIR], 'ambari-metrics-monitor.pid')
else:
return (RESULT_CODE_UNKNOWN, ['The ams_monitor_pid_dir is a required parameter.'])
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/alerts.json b/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/alerts.json
index e23e77b..80ebb91 100644
--- a/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/alerts.json
+++ b/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/alerts.json
@@ -10,7 +10,16 @@
"scope": "ANY",
"source": {
"type": "SCRIPT",
- "path": "FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py"
+ "path": "FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py",
+ "parameters": [
+ {
+ "name": "run.directory",
+ "display_name": "Run Directory",
+ "value": "/var/run/flume",
+ "type": "STRING",
+ "description": "The directory where flume agent processes will place their PID files."
+ }
+ ]
}
}
]
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py b/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py
index b183bbc..5236d37 100644
--- a/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py
+++ b/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py
@@ -29,7 +29,8 @@ RESULT_CODE_UNKNOWN = 'UNKNOWN'
FLUME_CONF_DIR_KEY = '{{flume-env/flume_conf_dir}}'
-FLUME_RUN_DIR = '/var/run/flume'
+FLUME_RUN_DIR_KEY = "run.directory"
+FLUME_RUN_DIR_DEFAULT = '/var/run/flume'
def get_tokens():
"""
@@ -39,21 +40,22 @@ def get_tokens():
return (FLUME_CONF_DIR_KEY,)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
- if parameters is None:
- return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+ if configurations is None:
+ return (RESULT_CODE_UNKNOWN, ['There were no configurations supplied to the script.'])
flume_conf_directory = None
- if FLUME_CONF_DIR_KEY in parameters:
- flume_conf_directory = parameters[FLUME_CONF_DIR_KEY]
+ if FLUME_CONF_DIR_KEY in configurations:
+ flume_conf_directory = configurations[FLUME_CONF_DIR_KEY]
if flume_conf_directory is None:
return (RESULT_CODE_UNKNOWN, ['The Flume configuration directory is a required parameter.'])
@@ -61,7 +63,12 @@ def execute(parameters=None, host_name=None):
if host_name is None:
host_name = socket.getfqdn()
- processes = get_flume_status(flume_conf_directory, FLUME_RUN_DIR)
+ # parse script arguments
+ flume_run_directory = FLUME_RUN_DIR_DEFAULT
+ if FLUME_RUN_DIR_KEY in parameters:
+ flume_run_directory = parameters[FLUME_RUN_DIR_KEY]
+
+ processes = get_flume_status(flume_conf_directory, flume_run_directory)
expected_agents = find_expected_agent_names(flume_conf_directory)
alert_label = ''
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
index b67c306..9502e14 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
@@ -387,7 +387,36 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py"
+ "path": "HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py",
+ "parameters": [
+ {
+ "name": "connection.timeout",
+ "display_name": "Connection Timeout",
+ "value": 5.0,
+ "type": "NUMERIC",
+ "description": "The maximum time before this alert is considered to be CRITICAL",
+ "units": "seconds",
+ "threshold": "CRITICAL"
+ },
+ {
+ "name": "checkpoint.time.warning.threshold",
+ "display_name": "Checkpoint Warning",
+ "value": 2.0,
+ "type": "PERCENT",
+ "description": "The percentage of the last checkpoint time greater than the interval in order to trigger a warning alert.",
+ "units": "%",
+ "threshold": "WARNING"
+ },
+ {
+ "name": "checkpoint.time.critical.threshold",
+ "display_name": "Checkpoint Critical",
+ "value": 2.0,
+ "type": "PERCENT",
+ "description": "The percentage of the last checkpoint time greater than the interval in order to trigger a critical alert.",
+ "units": "%",
+ "threshold": "CRITICAL"
+ }
+ ]
}
},
{
@@ -400,7 +429,18 @@
"ignore_host": true,
"source": {
"type": "SCRIPT",
- "path": "HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py"
+ "path": "HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py",
+ "parameters": [
+ {
+ "name": "connection.timeout",
+ "display_name": "Connection Timeout",
+ "value": 5.0,
+ "type": "NUMERIC",
+ "description": "The maximum time before this alert is considered to be CRITICAL",
+ "units": "seconds",
+ "threshold": "CRITICAL"
+ }
+ ]
}
}
],
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
index 2455d3b..b44c2d4 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
@@ -30,13 +30,17 @@ NN_HTTP_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
NN_CHECKPOINT_TX_KEY = '{{hdfs-site/dfs.namenode.checkpoint.txns}}'
NN_CHECKPOINT_PERIOD_KEY = '{{hdfs-site/dfs.namenode.checkpoint.period}}'
-PERCENT_WARNING = 200
-PERCENT_CRITICAL = 200
+PERCENT_WARNING_KEY = 'checkpoint.time.warning.threshold'
+PERCENT_WARNING_DEFAULT = 200
+
+PERCENT_CRITICAL_KEY = 'checkpoint.time.critical.threshold'
+PERCENT_CRITICAL_DEFAULT = 200
CHECKPOINT_TX_DEFAULT = 1000000
CHECKPOINT_PERIOD_DEFAULT = 21600
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
def get_tokens():
"""
@@ -47,43 +51,55 @@ def get_tokens():
NN_CHECKPOINT_TX_KEY, NN_CHECKPOINT_PERIOD_KEY)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
- if parameters is None:
- return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+ if configurations is None:
+ return (('UNKNOWN', ['There were no configurations supplied to the script.']))
uri = None
scheme = 'http'
http_uri = None
https_uri = None
http_policy = 'HTTP_ONLY'
- percent_warning = PERCENT_WARNING
- percent_critical = PERCENT_CRITICAL
checkpoint_tx = CHECKPOINT_TX_DEFAULT
checkpoint_period = CHECKPOINT_PERIOD_DEFAULT
- if NN_HTTP_ADDRESS_KEY in parameters:
- http_uri = parameters[NN_HTTP_ADDRESS_KEY]
+ if NN_HTTP_ADDRESS_KEY in configurations:
+ http_uri = configurations[NN_HTTP_ADDRESS_KEY]
- if NN_HTTPS_ADDRESS_KEY in parameters:
- https_uri = parameters[NN_HTTPS_ADDRESS_KEY]
+ if NN_HTTPS_ADDRESS_KEY in configurations:
+ https_uri = configurations[NN_HTTPS_ADDRESS_KEY]
- if NN_HTTP_POLICY_KEY in parameters:
- http_policy = parameters[NN_HTTP_POLICY_KEY]
+ if NN_HTTP_POLICY_KEY in configurations:
+ http_policy = configurations[NN_HTTP_POLICY_KEY]
- if NN_CHECKPOINT_TX_KEY in parameters:
- checkpoint_tx = parameters[NN_CHECKPOINT_TX_KEY]
+ if NN_CHECKPOINT_TX_KEY in configurations:
+ checkpoint_tx = configurations[NN_CHECKPOINT_TX_KEY]
+
+ if NN_CHECKPOINT_PERIOD_KEY in configurations:
+ checkpoint_period = configurations[NN_CHECKPOINT_PERIOD_KEY]
+
+ # parse script arguments
+ connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+ if CONNECTION_TIMEOUT_KEY in parameters:
+ connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
+
+ percent_warning = PERCENT_WARNING_DEFAULT
+ if PERCENT_WARNING_KEY in parameters:
+ percent_warning = float(parameters[PERCENT_WARNING_KEY]) * 100
+
+ percent_critical = PERCENT_CRITICAL_DEFAULT
+ if PERCENT_CRITICAL_KEY in parameters:
+ percent_critical = float(parameters[PERCENT_CRITICAL_KEY]) * 100
- if NN_CHECKPOINT_PERIOD_KEY in parameters:
- checkpoint_period = parameters[NN_CHECKPOINT_PERIOD_KEY]
-
# determine the right URI and whether to use SSL
uri = http_uri
if http_policy == 'HTTPS_ONLY':
@@ -102,8 +118,12 @@ def execute(parameters=None, host_name=None):
result_code = "OK"
try:
- last_checkpoint_time = int(get_value_from_jmx(last_checkpoint_time_qry,"LastCheckpointTime"))
- journal_transaction_info = get_value_from_jmx(journal_transaction_info_qry,"JournalTransactionInfo")
+ last_checkpoint_time = int(get_value_from_jmx(last_checkpoint_time_qry,
+ "LastCheckpointTime", connection_timeout))
+
+ journal_transaction_info = get_value_from_jmx(journal_transaction_info_qry,
+ "JournalTransactionInfo", connection_timeout)
+
journal_transaction_info_dict = json.loads(journal_transaction_info)
last_tx = int(journal_transaction_info_dict['LastAppliedOrWrittenTxId'])
@@ -131,11 +151,11 @@ def get_time(delta):
return {'h':h, 'm':m}
-def get_value_from_jmx(query, jmx_property):
+def get_value_from_jmx(query, jmx_property, connection_timeout):
response = None
try:
- response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
+ response = urllib2.urlopen(query, timeout=connection_timeout)
data = response.read()
data_dict = json.loads(data)
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
index 2066d46..0d0e473 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
@@ -35,7 +35,8 @@ NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
def get_tokens():
"""
@@ -46,34 +47,41 @@ def get_tokens():
NN_HTTPS_ADDRESS_KEY, DFS_POLICY_KEY)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
- if parameters is None:
- return (RESULT_STATE_UNKNOWN, ['There were no parameters supplied to the script.'])
+ if configurations is None:
+ return (RESULT_STATE_UNKNOWN, ['There were no configurations supplied to the script.'])
# if not in HA mode, then SKIP
- if not NAMESERVICE_KEY in parameters:
+ if not NAMESERVICE_KEY in configurations:
return (RESULT_STATE_SKIPPED, ['NameNode HA is not enabled'])
# hdfs-site is required
- if not HDFS_SITE_KEY in parameters:
+ if not HDFS_SITE_KEY in configurations:
return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(HDFS_SITE_KEY)])
+ # parse script arguments
+ connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+ if CONNECTION_TIMEOUT_KEY in parameters:
+ connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
+
+
# determine whether or not SSL is enabled
is_ssl_enabled = False
- if DFS_POLICY_KEY in parameters:
- dfs_policy = parameters[DFS_POLICY_KEY]
+ if DFS_POLICY_KEY in configurations:
+ dfs_policy = configurations[DFS_POLICY_KEY]
if dfs_policy == "HTTPS_ONLY":
is_ssl_enabled = True
- name_service = parameters[NAMESERVICE_KEY]
- hdfs_site = parameters[HDFS_SITE_KEY]
+ name_service = configurations[NAMESERVICE_KEY]
+ hdfs_site = configurations[HDFS_SITE_KEY]
# look for dfs.ha.namenodes.foo
nn_unique_ids_key = 'dfs.ha.namenodes.' + name_service
@@ -105,7 +113,7 @@ def execute(parameters=None, host_name=None):
try:
jmx_uri = jmx_uri_fragment.format(value)
- state = get_value_from_jmx(jmx_uri,'State')
+ state = get_value_from_jmx(jmx_uri, 'State', connection_timeout)
if state == HDFS_NN_STATE_ACTIVE:
active_namenodes.append(value)
@@ -161,11 +169,11 @@ def execute(parameters=None, host_name=None):
return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
-def get_value_from_jmx(query, jmx_property):
+def get_value_from_jmx(query, jmx_property, connection_timeout):
response = None
try:
- response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
+ response = urllib2.urlopen(query, timeout=connection_timeout)
data = response.read()
data_dict = json.loads(data)
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/alerts.json b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/alerts.json
index 9170b8f..2bdc532 100644
--- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/alerts.json
+++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/alerts.json
@@ -11,7 +11,30 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "HIVE/0.12.0.2.0/package/alerts/alert_hive_metastore.py"
+ "path": "HIVE/0.12.0.2.0/package/alerts/alert_hive_metastore.py",
+ "parameters": [
+ {
+ "name": "default.smoke.user",
+ "display_name": "Default Smoke User",
+ "value": "ambari-qa",
+ "type": "STRING",
+ "description": "The user that will run the Hive commands if not specified in cluster-env/smokeuser"
+ },
+ {
+ "name": "default.smoke.principal",
+ "display_name": "Default Smoke Principal",
+ "value": "ambari-qa@EXAMPLE.COM",
+ "type": "STRING",
+ "description": "The principal to use when retrieving the kerberos ticket if not specified in cluster-env/smokeuser_principal_name"
+ },
+ {
+ "name": "default.smoke.keytab",
+ "display_name": "Default Smoke Keytab",
+ "value": "/etc/security/keytabs/smokeuser.headless.keytab",
+ "type": "STRING",
+ "description": "The keytab to use when retrieving the kerberos ticket if not specified in cluster-env/smokeuser_keytab"
+ }
+ ]
}
}
],
@@ -25,7 +48,30 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py"
+ "path": "HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py",
+ "parameters": [
+ {
+ "name": "default.smoke.user",
+ "display_name": "Default Smoke User",
+ "value": "ambari-qa",
+ "type": "STRING",
+ "description": "The user that will run the Hive commands if not specified in cluster-env/smokeuser"
+ },
+ {
+ "name": "default.smoke.principal",
+ "display_name": "Default Smoke Principal",
+ "value": "ambari-qa@EXAMPLE.COM",
+ "type": "STRING",
+ "description": "The principal to use when retrieving the kerberos ticket if not specified in cluster-env/smokeuser_principal_name"
+ },
+ {
+ "name": "default.smoke.keytab",
+ "display_name": "Default Smoke Keytab",
+ "value": "/etc/security/keytabs/smokeuser.headless.keytab",
+ "type": "STRING",
+ "description": "The keytab to use when retrieving the kerberos ticket if not specified in cluster-env/smokeuser_keytab"
+ }
+ ]
}
}
],
@@ -39,7 +85,18 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py"
+ "path": "HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py",
+ "parameters": [
+ {
+ "name": "connection.timeout",
+ "display_name": "Connection Timeout",
+ "value": 5.0,
+ "type": "NUMERIC",
+ "description": "The maximum time before this alert is considered to be CRITICAL",
+ "units": "seconds",
+ "threshold": "CRITICAL"
+ }
+ ]
}
}
]
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_metastore.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_metastore.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_metastore.py
index 9f871be..a7ed54f 100644
--- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_metastore.py
+++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_metastore.py
@@ -33,15 +33,20 @@ SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}'
SMOKEUSER_PRINCIPAL_KEY = '{{cluster-env/smokeuser_principal_name}}'
SMOKEUSER_KEY = '{{cluster-env/smokeuser}}'
HIVE_METASTORE_URIS_KEY = '{{hive-site/hive.metastore.uris}}'
+
# The configured Kerberos executable search paths, if any
KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}'
-PERCENT_WARNING = 200
-PERCENT_CRITICAL = 200
-
-
+# default keytab location
+SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY = 'default.smoke.keytab'
SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab'
+
+# default smoke principal
+SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY = 'default.smoke.principal'
SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari-qa@EXAMPLE.COM'
+
+# default smoke user
+SMOKEUSER_SCRIPT_PARAM_KEY = 'default.smoke.user'
SMOKEUSER_DEFAULT = 'ambari-qa'
def get_tokens():
@@ -53,46 +58,61 @@ def get_tokens():
HIVE_METASTORE_URIS_KEY, SMOKEUSER_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
- if parameters is None:
- return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+ if configurations is None:
+ return (('UNKNOWN', ['There were no configurations supplied to the script.']))
- if not HIVE_METASTORE_URIS_KEY in parameters:
+ if not HIVE_METASTORE_URIS_KEY in configurations:
return (('UNKNOWN', ['Hive metastore uris were not supplied to the script.']))
- metastore_uris = parameters[HIVE_METASTORE_URIS_KEY].split(',')
+
+ metastore_uris = configurations[HIVE_METASTORE_URIS_KEY].split(',')
security_enabled = False
- if SECURITY_ENABLED_KEY in parameters:
- security_enabled = str(parameters[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
+ if SECURITY_ENABLED_KEY in configurations:
+ security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
+ # defaults
+ smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
- if SMOKEUSER_PRINCIPAL_KEY in parameters:
- smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_KEY]
-
smokeuser = SMOKEUSER_DEFAULT
- if SMOKEUSER_KEY in parameters:
- smokeuser = parameters[SMOKEUSER_KEY]
+
+ # check script params
+ if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
+ smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]
+
+ if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
+ smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]
+
+ if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
+ smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]
+
+
+ # check configurations last as they should always take precedence
+ if SMOKEUSER_PRINCIPAL_KEY in configurations:
+ smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]
+
+ if SMOKEUSER_KEY in configurations:
+ smokeuser = configurations[SMOKEUSER_KEY]
result_code = None
try:
if security_enabled:
- smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
-
- if SMOKEUSER_KEYTAB_KEY in parameters:
- smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]
+ if SMOKEUSER_KEYTAB_KEY in configurations:
+ smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]
# Get the configured Kerberos executable search paths, if any
- if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
- kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
+ if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
+ kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
else:
kerberos_executable_search_paths = None
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py
index 3e32db9..35217fc 100644
--- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py
+++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py
@@ -36,18 +36,25 @@ HIVE_SERVER_PRINCIPAL_KEY = '{{hive-site/hive.server2.authentication.kerberos.pr
SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}'
SMOKEUSER_PRINCIPAL_KEY = '{{cluster-env/smokeuser_principal_name}}'
SMOKEUSER_KEY = '{{cluster-env/smokeuser}}'
+
# The configured Kerberos executable search paths, if any
KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}'
-PERCENT_WARNING = 200
-PERCENT_CRITICAL = 200
-
THRIFT_PORT_DEFAULT = 10000
HIVE_SERVER_TRANSPORT_MODE_DEFAULT = 'binary'
HIVE_SERVER_PRINCIPAL_DEFAULT = 'hive/_HOST@EXAMPLE.COM'
HIVE_SERVER2_AUTHENTICATION_DEFAULT = 'NOSASL'
+
+# default keytab location
+SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY = 'default.smoke.keytab'
SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab'
+
+# default smoke principal
+SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY = 'default.smoke.principal'
SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari-qa@EXAMPLE.COM'
+
+# default smoke user
+SMOKEUSER_SCRIPT_PARAM_KEY = 'default.smoke.user'
SMOKEUSER_DEFAULT = 'ambari-qa'
def get_tokens():
@@ -61,59 +68,73 @@ def get_tokens():
HIVE_SERVER_TRANSPORT_MODE_KEY,KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
- if parameters is None:
- return ('UNKNOWN', ['There were no parameters supplied to the script.'])
+ if configurations is None:
+ return ('UNKNOWN', ['There were no configurations supplied to the script.'])
transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
- if HIVE_SERVER_TRANSPORT_MODE_KEY in parameters:
- transport_mode = parameters[HIVE_SERVER_TRANSPORT_MODE_KEY]
+ if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations:
+ transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY]
port = THRIFT_PORT_DEFAULT
- if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in parameters:
- port = int(parameters[HIVE_SERVER_THRIFT_PORT_KEY])
- elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in parameters:
- port = int(parameters[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])
+ if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in configurations:
+ port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY])
+ elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in configurations:
+ port = int(configurations[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])
security_enabled = False
- if SECURITY_ENABLED_KEY in parameters:
- security_enabled = str(parameters[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
+ if SECURITY_ENABLED_KEY in configurations:
+ security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT
- if HIVE_SERVER2_AUTHENTICATION_KEY in parameters:
- hive_server2_authentication = parameters[HIVE_SERVER2_AUTHENTICATION_KEY]
+ if HIVE_SERVER2_AUTHENTICATION_KEY in configurations:
+ hive_server2_authentication = configurations[HIVE_SERVER2_AUTHENTICATION_KEY]
+ # defaults
+ smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
- if SMOKEUSER_PRINCIPAL_KEY in parameters:
- smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_KEY]
-
smokeuser = SMOKEUSER_DEFAULT
- if SMOKEUSER_KEY in parameters:
- smokeuser = parameters[SMOKEUSER_KEY]
+
+ # check script params
+ if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
+ smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]
+
+ if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
+ smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]
+
+ if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
+ smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]
+
+
+ # check configurations last as they should always take precedence
+ if SMOKEUSER_PRINCIPAL_KEY in configurations:
+ smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]
+
+ if SMOKEUSER_KEY in configurations:
+ smokeuser = configurations[SMOKEUSER_KEY]
result_code = None
if security_enabled:
hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT
- if HIVE_SERVER_PRINCIPAL_KEY in parameters:
- hive_server_principal = parameters[HIVE_SERVER_PRINCIPAL_KEY]
-
- smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
+ if HIVE_SERVER_PRINCIPAL_KEY in configurations:
+ hive_server_principal = configurations[HIVE_SERVER_PRINCIPAL_KEY]
- if SMOKEUSER_KEYTAB_KEY in parameters:
- smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]
+ if SMOKEUSER_KEYTAB_KEY in configurations:
+ smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]
# Get the configured Kerberos executable search paths, if any
- if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
- kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
+ if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
+ kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
else:
kerberos_executable_search_paths = None
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
index dd20be4..7ee375e 100644
--- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
+++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
@@ -53,8 +53,10 @@ KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}
WEBHCAT_OK_RESPONSE = 'ok'
WEBHCAT_PORT_DEFAULT = 50111
-CURL_CONNECTION_TIMEOUT = '5'
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
+CURL_CONNECTION_TIMEOUT_DEFAULT = str(int(CONNECTION_TIMEOUT_DEFAULT))
+
def get_tokens():
"""
@@ -64,27 +66,36 @@ def get_tokens():
return (TEMPLETON_PORT_KEY, SECURITY_ENABLED_KEY, WEBHCAT_KEYTAB_KEY, WEBHCAT_PRINCIPAL_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
result_code = RESULT_CODE_UNKNOWN
- if parameters is None:
- return (result_code, ['There were no parameters supplied to the script.'])
+ if configurations is None:
+ return (result_code, ['There were no configurations supplied to the script.'])
webhcat_port = WEBHCAT_PORT_DEFAULT
- if TEMPLETON_PORT_KEY in parameters:
- webhcat_port = int(parameters[TEMPLETON_PORT_KEY])
+ if TEMPLETON_PORT_KEY in configurations:
+ webhcat_port = int(configurations[TEMPLETON_PORT_KEY])
security_enabled = False
- if SECURITY_ENABLED_KEY in parameters:
- security_enabled = parameters[SECURITY_ENABLED_KEY].lower() == 'true'
+ if SECURITY_ENABLED_KEY in configurations:
+ security_enabled = configurations[SECURITY_ENABLED_KEY].lower() == 'true'
+
+ # parse script arguments
+ connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+ curl_connection_timeout = CURL_CONNECTION_TIMEOUT_DEFAULT
+ if CONNECTION_TIMEOUT_KEY in parameters:
+ connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
+ curl_connection_timeout = str(int(connection_timeout))
+
# the alert will always run on the webhcat host
if host_name is None:
@@ -98,12 +109,12 @@ def execute(parameters=None, host_name=None):
json_response = {}
if security_enabled:
- if WEBHCAT_KEYTAB_KEY not in parameters or WEBHCAT_PRINCIPAL_KEY not in parameters:
- return (RESULT_CODE_UNKNOWN, [str(parameters)])
+ if WEBHCAT_KEYTAB_KEY not in configurations or WEBHCAT_PRINCIPAL_KEY not in configurations:
+ return (RESULT_CODE_UNKNOWN, [str(configurations)])
try:
- webhcat_keytab = parameters[WEBHCAT_KEYTAB_KEY]
- webhcat_principal = parameters[WEBHCAT_PRINCIPAL_KEY]
+ webhcat_keytab = configurations[WEBHCAT_KEYTAB_KEY]
+ webhcat_principal = configurations[WEBHCAT_PRINCIPAL_KEY]
# substitute _HOST in kerberos principal with actual fqdn
webhcat_principal = webhcat_principal.replace('_HOST', host_name)
@@ -115,8 +126,8 @@ def execute(parameters=None, host_name=None):
kerberos_env = {'KRB5CCNAME': ccache_file}
# Get the configured Kerberos executable search paths, if any
- if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
- kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
+ if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
+ kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
else:
kerberos_executable_search_paths = None
@@ -136,7 +147,7 @@ def execute(parameters=None, host_name=None):
# make a single curl call to get just the http code
curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL', '-w',
- '%{http_code}', '--connect-timeout', CURL_CONNECTION_TIMEOUT,
+ '%{http_code}', '--connect-timeout', curl_connection_timeout,
'-o', '/dev/null', query_url], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)
stdout, stderr = curl.communicate()
@@ -160,7 +171,7 @@ def execute(parameters=None, host_name=None):
# now that we have the http status and it was 200, get the content
start_time = time.time()
curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL',
- '--connect-timeout', CURL_CONNECTION_TIMEOUT, query_url, ],
+ '--connect-timeout', curl_connection_timeout, query_url, ],
stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)
stdout, stderr = curl.communicate()
@@ -178,7 +189,7 @@ def execute(parameters=None, host_name=None):
try:
# execute the query for the JSON that includes WebHCat status
start_time = time.time()
- url_response = urllib2.urlopen(query_url, timeout=CONNECTION_TIMEOUT)
+ url_response = urllib2.urlopen(query_url, timeout=connection_timeout)
total_time = time.time() - start_time
json_response = json.loads(url_response.read())
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/alerts/alert_check_oozie_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/alerts/alert_check_oozie_server.py b/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/alerts/alert_check_oozie_server.py
index cbb6299..c0dc18a 100644
--- a/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/alerts/alert_check_oozie_server.py
+++ b/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/alerts/alert_check_oozie_server.py
@@ -59,7 +59,7 @@ def get_tokens():
return (OOZIE_URL_KEY, OOZIE_PRINCIPAL, SECURITY_ENABLED, OOZIE_KEYTAB, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)
@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
-def get_check_command(oozie_url, host_name, parameters):
+def get_check_command(oozie_url, host_name, configurations):
from resource_management.libraries.functions import reload_windows_env
reload_windows_env()
oozie_home = os.environ['OOZIE_HOME']
@@ -67,20 +67,20 @@ def get_check_command(oozie_url, host_name, parameters):
return (command, None)
@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
-def get_check_command(oozie_url, host_name, parameters):
+def get_check_command(oozie_url, host_name, configurations):
security_enabled = False
- if SECURITY_ENABLED in parameters:
- security_enabled = str(parameters[SECURITY_ENABLED]).upper() == 'TRUE'
+ if SECURITY_ENABLED in configurations:
+ security_enabled = str(configurations[SECURITY_ENABLED]).upper() == 'TRUE'
kerberos_env = None
if security_enabled:
- if OOZIE_KEYTAB in parameters and OOZIE_PRINCIPAL in parameters:
- oozie_keytab = parameters[OOZIE_KEYTAB]
- oozie_principal = parameters[OOZIE_PRINCIPAL]
+ if OOZIE_KEYTAB in configurations and OOZIE_PRINCIPAL in configurations:
+ oozie_keytab = configurations[OOZIE_KEYTAB]
+ oozie_principal = configurations[OOZIE_PRINCIPAL]
# substitute _HOST in kerberos principal with actual fqdn
oozie_principal = oozie_principal.replace('_HOST', host_name)
else:
- raise KerberosPropertiesNotFound('The Oozie keytab and principal are required parameters when security is enabled.')
+ raise KerberosPropertiesNotFound('The Oozie keytab and principal are required configurations when security is enabled.')
# Create the kerberos credentials cache (ccache) file and set it in the environment to use
# when executing curl
@@ -89,8 +89,8 @@ def get_check_command(oozie_url, host_name, parameters):
kerberos_env = {'KRB5CCNAME': ccache_file}
# Get the configured Kerberos executable search paths, if any
- if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
- kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
+ if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
+ kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
else:
kerberos_executable_search_paths = None
@@ -110,30 +110,31 @@ def get_check_command(oozie_url, host_name, parameters):
command = format("source /etc/oozie/conf/oozie-env.sh ; oozie admin -oozie {oozie_url} -status")
return (command, kerberos_env)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
- if parameters is None:
- return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+ if configurations is None:
+ return (RESULT_CODE_UNKNOWN, ['There were no configurations supplied to the script.'])
- if not OOZIE_URL_KEY in parameters:
+ if not OOZIE_URL_KEY in configurations:
return (RESULT_CODE_UNKNOWN, ['The Oozie URL is a required parameter.'])
# use localhost on Windows, 0.0.0.0 on others; 0.0.0.0 means bind to all
# interfaces, which doesn't work on Windows
localhost_address = 'localhost' if OSCheck.get_os_family() == OSConst.WINSRV_FAMILY else '0.0.0.0'
- oozie_url = parameters[OOZIE_URL_KEY]
+ oozie_url = configurations[OOZIE_URL_KEY]
oozie_url = oozie_url.replace(urlparse(oozie_url).hostname,localhost_address)
try:
- command, env = get_check_command(oozie_url, host_name, parameters)
+ command, env = get_check_command(oozie_url, host_name, configurations)
# execute the command
Execute(command, environment=env)
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/STORM/0.9.1.2.1/package/alerts/check_supervisor_process_win.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/STORM/0.9.1.2.1/package/alerts/check_supervisor_process_win.py b/ambari-server/src/main/resources/common-services/STORM/0.9.1.2.1/package/alerts/check_supervisor_process_win.py
index dcae64a..a698415 100644
--- a/ambari-server/src/main/resources/common-services/STORM/0.9.1.2.1/package/alerts/check_supervisor_process_win.py
+++ b/ambari-server/src/main/resources/common-services/STORM/0.9.1.2.1/package/alerts/check_supervisor_process_win.py
@@ -33,12 +33,13 @@ def get_tokens():
"""
return ()
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/alerts.json b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/alerts.json
index a446808..d25dd78 100644
--- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/alerts.json
+++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/alerts.json
@@ -203,7 +203,18 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py"
+ "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py",
+ "parameters": [
+ {
+ "name": "connection.timeout",
+ "display_name": "Connection Timeout",
+ "value": 5.0,
+ "type": "NUMERIC",
+ "description": "The maximum time before this alert is considered to be CRITICAL",
+ "units": "seconds",
+ "threshold": "CRITICAL"
+ }
+ ]
}
}
],
@@ -337,7 +348,18 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py"
+ "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py",
+ "parameters": [
+ {
+ "name": "connection.timeout",
+ "display_name": "Connection Timeout",
+ "value": 5.0,
+ "type": "NUMERIC",
+ "description": "The maximum time before this alert is considered to be CRITICAL",
+ "units": "seconds",
+ "threshold": "CRITICAL"
+ }
+ ]
}
}
],
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py
index 26bf2a0..1cdeb97 100644
--- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py
+++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py
@@ -40,7 +40,8 @@ CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager hea
NODEMANAGER_DEFAULT_PORT = 8042
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
def get_tokens():
"""
@@ -51,32 +52,40 @@ def get_tokens():
YARN_HTTP_POLICY_KEY)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
result_code = RESULT_CODE_UNKNOWN
- if parameters is None:
- return (result_code, ['There were no parameters supplied to the script.'])
+ if configurations is None:
+ return (result_code, ['There were no configurations supplied to the script.'])
scheme = 'http'
http_uri = None
https_uri = None
http_policy = 'HTTP_ONLY'
- if NODEMANAGER_HTTP_ADDRESS_KEY in parameters:
- http_uri = parameters[NODEMANAGER_HTTP_ADDRESS_KEY]
+ if NODEMANAGER_HTTP_ADDRESS_KEY in configurations:
+ http_uri = configurations[NODEMANAGER_HTTP_ADDRESS_KEY]
- if NODEMANAGER_HTTPS_ADDRESS_KEY in parameters:
- https_uri = parameters[NODEMANAGER_HTTPS_ADDRESS_KEY]
+ if NODEMANAGER_HTTPS_ADDRESS_KEY in configurations:
+ https_uri = configurations[NODEMANAGER_HTTPS_ADDRESS_KEY]
+
+ if YARN_HTTP_POLICY_KEY in configurations:
+ http_policy = configurations[YARN_HTTP_POLICY_KEY]
+
+
+ # parse script arguments
+ connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+ if CONNECTION_TIMEOUT_KEY in parameters:
+ connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
- if YARN_HTTP_POLICY_KEY in parameters:
- http_policy = parameters[YARN_HTTP_POLICY_KEY]
# determine the right URI and whether to use SSL
uri = http_uri
@@ -108,7 +117,7 @@ def execute(parameters=None, host_name=None):
try:
# execute the query for the JSON that includes the NodeManager health status
- url_response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
+ url_response = urllib2.urlopen(query, timeout=connection_timeout)
except urllib2.HTTPError, httpError:
label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query,
str(httpError))
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py
index 7c00625..eafdbd0 100644
--- a/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py
+++ b/ambari-server/src/main/resources/common-services/YARN/2.1.0.2.0/package/alerts/alert_nodemanagers_summary.py
@@ -30,7 +30,8 @@ NODEMANAGER_HTTP_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.address}
NODEMANAGER_HTTPS_ADDRESS_KEY = '{{yarn-site/yarn.resourcemanager.webapp.https.address}}'
YARN_HTTP_POLICY_KEY = '{{yarn-site/yarn.http.policy}}'
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
def get_tokens():
"""
@@ -41,32 +42,38 @@ def get_tokens():
YARN_HTTP_POLICY_KEY
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
- if parameters is None:
- return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+ if configurations is None:
+ return (('UNKNOWN', ['There were no configurations supplied to the script.']))
scheme = 'http'
http_uri = None
https_uri = None
http_policy = 'HTTP_ONLY'
- if NODEMANAGER_HTTP_ADDRESS_KEY in parameters:
- http_uri = parameters[NODEMANAGER_HTTP_ADDRESS_KEY]
+ if NODEMANAGER_HTTP_ADDRESS_KEY in configurations:
+ http_uri = configurations[NODEMANAGER_HTTP_ADDRESS_KEY]
- if NODEMANAGER_HTTPS_ADDRESS_KEY in parameters:
- https_uri = parameters[NODEMANAGER_HTTPS_ADDRESS_KEY]
+ if NODEMANAGER_HTTPS_ADDRESS_KEY in configurations:
+ https_uri = configurations[NODEMANAGER_HTTPS_ADDRESS_KEY]
+
+ if YARN_HTTP_POLICY_KEY in configurations:
+ http_policy = configurations[YARN_HTTP_POLICY_KEY]
+
+ # parse script arguments
+ connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+ if CONNECTION_TIMEOUT_KEY in parameters:
+ connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
- if YARN_HTTP_POLICY_KEY in parameters:
- http_policy = parameters[YARN_HTTP_POLICY_KEY]
-
# determine the right URI and whether to use SSL
uri = http_uri
if http_policy == 'HTTPS_ONLY':
@@ -78,7 +85,8 @@ def execute(parameters=None, host_name=None):
live_nodemanagers_qry = "{0}://{1}/jmx?qry=Hadoop:service=ResourceManager,name=RMNMInfo".format(scheme, uri)
try:
- live_nodemanagers = json.loads(get_value_from_jmx(live_nodemanagers_qry, "LiveNodeManagers"))
+ live_nodemanagers = json.loads(get_value_from_jmx(live_nodemanagers_qry,
+ "LiveNodeManagers", connection_timeout))
unhealthy_count = 0
@@ -104,14 +112,14 @@ def execute(parameters=None, host_name=None):
return (result_code, [label])
-def get_value_from_jmx(query, jmx_property):
+def get_value_from_jmx(query, jmx_property, connection_timeout):
response = None
try:
# use a custom header processor that will look for the non-standard
# "Refresh" header and attempt to follow the redirect
url_opener = urllib2.build_opener(RefreshHeaderProcessor())
- response = url_opener.open(query, timeout=CONNECTION_TIMEOUT)
+ response = url_opener.open(query, timeout=connection_timeout)
data = response.read()
data_dict = json.loads(data)
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/host_scripts/alert_disk_space.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/host_scripts/alert_disk_space.py b/ambari-server/src/main/resources/host_scripts/alert_disk_space.py
index 68e22df..94f19d6 100644
--- a/ambari-server/src/main/resources/host_scripts/alert_disk_space.py
+++ b/ambari-server/src/main/resources/host_scripts/alert_disk_space.py
@@ -25,7 +25,16 @@ from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl
from ambari_commons import OSConst
DiskInfo = collections.namedtuple('DiskInfo', 'total used free path')
-MIN_FREE_SPACE = 5000000000L # 5GB
+
+# script parameter keys
+MIN_FREE_SPACE_KEY = "minimum.free.space"
+PERCENT_USED_WARNING_KEY = "percent.used.space.warning.threshold"
+PERCENT_USED_CRITICAL_KEY = "percent.free.space.critical.threshold"
+
+# defaults in case no script parameters are passed
+MIN_FREE_SPACE_DEFAULT = 5000000000L
+PERCENT_USED_WARNING_DEFAULT = 50
+PERCENT_USED_CRITICAL_DEFAULT = 80
# the location where HDP installs components when using HDP 2.2+
HDP_HOME_DIR = "/usr/hdp"
@@ -40,8 +49,9 @@ def get_tokens():
"""
return None
+
@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Performs advanced disk checks under Linux. This will first attempt to
check the HDP installation directories if they exist. If they do not exist,
@@ -50,7 +60,8 @@ def execute(parameters=None, host_name=None):
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
@@ -66,21 +77,40 @@ def execute(parameters=None, host_name=None):
try:
disk_usage = _get_disk_usage(path)
- result_code, label = _get_warnings_for_partition(disk_usage)
+ result_code, label = _get_warnings_for_partition(parameters, disk_usage)
except NotImplementedError, platform_error:
return 'CRITICAL', [str(platform_error)]
return result_code, [label]
-def _get_warnings_for_partition(disk_usage):
+
+def _get_warnings_for_partition(parameters, disk_usage):
+
+ # start with hard coded defaults
+ min_free_space = MIN_FREE_SPACE_DEFAULT
+ warning_percent = PERCENT_USED_WARNING_DEFAULT
+ critical_percent = PERCENT_USED_CRITICAL_DEFAULT
+
+ # parse script parameters
+ if MIN_FREE_SPACE_KEY in parameters:
+ # gson may serialize large numbers in scientific notation (e.g. 5e9), so parse as float before converting to long
+ min_free_space = long(float(parameters[MIN_FREE_SPACE_KEY]))
+
+ if PERCENT_USED_WARNING_KEY in parameters:
+ warning_percent = float(parameters[PERCENT_USED_WARNING_KEY]) * 100
+
+ if PERCENT_USED_CRITICAL_KEY in parameters:
+ critical_percent = float(parameters[PERCENT_USED_CRITICAL_KEY]) * 100
+
+
if disk_usage is None or disk_usage.total == 0:
return 'CRITICAL', ['Unable to determine the disk usage']
result_code = 'OK'
percent = disk_usage.used / float(disk_usage.total) * 100
- if percent > 80:
+ if percent > critical_percent:
result_code = 'CRITICAL'
- elif percent > 50:
+ elif percent > warning_percent:
result_code = 'WARNING'
label = 'Capacity Used: [{0:.2f}%, {1}], Capacity Total: [{2}]'.format(
@@ -92,26 +122,27 @@ def _get_warnings_for_partition(disk_usage):
if result_code == 'OK':
# Check absolute disk space value
- if disk_usage.free < MIN_FREE_SPACE:
+ if disk_usage.free < min_free_space:
result_code = 'WARNING'
- label += '. Total free space is less than {0}'.format(_get_formatted_size(MIN_FREE_SPACE))
+ label += '. Total free space is less than {0}'.format(_get_formatted_size(min_free_space))
return result_code, label
@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Performs simplified disk checks under Windows
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
try:
disk_usage = _get_disk_usage()
- result = _get_warnings_for_partition(disk_usage)
+ result = _get_warnings_for_partition(parameters, disk_usage)
except NotImplementedError, platform_error:
result = ('CRITICAL', [str(platform_error)])
return result
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/alerts.json b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/alerts.json
index 3b117d0..cac5855 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/alerts.json
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/alerts.json
@@ -10,7 +10,16 @@
"scope": "ANY",
"source": {
"type": "SCRIPT",
- "path": "BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py"
+ "path": "BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py",
+ "parameters": [
+ {
+ "name": "run.directory",
+ "display_name": "Run Directory",
+ "value": "/var/run/flume",
+ "type": "STRING",
+ "description": "The directory where flume agent processes will place their PID files."
+ }
+ ]
}
}
]
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py
index b183bbc..0838b4c 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py
@@ -29,7 +29,8 @@ RESULT_CODE_UNKNOWN = 'UNKNOWN'
FLUME_CONF_DIR_KEY = '{{flume-env/flume_conf_dir}}'
-FLUME_RUN_DIR = '/var/run/flume'
+FLUME_RUN_DIR_KEY = "run.directory"
+FLUME_RUN_DIR_DEFAULT = '/var/run/flume'
def get_tokens():
"""
@@ -37,23 +38,24 @@ def get_tokens():
to build the dictionary passed into execute
"""
return (FLUME_CONF_DIR_KEY,)
-
-def execute(parameters=None, host_name=None):
+
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
- if parameters is None:
- return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+ if configurations is None:
+ return (RESULT_CODE_UNKNOWN, ['There were no configurations supplied to the script.'])
flume_conf_directory = None
- if FLUME_CONF_DIR_KEY in parameters:
- flume_conf_directory = parameters[FLUME_CONF_DIR_KEY]
+ if FLUME_CONF_DIR_KEY in configurations:
+ flume_conf_directory = configurations[FLUME_CONF_DIR_KEY]
if flume_conf_directory is None:
return (RESULT_CODE_UNKNOWN, ['The Flume configuration directory is a required parameter.'])
@@ -61,7 +63,12 @@ def execute(parameters=None, host_name=None):
if host_name is None:
host_name = socket.getfqdn()
- processes = get_flume_status(flume_conf_directory, FLUME_RUN_DIR)
+ # parse script arguments
+ flume_run_directory = FLUME_RUN_DIR_DEFAULT
+ if FLUME_RUN_DIR_KEY in parameters:
+ flume_run_directory = parameters[FLUME_RUN_DIR_KEY]
+
+ processes = get_flume_status(flume_conf_directory, flume_run_directory)
expected_agents = find_expected_agent_names(flume_conf_directory)
alert_label = ''
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/alerts.json b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/alerts.json
index 8e08ef2..c0b9b0b 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/alerts.json
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/alerts.json
@@ -387,7 +387,36 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py"
+ "path": "BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py",
+ "parameters": [
+ {
+ "name": "connection.timeout",
+ "display_name": "Connection Timeout",
+ "value": 5.0,
+ "type": "NUMERIC",
+ "description": "The maximum time before this alert is considered to be CRITICAL",
+ "units": "seconds",
+ "threshold": "CRITICAL"
+ },
+ {
+ "name": "checkpoint_time_warning_threshold",
+ "display_name": "Checkpoint Warning",
+ "value": 2.0,
+ "type": "PERCENT",
+ "description": "The percentage of the last checkpoint time greater than the interval in order to trigger a warning alert.",
+ "units": "%",
+ "threshold": "WARNING"
+ },
+ {
+ "name": "checkpoint_time_critical_threshold",
+ "display_name": "Checkpoint Critical",
+ "value": 2.0,
+ "type": "PERCENT",
+ "description": "The percentage of the last checkpoint time greater than the interval in order to trigger a critical alert.",
+ "units": "%",
+ "threshold": "CRITICAL"
+ }
+ ]
}
},
{
@@ -400,7 +429,18 @@
"ignore_host": true,
"source": {
"type": "SCRIPT",
- "path": "BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py"
+ "path": "BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py",
+ "parameters": [
+ {
+ "name": "connection.timeout",
+ "display_name": "Connection Timeout",
+ "value": 5.0,
+ "type": "NUMERIC",
+ "description": "The maximum time before this alert is considered to be CRITICAL",
+ "units": "seconds",
+ "threshold": "CRITICAL"
+ }
+ ]
}
}
],
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
index 2455d3b..b44c2d4 100644
--- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
+++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
@@ -30,13 +30,17 @@ NN_HTTP_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
NN_CHECKPOINT_TX_KEY = '{{hdfs-site/dfs.namenode.checkpoint.txns}}'
NN_CHECKPOINT_PERIOD_KEY = '{{hdfs-site/dfs.namenode.checkpoint.period}}'
-PERCENT_WARNING = 200
-PERCENT_CRITICAL = 200
+PERCENT_WARNING_KEY = 'checkpoint.time.warning.threshold'
+PERCENT_WARNING_DEFAULT = 200
+
+PERCENT_CRITICAL_KEY = 'checkpoint.time.critical.threshold'
+PERCENT_CRITICAL_DEFAULT = 200
CHECKPOINT_TX_DEFAULT = 1000000
CHECKPOINT_PERIOD_DEFAULT = 21600
-CONNECTION_TIMEOUT = 5.0
+CONNECTION_TIMEOUT_KEY = 'connection.timeout'
+CONNECTION_TIMEOUT_DEFAULT = 5.0
def get_tokens():
"""
@@ -47,43 +51,55 @@ def get_tokens():
NN_CHECKPOINT_TX_KEY, NN_CHECKPOINT_PERIOD_KEY)
-def execute(parameters=None, host_name=None):
+def execute(configurations={}, parameters={}, host_name=None):
"""
Returns a tuple containing the result code and a pre-formatted result label
Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
"""
- if parameters is None:
- return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+ if configurations is None:
+ return (('UNKNOWN', ['There were no configurations supplied to the script.']))
uri = None
scheme = 'http'
http_uri = None
https_uri = None
http_policy = 'HTTP_ONLY'
- percent_warning = PERCENT_WARNING
- percent_critical = PERCENT_CRITICAL
checkpoint_tx = CHECKPOINT_TX_DEFAULT
checkpoint_period = CHECKPOINT_PERIOD_DEFAULT
- if NN_HTTP_ADDRESS_KEY in parameters:
- http_uri = parameters[NN_HTTP_ADDRESS_KEY]
+ if NN_HTTP_ADDRESS_KEY in configurations:
+ http_uri = configurations[NN_HTTP_ADDRESS_KEY]
- if NN_HTTPS_ADDRESS_KEY in parameters:
- https_uri = parameters[NN_HTTPS_ADDRESS_KEY]
+ if NN_HTTPS_ADDRESS_KEY in configurations:
+ https_uri = configurations[NN_HTTPS_ADDRESS_KEY]
- if NN_HTTP_POLICY_KEY in parameters:
- http_policy = parameters[NN_HTTP_POLICY_KEY]
+ if NN_HTTP_POLICY_KEY in configurations:
+ http_policy = configurations[NN_HTTP_POLICY_KEY]
- if NN_CHECKPOINT_TX_KEY in parameters:
- checkpoint_tx = parameters[NN_CHECKPOINT_TX_KEY]
+ if NN_CHECKPOINT_TX_KEY in configurations:
+ checkpoint_tx = configurations[NN_CHECKPOINT_TX_KEY]
+
+ if NN_CHECKPOINT_PERIOD_KEY in configurations:
+ checkpoint_period = configurations[NN_CHECKPOINT_PERIOD_KEY]
+
+ # parse script arguments
+ connection_timeout = CONNECTION_TIMEOUT_DEFAULT
+ if CONNECTION_TIMEOUT_KEY in parameters:
+ connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
+
+ percent_warning = PERCENT_WARNING_DEFAULT
+ if PERCENT_WARNING_KEY in parameters:
+ percent_warning = float(parameters[PERCENT_WARNING_KEY]) * 100
+
+ percent_critical = PERCENT_CRITICAL_DEFAULT
+ if PERCENT_CRITICAL_KEY in parameters:
+ percent_critical = float(parameters[PERCENT_CRITICAL_KEY]) * 100
- if NN_CHECKPOINT_PERIOD_KEY in parameters:
- checkpoint_period = parameters[NN_CHECKPOINT_PERIOD_KEY]
-
# determine the right URI and whether to use SSL
uri = http_uri
if http_policy == 'HTTPS_ONLY':
@@ -102,8 +118,12 @@ def execute(parameters=None, host_name=None):
result_code = "OK"
try:
- last_checkpoint_time = int(get_value_from_jmx(last_checkpoint_time_qry,"LastCheckpointTime"))
- journal_transaction_info = get_value_from_jmx(journal_transaction_info_qry,"JournalTransactionInfo")
+ last_checkpoint_time = int(get_value_from_jmx(last_checkpoint_time_qry,
+ "LastCheckpointTime", connection_timeout))
+
+ journal_transaction_info = get_value_from_jmx(journal_transaction_info_qry,
+ "JournalTransactionInfo", connection_timeout)
+
journal_transaction_info_dict = json.loads(journal_transaction_info)
last_tx = int(journal_transaction_info_dict['LastAppliedOrWrittenTxId'])
@@ -131,11 +151,11 @@ def get_time(delta):
return {'h':h, 'm':m}
-def get_value_from_jmx(query, jmx_property):
+def get_value_from_jmx(query, jmx_property, connection_timeout):
response = None
try:
- response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT)
+ response = urllib2.urlopen(query, timeout=connection_timeout)
data = response.read()
data_dict = json.loads(data)