You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ao...@apache.org on 2014/12/25 15:28:43 UTC
[1/2] ambari git commit: AMBARI-8755. Oozie server check alert fails in secured mode (aonishuk)
Repository: ambari
Updated Branches:
refs/heads/trunk 79cffa16d -> ec37c603c
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/files/alert_hive_thrift_port.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/files/alert_hive_thrift_port.py b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/files/alert_hive_thrift_port.py
deleted file mode 100644
index 36d04b3..0000000
--- a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/files/alert_hive_thrift_port.py
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import json
-import socket
-import time
-import traceback
-import urllib2
-from resource_management.libraries.functions import hive_check
-from resource_management.libraries.functions import format
-from resource_management.libraries.functions import get_kinit_path
-from resource_management.core.environment import Environment
-
-OK_MESSAGE = "TCP OK - %.4f response on port %s"
-CRITICAL_MESSAGE = "Connection failed on host {0}:{1}"
-
-HIVE_SERVER_THRIFT_PORT_KEY = '{{hive-site/hive.server2.thrift.port}}'
-SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}'
-HIVE_SERVER2_AUTHENTICATION_KEY = '{{hive-site/hive.server2.authentication}}'
-HIVE_SERVER_PRINCIPAL_KEY = '{{hive-site/hive.server2.authentication.kerberos.principal}}'
-SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}'
-SMOKEUSER_KEY = '{{cluster-env/smokeuser}}'
-
-PERCENT_WARNING = 200
-PERCENT_CRITICAL = 200
-
-THRIFT_PORT_DEFAULT = 10000
-HIVE_SERVER_PRINCIPAL_DEFAULT = 'hive/_HOST@EXAMPLE.COM'
-HIVE_SERVER2_AUTHENTICATION_DEFAULT = 'NOSASL'
-SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab'
-SMOKEUSER_DEFAULT = 'ambari-qa'
-
-def get_tokens():
- """
- Returns a tuple of tokens in the format {{site/property}} that will be used
- to build the dictionary passed into execute
- """
- return (HIVE_SERVER_THRIFT_PORT_KEY,SECURITY_ENABLED_KEY,HIVE_SERVER2_AUTHENTICATION_KEY,HIVE_SERVER_PRINCIPAL_KEY,SMOKEUSER_KEYTAB_KEY,SMOKEUSER_KEY)
-
-
-def execute(parameters=None, host_name=None):
- """
- Returns a tuple containing the result code and a pre-formatted result label
-
- Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
- host_name (string): the name of this host where the alert is running
- """
-
- if parameters is None:
- return (('UNKNOWN', ['There were no parameters supplied to the script.']))
-
- thrift_port = THRIFT_PORT_DEFAULT
- if HIVE_SERVER_THRIFT_PORT_KEY in parameters:
- thrift_port = int(parameters[HIVE_SERVER_THRIFT_PORT_KEY])
-
- security_enabled = False
- if SECURITY_ENABLED_KEY in parameters:
- security_enabled = str(parameters[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
-
- hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT
- if HIVE_SERVER2_AUTHENTICATION_KEY in parameters:
- hive_server2_authentication = parameters[HIVE_SERVER2_AUTHENTICATION_KEY]
-
- smokeuser = SMOKEUSER_DEFAULT
- if SMOKEUSER_KEY in parameters:
- smokeuser = parameters[SMOKEUSER_KEY]
-
- result_code = None
-
- if security_enabled:
- hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT
- if HIVE_SERVER_PRINCIPAL_KEY in parameters:
- hive_server_principal = parameters[HIVE_SERVER_PRINCIPAL_KEY]
- smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
- if SMOKEUSER_KEYTAB_KEY in parameters:
- smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]
- with Environment() as env:
- kinit_path_local = get_kinit_path(["/usr/bin", "/usr/kerberos/bin", "/usr/sbin"])
- kinitcmd=format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser}; ")
- else:
- hive_server_principal = None
- kinitcmd=None
-
- try:
- if host_name is None:
- host_name = socket.getfqdn()
-
- start_time = time.time()
- try:
- with Environment() as env:
- hive_check.check_thrift_port_sasl(host_name, thrift_port, hive_server2_authentication,
- hive_server_principal, kinitcmd, smokeuser)
- is_thrift_port_ok = True
- except:
- is_thrift_port_ok = False
-
- if is_thrift_port_ok == True:
- result_code = 'OK'
- total_time = time.time() - start_time
- label = OK_MESSAGE % (total_time, thrift_port)
- else:
- result_code = 'CRITICAL'
- label = CRITICAL_MESSAGE.format(host_name,thrift_port)
-
- except Exception, e:
- label = str(e)
- result_code = 'UNKNOWN'
-
- return ((result_code, [label]))
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/files/alert_webhcat_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/files/alert_webhcat_server.py b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/files/alert_webhcat_server.py
deleted file mode 100644
index 44840de..0000000
--- a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/files/alert_webhcat_server.py
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import json
-import socket
-import time
-import urllib2
-
-RESULT_CODE_OK = 'OK'
-RESULT_CODE_CRITICAL = 'CRITICAL'
-RESULT_CODE_UNKNOWN = 'UNKNOWN'
-
-OK_MESSAGE = 'TCP OK - {0:.4f} response on port {1}'
-CRITICAL_CONNECTION_MESSAGE = 'Connection failed on host {0}:{1}'
-CRITICAL_TEMPLETON_STATUS_MESSAGE = 'WebHCat returned an unexpected status of "{0}"'
-CRITICAL_TEMPLETON_UNKNOWN_JSON_MESSAGE = 'Unable to determine WebHCat health from unexpected JSON response'
-
-TEMPLETON_PORT_KEY = '{{webhcat-site/templeton.port}}'
-SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}'
-
-TEMPLETON_OK_RESPONSE = 'ok'
-TEMPLETON_PORT_DEFAULT = 50111
-
-def get_tokens():
- """
- Returns a tuple of tokens in the format {{site/property}} that will be used
- to build the dictionary passed into execute
- """
- return (TEMPLETON_PORT_KEY,SECURITY_ENABLED_KEY)
-
-
-def execute(parameters=None, host_name=None):
- """
- Returns a tuple containing the result code and a pre-formatted result label
-
- Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
- host_name (string): the name of this host where the alert is running
- """
-
- result_code = RESULT_CODE_UNKNOWN
-
- if parameters is None:
- return (result_code, ['There were no parameters supplied to the script.'])
-
- templeton_port = TEMPLETON_PORT_DEFAULT
- if TEMPLETON_PORT_KEY in parameters:
- templeton_port = int(parameters[TEMPLETON_PORT_KEY])
-
- security_enabled = False
- if SECURITY_ENABLED_KEY in parameters:
- security_enabled = parameters[SECURITY_ENABLED_KEY].lower() == 'true'
-
- scheme = 'http'
- if security_enabled is True:
- scheme = 'https'
-
- label = ''
- url_response = None
- templeton_status = ''
- total_time = 0
-
- try:
- # the alert will always run on the webhcat host
- if host_name is None:
- host_name = socket.getfqdn()
-
- query = "{0}://{1}:{2}/templeton/v1/status".format(scheme, host_name,
- templeton_port)
-
- # execute the query for the JSON that includes templeton status
- start_time = time.time()
- url_response = urllib2.urlopen(query)
- total_time = time.time() - start_time
- except:
- label = CRITICAL_CONNECTION_MESSAGE.format(host_name,templeton_port)
- return (RESULT_CODE_CRITICAL, [label])
-
- # URL response received, parse it
- try:
- json_response = json.loads(url_response.read())
- templeton_status = json_response['status']
- except:
- return (RESULT_CODE_CRITICAL, [CRITICAL_TEMPLETON_UNKNOWN_JSON_MESSAGE])
-
- # proper JSON received, compare against known value
- if templeton_status.lower() == TEMPLETON_OK_RESPONSE:
- result_code = RESULT_CODE_OK
- label = OK_MESSAGE.format(total_time, templeton_port)
- else:
- result_code = RESULT_CODE_CRITICAL
- label = CRITICAL_TEMPLETON_STATUS_MESSAGE.format(templeton_status)
-
- return (result_code, [label])
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/MAPREDUCE/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/MAPREDUCE/alerts.json b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/MAPREDUCE/alerts.json
index 9250e14..f192e2c 100644
--- a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/MAPREDUCE/alerts.json
+++ b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/MAPREDUCE/alerts.json
@@ -186,7 +186,7 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "HDP/1.3.2/services/MAPREDUCE/package/files/alert_mapreduce_directory_space.py"
+ "path": "HDP/1.3.2/services/MAPREDUCE/package/alerts/alert_mapreduce_directory_space.py"
}
}
],
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/MAPREDUCE/package/alerts/alert_mapreduce_directory_space.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/MAPREDUCE/package/alerts/alert_mapreduce_directory_space.py b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/MAPREDUCE/package/alerts/alert_mapreduce_directory_space.py
new file mode 100644
index 0000000..d6e5d75
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/MAPREDUCE/package/alerts/alert_mapreduce_directory_space.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import collections
+import os
+import platform
+
+RESULT_CODE_OK = 'OK'
+RESULT_CODE_CRITICAL = 'CRITICAL'
+RESULT_CODE_UNKNOWN = 'UNKNOWN'
+
+MAPREDUCE_LOCAL_DIR_KEY = '{{mapred-site/mapred.local.dir}}'
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (MAPREDUCE_LOCAL_DIR_KEY,)
+
+
+def execute(parameters=None, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ parameters (dictionary): a mapping of parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+ if parameters is None:
+ return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+
+ if set([MAPREDUCE_LOCAL_DIR_KEY]).issubset(parameters):
+ mapreduce_local_directories = parameters[MAPREDUCE_LOCAL_DIR_KEY]
+ else:
+ return (('UNKNOWN', ['The MapReduce Local Directory is required.']))
+
+ directory_list = mapreduce_local_directories.split(",")
+ for directory in directory_list:
+ disk_usage = None
+ try:
+ disk_usage = _get_disk_usage(directory)
+ except NotImplementedError, platform_error:
+ return (RESULT_CODE_UNKNOWN, [str(platform_error)])
+
+ if disk_usage is None or disk_usage.total == 0:
+ return (RESULT_CODE_UNKNOWN, ['Unable to determine the disk usage.'])
+
+ percent = disk_usage.used / float(disk_usage.total) * 100
+
+ if percent > 85:
+ message = 'The disk usage of {0} is {1:d}%'.format(directory,percent)
+ return (RESULT_CODE_CRITICAL, [message])
+
+ return (RESULT_CODE_OK, ["All MapReduce local directories have sufficient space."])
+
+
+def _get_disk_usage(path):
+ """
+ returns a named tuple that contains the total, used, and free disk space
+ in bytes
+ """
+ used = 0
+ total = 0
+ free = 0
+
+ if 'statvfs' in dir(os):
+ disk_stats = os.statvfs(path)
+ free = disk_stats.f_bavail * disk_stats.f_frsize
+ total = disk_stats.f_blocks * disk_stats.f_frsize
+ used = (disk_stats.f_blocks - disk_stats.f_bfree) * disk_stats.f_frsize
+ else:
+ raise NotImplementedError("{0} is not a supported platform for this alert".format(platform.platform()))
+
+ DiskInfo = collections.namedtuple('DiskInfo', 'total used free')
+ return DiskInfo(total=total, used=used, free=free)
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/MAPREDUCE/package/files/alert_mapreduce_directory_space.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/MAPREDUCE/package/files/alert_mapreduce_directory_space.py b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/MAPREDUCE/package/files/alert_mapreduce_directory_space.py
deleted file mode 100644
index 2124ad1..0000000
--- a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/MAPREDUCE/package/files/alert_mapreduce_directory_space.py
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import collections
-import os
-import platform
-
-RESULT_CODE_OK = 'OK'
-RESULT_CODE_CRITICAL = 'CRITICAL'
-RESULT_CODE_UNKNOWN = 'UNKNOWN'
-
-MAPREDUCE_LOCAL_DIR_KEY = '{{mapred-site/mapred.local.dir}}'
-
-def get_tokens():
- """
- Returns a tuple of tokens in the format {{site/property}} that will be used
- to build the dictionary passed into execute
- """
- return (MAPREDUCE_LOCAL_DIR_KEY,)
-
-
-def execute(parameters=None, host_name=None):
- """
- Returns a tuple containing the result code and a pre-formatted result label
-
- Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
- host_name (string): the name of this host where the alert is running
- """
- if parameters is None:
- return (('UNKNOWN', ['There were no parameters supplied to the script.']))
-
- mapreduce_local_directories = None
- if MAPREDUCE_LOCAL_DIR_KEY in parameters:
- mapreduce_local_directories = parameters[MAPREDUCE_LOCAL_DIR_KEY]
-
- if MAPREDUCE_LOCAL_DIR_KEY is None:
- return (('UNKNOWN', ['The MapReduce Local Directory is required.']))
-
- directory_list = mapreduce_local_directories.split(",")
- for directory in directory_list:
- disk_usage = None
- try:
- disk_usage = _get_disk_usage(directory)
- except NotImplementedError, platform_error:
- return (RESULT_CODE_UNKNOWN, [str(platform_error)])
-
- if disk_usage is None or disk_usage.total == 0:
- return (RESULT_CODE_UNKNOWN, ['Unable to determine the disk usage.'])
-
- percent = disk_usage.used / float(disk_usage.total) * 100
-
- if percent > 85:
- message = 'The disk usage of {0} is {1:d}%'.format(directory,percent)
- return (RESULT_CODE_CRITICAL, [message])
-
- return (RESULT_CODE_OK, ["All MapReduce local directories have sufficient space."])
-
-
-def _get_disk_usage(path):
- """
- returns a named tuple that contains the total, used, and free disk space
- in bytes
- """
- used = 0
- total = 0
- free = 0
-
- if 'statvfs' in dir(os):
- disk_stats = os.statvfs(path)
- free = disk_stats.f_bavail * disk_stats.f_frsize
- total = disk_stats.f_blocks * disk_stats.f_frsize
- used = (disk_stats.f_blocks - disk_stats.f_bfree) * disk_stats.f_frsize
- else:
- raise NotImplementedError("{0} is not a supported platform for this alert".format(platform.platform()))
-
- DiskInfo = collections.namedtuple('DiskInfo', 'total used free')
- return DiskInfo(total=total, used=used, free=free)
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/OOZIE/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/OOZIE/alerts.json b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/OOZIE/alerts.json
index 8a1646b..df8ab0f 100644
--- a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/OOZIE/alerts.json
+++ b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/OOZIE/alerts.json
@@ -34,7 +34,7 @@
"scope": "ANY",
"source": {
"type": "SCRIPT",
- "path": "HDP/1.3.2/services/OOZIE/package/files/alert_check_oozie_server.py"
+ "path": "HDP/1.3.2/services/OOZIE/package/alerts/alert_check_oozie_server.py"
}
}
]
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/OOZIE/package/alerts/alert_check_oozie_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/OOZIE/package/alerts/alert_check_oozie_server.py b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/OOZIE/package/alerts/alert_check_oozie_server.py
new file mode 100644
index 0000000..4e3e6ae
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/OOZIE/package/alerts/alert_check_oozie_server.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from resource_management import *
+from resource_management.libraries.functions import format
+from resource_management.libraries.functions import get_kinit_path
+from resource_management.core.environment import Environment
+
+RESULT_CODE_OK = 'OK'
+RESULT_CODE_CRITICAL = 'CRITICAL'
+RESULT_CODE_UNKNOWN = 'UNKNOWN'
+
+OOZIE_URL_KEY = '{{oozie-site/oozie.base.url}}'
+SECURITY_ENABLED = '{{cluster-env/security_enabled}}'
+SMOKEUSER_KEY = '{{cluster-env/smokeuser}}'
+SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}'
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (OOZIE_URL_KEY, SMOKEUSER_KEY, SECURITY_ENABLED,SMOKEUSER_KEYTAB_KEY)
+
+def execute(parameters=None, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ parameters (dictionary): a mapping of parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+
+ if parameters is None:
+ return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+
+ security_enabled = False
+ if set([OOZIE_URL_KEY, SMOKEUSER_KEY, SECURITY_ENABLED]).issubset(parameters):
+ oozie_url = parameters[OOZIE_URL_KEY]
+ smokeuser = parameters[SMOKEUSER_KEY]
+ security_enabled = str(parameters[SECURITY_ENABLED]).upper() == 'TRUE'
+ else:
+ return (RESULT_CODE_UNKNOWN, ['The Oozie URL and Smokeuser are a required parameters.'])
+
+ try:
+ if security_enabled:
+ if set([SMOKEUSER_KEYTAB_KEY]).issubset(parameters):
+ smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]
+ else:
+ return (RESULT_CODE_UNKNOWN, ['The Smokeuser keytab is required when security is enabled.'])
+ kinit_path_local = get_kinit_path(["/usr/bin", "/usr/kerberos/bin", "/usr/sbin"])
+ kinitcmd = format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser}; ")
+
+ Execute(kinitcmd,
+ user=smokeuser,
+ )
+
+ Execute(format("source /etc/oozie/conf/oozie-env.sh ; oozie admin -oozie {oozie_url} -status"),
+ user=smokeuser,
+ )
+ return (RESULT_CODE_OK, ["Oozie check success"])
+
+ except Exception, ex:
+ return (RESULT_CODE_CRITICAL, [str(ex)])
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/OOZIE/package/files/alert_check_oozie_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/OOZIE/package/files/alert_check_oozie_server.py b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/OOZIE/package/files/alert_check_oozie_server.py
deleted file mode 100644
index 7bf1255..0000000
--- a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/OOZIE/package/files/alert_check_oozie_server.py
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import subprocess
-from subprocess import CalledProcessError
-
-RESULT_CODE_OK = 'OK'
-RESULT_CODE_CRITICAL = 'CRITICAL'
-RESULT_CODE_UNKNOWN = 'UNKNOWN'
-
-OOZIE_URL_KEY = '{{oozie-site/oozie.base.url}}'
-
-def get_tokens():
- """
- Returns a tuple of tokens in the format {{site/property}} that will be used
- to build the dictionary passed into execute
- """
- return (OOZIE_URL_KEY)
-
-
-def execute(parameters=None, host_name=None):
- """
- Returns a tuple containing the result code and a pre-formatted result label
-
- Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
- host_name (string): the name of this host where the alert is running
- """
-
- if parameters is None:
- return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
-
- oozie_url = None
- if OOZIE_URL_KEY in parameters:
- oozie_url = parameters[OOZIE_URL_KEY]
-
- if oozie_url is None:
- return (RESULT_CODE_UNKNOWN, ['The Oozie URL is a required parameter.'])
-
- try:
- # oozie admin -oozie http://server:11000/oozie -status
- oozie_process = subprocess.Popen(['oozie', 'admin', '-oozie',
- oozie_url, '-status'], stderr=subprocess.PIPE, stdout=subprocess.PIPE)
-
- oozie_output, oozie_error = oozie_process.communicate()
- oozie_return_code = oozie_process.returncode
-
- if oozie_return_code == 0:
- # strip trailing newlines
- oozie_output = str(oozie_output).strip('\n')
- return (RESULT_CODE_OK, [oozie_output])
- else:
- oozie_error = str(oozie_error).strip('\n')
- return (RESULT_CODE_CRITICAL, [oozie_error])
-
- except CalledProcessError, cpe:
- return (RESULT_CODE_CRITICAL, [str(cpe)])
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/alerts.json b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/alerts.json
index d4a2076..32dc474 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/alerts.json
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/alerts.json
@@ -199,7 +199,7 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "HDP/2.0.6/services/YARN/package/files/alert_nodemanager_health.py"
+ "path": "HDP/2.0.6/services/YARN/package/alerts/alert_nodemanager_health.py"
}
}
],
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/package/alerts/alert_nodemanager_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/package/alerts/alert_nodemanager_health.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/package/alerts/alert_nodemanager_health.py
new file mode 100644
index 0000000..b1de951
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/package/alerts/alert_nodemanager_health.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+import socket
+import urllib2
+
+RESULT_CODE_OK = 'OK'
+RESULT_CODE_CRITICAL = 'CRITICAL'
+RESULT_CODE_UNKNOWN = 'UNKNOWN'
+
+NODEMANAGER_HTTP_ADDRESS_KEY = '{{yarn-site/yarn.nodemanager.webapp.address}}'
+NODEMANAGER_HTTPS_ADDRESS_KEY = '{{yarn-site/yarn.nodemanager.webapp.https.address}}'
+YARN_HTTP_POLICY_KEY = '{{yarn-site/yarn.http.policy}}'
+
+OK_MESSAGE = 'NodeManager Healthy'
+CRITICAL_CONNECTION_MESSAGE = 'Connection failed to {0}'
+CRITICAL_NODEMANAGER_STATUS_MESSAGE = 'NodeManager returned an unexpected status of "{0}"'
+CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager health from unexpected JSON response'
+
+NODEMANAGER_DEFAULT_PORT = 8042
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (NODEMANAGER_HTTP_ADDRESS_KEY,NODEMANAGER_HTTPS_ADDRESS_KEY,
+ YARN_HTTP_POLICY_KEY)
+
+
+def execute(parameters=None, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ parameters (dictionary): a mapping of parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+ result_code = RESULT_CODE_UNKNOWN
+
+ if parameters is None:
+ return (result_code, ['There were no parameters supplied to the script.'])
+
+ scheme = 'http'
+ http_uri = None
+ https_uri = None
+ http_policy = 'HTTP_ONLY'
+
+ if NODEMANAGER_HTTP_ADDRESS_KEY in parameters:
+ http_uri = parameters[NODEMANAGER_HTTP_ADDRESS_KEY]
+
+ if NODEMANAGER_HTTPS_ADDRESS_KEY in parameters:
+ https_uri = parameters[NODEMANAGER_HTTPS_ADDRESS_KEY]
+
+ if YARN_HTTP_POLICY_KEY in parameters:
+ http_policy = parameters[YARN_HTTP_POLICY_KEY]
+
+ # determine the right URI and whether to use SSL
+ uri = http_uri
+ if http_policy == 'HTTPS_ONLY':
+ scheme = 'https'
+
+ if https_uri is not None:
+ uri = https_uri
+
+ label = ''
+ url_response = None
+ node_healthy = 'false'
+ total_time = 0
+
+ # some yarn-site structures don't have the web ui address
+ if uri is None:
+ if host_name is None:
+ host_name = socket.getfqdn()
+
+ uri = '{0}:{1}'.format(host_name, NODEMANAGER_DEFAULT_PORT)
+
+ try:
+ query = "{0}://{1}/ws/v1/node/info".format(scheme,uri)
+
+ # execute the query for the JSON that includes templeton status
+ url_response = urllib2.urlopen(query)
+ except:
+ label = CRITICAL_CONNECTION_MESSAGE.format(uri)
+ return (RESULT_CODE_CRITICAL, [label])
+
+ # URL response received, parse it
+ try:
+ json_response = json.loads(url_response.read())
+ node_healthy = json_response['nodeInfo']['nodeHealthy']
+
+ # convert boolean to string
+ node_healthy = str(node_healthy)
+ except:
+ return (RESULT_CODE_CRITICAL, [query])
+
+ # proper JSON received, compare against known value
+ if node_healthy.lower() == 'true':
+ result_code = RESULT_CODE_OK
+ label = OK_MESSAGE
+ else:
+ result_code = RESULT_CODE_CRITICAL
+ label = CRITICAL_NODEMANAGER_STATUS_MESSAGE.format(node_healthy)
+
+ return (result_code, [label])
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/package/files/alert_nodemanager_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/package/files/alert_nodemanager_health.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/package/files/alert_nodemanager_health.py
deleted file mode 100644
index b1de951..0000000
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/YARN/package/files/alert_nodemanager_health.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import json
-import socket
-import urllib2
-
-RESULT_CODE_OK = 'OK'
-RESULT_CODE_CRITICAL = 'CRITICAL'
-RESULT_CODE_UNKNOWN = 'UNKNOWN'
-
-NODEMANAGER_HTTP_ADDRESS_KEY = '{{yarn-site/yarn.nodemanager.webapp.address}}'
-NODEMANAGER_HTTPS_ADDRESS_KEY = '{{yarn-site/yarn.nodemanager.webapp.https.address}}'
-YARN_HTTP_POLICY_KEY = '{{yarn-site/yarn.http.policy}}'
-
-OK_MESSAGE = 'NodeManager Healthy'
-CRITICAL_CONNECTION_MESSAGE = 'Connection failed to {0}'
-CRITICAL_NODEMANAGER_STATUS_MESSAGE = 'NodeManager returned an unexpected status of "{0}"'
-CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager health from unexpected JSON response'
-
-NODEMANAGER_DEFAULT_PORT = 8042
-
-def get_tokens():
- """
- Returns a tuple of tokens in the format {{site/property}} that will be used
- to build the dictionary passed into execute
- """
- return (NODEMANAGER_HTTP_ADDRESS_KEY,NODEMANAGER_HTTPS_ADDRESS_KEY,
- YARN_HTTP_POLICY_KEY)
-
-
-def execute(parameters=None, host_name=None):
- """
- Returns a tuple containing the result code and a pre-formatted result label
-
- Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
- host_name (string): the name of this host where the alert is running
- """
- result_code = RESULT_CODE_UNKNOWN
-
- if parameters is None:
- return (result_code, ['There were no parameters supplied to the script.'])
-
- scheme = 'http'
- http_uri = None
- https_uri = None
- http_policy = 'HTTP_ONLY'
-
- if NODEMANAGER_HTTP_ADDRESS_KEY in parameters:
- http_uri = parameters[NODEMANAGER_HTTP_ADDRESS_KEY]
-
- if NODEMANAGER_HTTPS_ADDRESS_KEY in parameters:
- https_uri = parameters[NODEMANAGER_HTTPS_ADDRESS_KEY]
-
- if YARN_HTTP_POLICY_KEY in parameters:
- http_policy = parameters[YARN_HTTP_POLICY_KEY]
-
- # determine the right URI and whether to use SSL
- uri = http_uri
- if http_policy == 'HTTPS_ONLY':
- scheme = 'https'
-
- if https_uri is not None:
- uri = https_uri
-
- label = ''
- url_response = None
- node_healthy = 'false'
- total_time = 0
-
- # some yarn-site structures don't have the web ui address
- if uri is None:
- if host_name is None:
- host_name = socket.getfqdn()
-
- uri = '{0}:{1}'.format(host_name, NODEMANAGER_DEFAULT_PORT)
-
- try:
- query = "{0}://{1}/ws/v1/node/info".format(scheme,uri)
-
- # execute the query for the JSON that includes templeton status
- url_response = urllib2.urlopen(query)
- except:
- label = CRITICAL_CONNECTION_MESSAGE.format(uri)
- return (RESULT_CODE_CRITICAL, [label])
-
- # URL response received, parse it
- try:
- json_response = json.loads(url_response.read())
- node_healthy = json_response['nodeInfo']['nodeHealthy']
-
- # convert boolean to string
- node_healthy = str(node_healthy)
- except:
- return (RESULT_CODE_CRITICAL, [query])
-
- # proper JSON received, compare against known value
- if node_healthy.lower() == 'true':
- result_code = RESULT_CODE_OK
- label = OK_MESSAGE
- else:
- result_code = RESULT_CODE_CRITICAL
- label = CRITICAL_NODEMANAGER_STATUS_MESSAGE.format(node_healthy)
-
- return (result_code, [label])
[2/2] ambari git commit: AMBARI-8755. Oozie server check alert fails
in secured mode (aonishuk)
Posted by ao...@apache.org.
AMBARI-8755. Oozie server check alert fails in secured mode (aonishuk)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/ec37c603
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/ec37c603
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/ec37c603
Branch: refs/heads/trunk
Commit: ec37c603c92969251f6c89318d403452f555b7ba
Parents: 79cffa1
Author: Andrew Onishuk <ao...@hortonworks.com>
Authored: Thu Dec 25 16:28:34 2014 +0200
Committer: Andrew Onishuk <ao...@hortonworks.com>
Committed: Thu Dec 25 16:28:34 2014 +0200
----------------------------------------------------------------------
.../python/ambari_agent/alerts/script_alert.py | 43 +++--
.../src/test/python/ambari_agent/TestAlerts.py | 4 +-
.../common-services/AMS/0.1.0/alerts.json | 2 +-
.../alerts/alert_ambari_metrics_monitor.py | 80 +++++++++
.../files/alert_ambari_metrics_monitor.py | 80 ---------
.../common-services/FLUME/1.4.0.2.0/alerts.json | 2 +-
.../package/alerts/alert_flume_agent_status.py | 99 +++++++++++
.../package/files/alert_flume_agent_status.py | 99 -----------
.../common-services/HDFS/2.1.0.2.0/alerts.json | 4 +-
.../package/alerts/alert_checkpoint_time.py | 136 +++++++++++++++
.../package/alerts/alert_ha_namenode_health.py | 166 +++++++++++++++++++
.../package/files/alert_checkpoint_time.py | 136 ---------------
.../package/files/alert_ha_namenode_health.py | 166 -------------------
.../common-services/HIVE/0.12.0.2.0/alerts.json | 4 +-
.../package/alerts/alert_hive_thrift_port.py | 124 ++++++++++++++
.../package/alerts/alert_webhcat_server.py | 111 +++++++++++++
.../package/files/alert_hive_thrift_port.py | 127 --------------
.../package/files/alert_webhcat_server.py | 111 -------------
.../common-services/OOZIE/4.0.0.2.0/alerts.json | 2 +-
.../package/alerts/alert_check_oozie_server.py | 81 +++++++++
.../package/files/alert_check_oozie_server.py | 74 ---------
.../stacks/HDP/1.3.2/services/HIVE/alerts.json | 4 +-
.../package/alerts/alert_hive_thrift_port.py | 124 ++++++++++++++
.../HIVE/package/alerts/alert_webhcat_server.py | 111 +++++++++++++
.../package/files/alert_hive_thrift_port.py | 127 --------------
.../HIVE/package/files/alert_webhcat_server.py | 111 -------------
.../HDP/1.3.2/services/MAPREDUCE/alerts.json | 2 +-
.../alerts/alert_mapreduce_directory_space.py | 93 +++++++++++
.../files/alert_mapreduce_directory_space.py | 95 -----------
.../stacks/HDP/1.3.2/services/OOZIE/alerts.json | 2 +-
.../package/alerts/alert_check_oozie_server.py | 81 +++++++++
.../package/files/alert_check_oozie_server.py | 74 ---------
.../stacks/HDP/2.0.6/services/YARN/alerts.json | 2 +-
.../package/alerts/alert_nodemanager_health.py | 123 ++++++++++++++
.../package/files/alert_nodemanager_health.py | 123 --------------
35 files changed, 1371 insertions(+), 1352 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py b/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py
index f39ab6f..660bddf 100644
--- a/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py
+++ b/ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py
@@ -21,7 +21,9 @@ limitations under the License.
import imp
import logging
import os
+import re
from alerts.base_alert import BaseAlert
+from resource_management.core.environment import Environment
from symbol import parameters
logger = logging.getLogger()
@@ -43,6 +45,7 @@ class ScriptAlert(BaseAlert):
self.stacks_dir = None
self.common_services_dir = None
self.host_scripts_dir = None
+ self.path_to_script = None
if 'path' in alert_source_meta:
self.path = alert_source_meta['path']
@@ -81,8 +84,16 @@ class ScriptAlert(BaseAlert):
parameters = {}
for key in self.config_value_dict:
parameters['{{' + key + '}}'] = self.config_value_dict[key]
-
- return cmd_module.execute(parameters, self.host_name)
+
+ # try to get basedir for scripts
+ # it's needed for server side scripts to properly use resource management
+ matchObj = re.match( r'((.*)services\/(.*)\/package\/)', self.path_to_script)
+ if matchObj:
+ basedir = matchObj.group(1)
+ with Environment(basedir) as env:
+ return cmd_module.execute(parameters, self.host_name)
+ else:
+ return cmd_module.execute(parameters, self.host_name)
else:
return (self.RESULT_UNKNOWN, ["Unable to execute script {0}".format(self.path)])
@@ -92,35 +103,35 @@ class ScriptAlert(BaseAlert):
raise Exception("The attribute 'path' must be specified")
paths = self.path.split('/')
- path_to_script = self.path
+ self.path_to_script = self.path
# if the path doesn't exist and stacks dir is defined, try that
- if not os.path.exists(path_to_script) and self.stacks_dir is not None:
- path_to_script = os.path.join(self.stacks_dir, *paths)
+ if not os.path.exists(self.path_to_script) and self.stacks_dir is not None:
+ self.path_to_script = os.path.join(self.stacks_dir, *paths)
# if the path doesn't exist and common services dir is defined, try that
- if not os.path.exists(path_to_script) and self.common_services_dir is not None:
- path_to_script = os.path.join(self.common_services_dir, *paths)
+ if not os.path.exists(self.path_to_script) and self.common_services_dir is not None:
+ self.path_to_script = os.path.join(self.common_services_dir, *paths)
# if the path doesn't exist and the host script dir is defined, try that
- if not os.path.exists(path_to_script) and self.host_scripts_dir is not None:
- path_to_script = os.path.join(self.host_scripts_dir, *paths)
+ if not os.path.exists(self.path_to_script) and self.host_scripts_dir is not None:
+ self.path_to_script = os.path.join(self.host_scripts_dir, *paths)
# if the path can't be evaluated, throw exception
- if not os.path.exists(path_to_script) or not os.path.isfile(path_to_script):
+ if not os.path.exists(self.path_to_script) or not os.path.isfile(self.path_to_script):
raise Exception(
"Unable to find '{0}' as an absolute path or part of {1} or {2}".format(self.path,
self.stacks_dir, self.host_scripts_dir))
if logger.isEnabledFor(logging.DEBUG):
- logger.debug("Executing script check {0}".format(path_to_script))
+ logger.debug("Executing script check {0}".format(self.path_to_script))
- if (not path_to_script.endswith('.py')):
- logger.error("Unable to execute script {0}".format(path_to_script))
+ if (not self.path_to_script.endswith('.py')):
+ logger.error("Unable to execute script {0}".format(self.path_to_script))
return None
-
- return imp.load_source(self._find_value('name'), path_to_script)
+
+ return imp.load_source(self._find_value('name'), self.path_to_script)
def _get_reporting_text(self, state):
@@ -132,4 +143,4 @@ class ScriptAlert(BaseAlert):
:param state: the state of the alert in uppercase (such as OK, WARNING, etc)
:return: the parameterized text
'''
- return '{0}'
\ No newline at end of file
+ return '{0}'
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
index 34911b5..0ac1e00 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestAlerts.py
@@ -21,6 +21,7 @@ limitations under the License.
import os
import socket
import sys
+import re
from ambari_agent.AlertSchedulerHandler import AlertSchedulerHandler
from ambari_agent.alerts.collector import AlertCollector
@@ -31,7 +32,7 @@ from ambari_agent.alerts.web_alert import WebAlert
from ambari_agent.apscheduler.scheduler import Scheduler
from collections import namedtuple
-from mock.mock import patch
+from mock.mock import MagicMock, patch
from unittest import TestCase
class TestAlerts(TestCase):
@@ -195,6 +196,7 @@ class TestAlerts(TestCase):
pa.collect()
+ @patch.object(re, 'match', new = MagicMock())
def test_script_alert(self):
json = {
"name": "namenode_process",
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/AMS/0.1.0/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/AMS/0.1.0/alerts.json b/ambari-server/src/main/resources/common-services/AMS/0.1.0/alerts.json
index 700f021..93b224a 100644
--- a/ambari-server/src/main/resources/common-services/AMS/0.1.0/alerts.json
+++ b/ambari-server/src/main/resources/common-services/AMS/0.1.0/alerts.json
@@ -147,7 +147,7 @@
"scope": "ANY",
"source": {
"type": "SCRIPT",
- "path": "AMS/0.1.0/package/files/alert_ambari_metrics_monitor.py"
+ "path": "AMS/0.1.0/package/alerts/alert_ambari_metrics_monitor.py"
}
}
]
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/AMS/0.1.0/package/alerts/alert_ambari_metrics_monitor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/AMS/0.1.0/package/alerts/alert_ambari_metrics_monitor.py b/ambari-server/src/main/resources/common-services/AMS/0.1.0/package/alerts/alert_ambari_metrics_monitor.py
new file mode 100644
index 0000000..5841267
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/AMS/0.1.0/package/alerts/alert_ambari_metrics_monitor.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import socket
+
+from resource_management.libraries.functions.check_process_status import check_process_status
+from resource_management.core.exceptions import ComponentIsNotRunning
+
+RESULT_CODE_OK = 'OK'
+RESULT_CODE_CRITICAL = 'CRITICAL'
+RESULT_CODE_UNKNOWN = 'UNKNOWN'
+
+AMS_MONITOR_PID_PATH = '/var/run/ambari-metrics-monitor/ambari-metrics-monitor.pid'
+
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return ()
+
+
+def is_monitor_process_live(pid_file):
+ """
+ Gets whether the AMS monitor represented by the specified file is running.
+ :param pid_file: the PID file of the monitor to check
+ :return: True if the monitor is running, False otherwise
+ """
+ live = False
+
+ try:
+ check_process_status(pid_file)
+ live = True
+ except ComponentIsNotRunning:
+ pass
+
+ return live
+
+
+def execute(parameters=None, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ parameters (dictionary): a mapping of parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+
+ if parameters is None:
+ return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+
+ if host_name is None:
+ host_name = socket.getfqdn()
+
+ ams_monitor_process_running = is_monitor_process_live(AMS_MONITOR_PID_PATH)
+
+ alert_state = RESULT_CODE_OK if ams_monitor_process_running else RESULT_CODE_CRITICAL
+
+ alert_label = 'Ambari Monitor is running on {0}' if ams_monitor_process_running else 'Ambari Monitor is NOT running on {0}'
+ alert_label = alert_label.format(host_name)
+
+ return (alert_state, [alert_label])
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/AMS/0.1.0/package/files/alert_ambari_metrics_monitor.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/AMS/0.1.0/package/files/alert_ambari_metrics_monitor.py b/ambari-server/src/main/resources/common-services/AMS/0.1.0/package/files/alert_ambari_metrics_monitor.py
deleted file mode 100644
index 5841267..0000000
--- a/ambari-server/src/main/resources/common-services/AMS/0.1.0/package/files/alert_ambari_metrics_monitor.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import socket
-
-from resource_management.libraries.functions.check_process_status import check_process_status
-from resource_management.core.exceptions import ComponentIsNotRunning
-
-RESULT_CODE_OK = 'OK'
-RESULT_CODE_CRITICAL = 'CRITICAL'
-RESULT_CODE_UNKNOWN = 'UNKNOWN'
-
-AMS_MONITOR_PID_PATH = '/var/run/ambari-metrics-monitor/ambari-metrics-monitor.pid'
-
-
-def get_tokens():
- """
- Returns a tuple of tokens in the format {{site/property}} that will be used
- to build the dictionary passed into execute
- """
- return ()
-
-
-def is_monitor_process_live(pid_file):
- """
- Gets whether the AMS monitor represented by the specified file is running.
- :param pid_file: the PID file of the monitor to check
- :return: True if the monitor is running, False otherwise
- """
- live = False
-
- try:
- check_process_status(pid_file)
- live = True
- except ComponentIsNotRunning:
- pass
-
- return live
-
-
-def execute(parameters=None, host_name=None):
- """
- Returns a tuple containing the result code and a pre-formatted result label
-
- Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
- host_name (string): the name of this host where the alert is running
- """
-
- if parameters is None:
- return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
-
- if host_name is None:
- host_name = socket.getfqdn()
-
- ams_monitor_process_running = is_monitor_process_live(AMS_MONITOR_PID_PATH)
-
- alert_state = RESULT_CODE_OK if ams_monitor_process_running else RESULT_CODE_CRITICAL
-
- alert_label = 'Ambari Monitor is running on {0}' if ams_monitor_process_running else 'Ambari Monitor is NOT running on {0}'
- alert_label = alert_label.format(host_name)
-
- return (alert_state, [alert_label])
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/alerts.json b/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/alerts.json
index b40afd9..865c471 100644
--- a/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/alerts.json
+++ b/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/alerts.json
@@ -10,7 +10,7 @@
"scope": "ANY",
"source": {
"type": "SCRIPT",
- "path": "FLUME/1.4.0.2.0/package/files/alert_flume_agent_status.py"
+ "path": "FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py"
}
}
]
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py b/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py
new file mode 100644
index 0000000..b183bbc
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/alerts/alert_flume_agent_status.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import socket
+
+from resource_management.libraries.functions.flume_agent_helper import find_expected_agent_names
+from resource_management.libraries.functions.flume_agent_helper import get_flume_status
+
+RESULT_CODE_OK = 'OK'
+RESULT_CODE_CRITICAL = 'CRITICAL'
+RESULT_CODE_UNKNOWN = 'UNKNOWN'
+
+FLUME_CONF_DIR_KEY = '{{flume-env/flume_conf_dir}}'
+
+FLUME_RUN_DIR = '/var/run/flume'
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (FLUME_CONF_DIR_KEY,)
+
+
+def execute(parameters=None, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ parameters (dictionary): a mapping of parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+
+ if parameters is None:
+ return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+
+ flume_conf_directory = None
+ if FLUME_CONF_DIR_KEY in parameters:
+ flume_conf_directory = parameters[FLUME_CONF_DIR_KEY]
+
+ if flume_conf_directory is None:
+ return (RESULT_CODE_UNKNOWN, ['The Flume configuration directory is a required parameter.'])
+
+ if host_name is None:
+ host_name = socket.getfqdn()
+
+ processes = get_flume_status(flume_conf_directory, FLUME_RUN_DIR)
+ expected_agents = find_expected_agent_names(flume_conf_directory)
+
+ alert_label = ''
+ alert_state = RESULT_CODE_OK
+
+ if len(processes) == 0 and len(expected_agents) == 0:
+ alert_label = 'No agents defined on {0}'.format(host_name)
+ else:
+ ok = []
+ critical = []
+ text_arr = []
+
+ for process in processes:
+ if not process.has_key('status') or process['status'] == 'NOT_RUNNING':
+ critical.append(process['name'])
+ else:
+ ok.append(process['name'])
+
+ if len(critical) > 0:
+ text_arr.append("{0} {1} NOT running".format(", ".join(critical),
+ "is" if len(critical) == 1 else "are"))
+
+ if len(ok) > 0:
+ text_arr.append("{0} {1} running".format(", ".join(ok),
+ "is" if len(ok) == 1 else "are"))
+
+ plural = len(critical) > 1 or len(ok) > 1
+ alert_label = "Agent{0} {1} {2}".format(
+ "s" if plural else "",
+ " and ".join(text_arr),
+ "on " + host_name)
+
+ alert_state = RESULT_CODE_CRITICAL if len(critical) > 0 else RESULT_CODE_OK
+
+ return (alert_state, [alert_label])
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/files/alert_flume_agent_status.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/files/alert_flume_agent_status.py b/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/files/alert_flume_agent_status.py
deleted file mode 100644
index b183bbc..0000000
--- a/ambari-server/src/main/resources/common-services/FLUME/1.4.0.2.0/package/files/alert_flume_agent_status.py
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import socket
-
-from resource_management.libraries.functions.flume_agent_helper import find_expected_agent_names
-from resource_management.libraries.functions.flume_agent_helper import get_flume_status
-
-RESULT_CODE_OK = 'OK'
-RESULT_CODE_CRITICAL = 'CRITICAL'
-RESULT_CODE_UNKNOWN = 'UNKNOWN'
-
-FLUME_CONF_DIR_KEY = '{{flume-env/flume_conf_dir}}'
-
-FLUME_RUN_DIR = '/var/run/flume'
-
-def get_tokens():
- """
- Returns a tuple of tokens in the format {{site/property}} that will be used
- to build the dictionary passed into execute
- """
- return (FLUME_CONF_DIR_KEY,)
-
-
-def execute(parameters=None, host_name=None):
- """
- Returns a tuple containing the result code and a pre-formatted result label
-
- Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
- host_name (string): the name of this host where the alert is running
- """
-
- if parameters is None:
- return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
-
- flume_conf_directory = None
- if FLUME_CONF_DIR_KEY in parameters:
- flume_conf_directory = parameters[FLUME_CONF_DIR_KEY]
-
- if flume_conf_directory is None:
- return (RESULT_CODE_UNKNOWN, ['The Flume configuration directory is a required parameter.'])
-
- if host_name is None:
- host_name = socket.getfqdn()
-
- processes = get_flume_status(flume_conf_directory, FLUME_RUN_DIR)
- expected_agents = find_expected_agent_names(flume_conf_directory)
-
- alert_label = ''
- alert_state = RESULT_CODE_OK
-
- if len(processes) == 0 and len(expected_agents) == 0:
- alert_label = 'No agents defined on {0}'.format(host_name)
- else:
- ok = []
- critical = []
- text_arr = []
-
- for process in processes:
- if not process.has_key('status') or process['status'] == 'NOT_RUNNING':
- critical.append(process['name'])
- else:
- ok.append(process['name'])
-
- if len(critical) > 0:
- text_arr.append("{0} {1} NOT running".format(", ".join(critical),
- "is" if len(critical) == 1 else "are"))
-
- if len(ok) > 0:
- text_arr.append("{0} {1} running".format(", ".join(ok),
- "is" if len(ok) == 1 else "are"))
-
- plural = len(critical) > 1 or len(ok) > 1
- alert_label = "Agent{0} {1} {2}".format(
- "s" if plural else "",
- " and ".join(text_arr),
- "on " + host_name)
-
- alert_state = RESULT_CODE_CRITICAL if len(critical) > 0 else RESULT_CODE_OK
-
- return (alert_state, [alert_label])
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
index e2db2f2..8de4b3a 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
@@ -330,7 +330,7 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "HDFS/2.1.0.2.0/package/files/alert_checkpoint_time.py"
+ "path": "HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py"
}
},
{
@@ -343,7 +343,7 @@
"ignore_host": true,
"source": {
"type": "SCRIPT",
- "path": "HDFS/2.1.0.2.0/package/files/alert_ha_namenode_health.py"
+ "path": "HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py"
}
}
],
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
new file mode 100644
index 0000000..410608f
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_checkpoint_time.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import time
+import urllib2
+import json
+
+LABEL = 'Last Checkpoint: [{h} hours, {m} minutes, {tx} transactions]'
+
+NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
+NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
+NN_HTTP_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
+NN_CHECKPOINT_TX_KEY = '{{hdfs-site/dfs.namenode.checkpoint.txns}}'
+NN_CHECKPOINT_PERIOD_KEY = '{{hdfs-site/dfs.namenode.checkpoint.period}}'
+
+PERCENT_WARNING = 200
+PERCENT_CRITICAL = 200
+
+CHECKPOINT_TX_DEFAULT = 1000000
+CHECKPOINT_PERIOD_DEFAULT = 21600
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (NN_HTTP_ADDRESS_KEY, NN_HTTPS_ADDRESS_KEY, NN_HTTP_POLICY_KEY,
+ NN_CHECKPOINT_TX_KEY, NN_CHECKPOINT_PERIOD_KEY)
+
+
+def execute(parameters=None, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ parameters (dictionary): a mapping of parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+
+ if parameters is None:
+ return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+
+ uri = None
+ scheme = 'http'
+ http_uri = None
+ https_uri = None
+ http_policy = 'HTTP_ONLY'
+ percent_warning = PERCENT_WARNING
+ percent_critical = PERCENT_CRITICAL
+ checkpoint_tx = CHECKPOINT_TX_DEFAULT
+ checkpoint_period = CHECKPOINT_PERIOD_DEFAULT
+
+ if NN_HTTP_ADDRESS_KEY in parameters:
+ http_uri = parameters[NN_HTTP_ADDRESS_KEY]
+
+ if NN_HTTPS_ADDRESS_KEY in parameters:
+ https_uri = parameters[NN_HTTPS_ADDRESS_KEY]
+
+ if NN_HTTP_POLICY_KEY in parameters:
+ http_policy = parameters[NN_HTTP_POLICY_KEY]
+
+ if NN_CHECKPOINT_TX_KEY in parameters:
+ checkpoint_tx = parameters[NN_CHECKPOINT_TX_KEY]
+
+ if NN_CHECKPOINT_PERIOD_KEY in parameters:
+ checkpoint_period = parameters[NN_CHECKPOINT_PERIOD_KEY]
+
+ # determine the right URI and whether to use SSL
+ uri = http_uri
+ if http_policy == 'HTTPS_ONLY':
+ scheme = 'https'
+
+ if https_uri is not None:
+ uri = https_uri
+
+ current_time = int(round(time.time() * 1000))
+
+ last_checkpoint_time_qry = "{0}://{1}/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem".format(scheme,uri)
+ journal_transaction_info_qry = "{0}://{1}/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo".format(scheme,uri)
+
+ # start out assuming an OK status
+ label = None
+ result_code = "OK"
+
+ try:
+ last_checkpoint_time = int(get_value_from_jmx(last_checkpoint_time_qry,"LastCheckpointTime"))
+ journal_transaction_info = get_value_from_jmx(journal_transaction_info_qry,"JournalTransactionInfo")
+ journal_transaction_info_dict = json.loads(journal_transaction_info)
+
+ last_tx = int(journal_transaction_info_dict['LastAppliedOrWrittenTxId'])
+ most_recent_tx = int(journal_transaction_info_dict['MostRecentCheckpointTxId'])
+ transaction_difference = last_tx - most_recent_tx
+
+ delta = (current_time - last_checkpoint_time)/1000
+
+ label = LABEL.format(h=get_time(delta)['h'], m=get_time(delta)['m'], tx=transaction_difference)
+
+ if (transaction_difference > int(checkpoint_tx)) and (float(delta) / int(checkpoint_period)*100 >= int(percent_critical)):
+ result_code = 'CRITICAL'
+ elif (transaction_difference > int(checkpoint_tx)) and (float(delta) / int(checkpoint_period)*100 >= int(percent_warning)):
+ result_code = 'WARNING'
+
+ except Exception, e:
+ label = str(e)
+ result_code = 'UNKNOWN'
+
+ return ((result_code, [label]))
+
+def get_time(delta):
+ h = int(delta/3600)
+ m = int((delta % 3600)/60)
+ return {'h':h, 'm':m}
+
+
+def get_value_from_jmx(qry, property):
+ response = urllib2.urlopen(qry)
+ data=response.read()
+ data_dict = json.loads(data)
+ return data_dict["beans"][0][property]
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
new file mode 100644
index 0000000..fc1541d
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import urllib2
+import json
+
+RESULT_STATE_OK = 'OK'
+RESULT_STATE_CRITICAL = 'CRITICAL'
+RESULT_STATE_UNKNOWN = 'UNKNOWN'
+RESULT_STATE_SKIPPED = 'SKIPPED'
+
+HDFS_NN_STATE_ACTIVE = 'active'
+HDFS_NN_STATE_STANDBY = 'standby'
+
+HDFS_SITE_KEY = '{{hdfs-site}}'
+NAMESERVICE_KEY = '{{hdfs-site/dfs.nameservices}}'
+NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
+NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
+DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (HDFS_SITE_KEY, NAMESERVICE_KEY, NN_HTTP_ADDRESS_KEY,
+ NN_HTTPS_ADDRESS_KEY, DFS_POLICY_KEY)
+
+
+def execute(parameters=None, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ parameters (dictionary): a mapping of parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+ if parameters is None:
+ return (RESULT_STATE_UNKNOWN, ['There were no parameters supplied to the script.'])
+
+ # if not in HA mode, then SKIP
+ if not NAMESERVICE_KEY in parameters:
+ return (RESULT_STATE_SKIPPED, ['NameNode HA is not enabled'])
+
+ # hdfs-site is required
+ if not HDFS_SITE_KEY in parameters:
+ return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(HDFS_SITE_KEY)])
+
+ # determine whether or not SSL is enabled
+ is_ssl_enabled = False
+ if DFS_POLICY_KEY in parameters:
+ dfs_policy = parameters[DFS_POLICY_KEY]
+ if dfs_policy == "HTTPS_ONLY":
+ is_ssl_enabled = True
+
+ name_service = parameters[NAMESERVICE_KEY]
+ hdfs_site = parameters[HDFS_SITE_KEY]
+
+ # look for dfs.ha.namenodes.foo
+ nn_unique_ids_key = 'dfs.ha.namenodes.' + name_service
+ if not nn_unique_ids_key in hdfs_site:
+ return (RESULT_STATE_UNKNOWN, ['Unable to find unique namenode alias key {0}'.format(nn_unique_ids_key)])
+
+ namenode_http_fragment = 'dfs.namenode.http-address.{0}.{1}'
+ jmx_uri_fragment = "http://{0}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"
+
+ if is_ssl_enabled:
+ namenode_http_fragment = 'dfs.namenode.https-address.{0}.{1}'
+ jmx_uri_fragment = "https://{0}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"
+
+
+ active_namenodes = []
+ standby_namenodes = []
+ unknown_namenodes = []
+
+ # now we have something like 'nn1,nn2,nn3,nn4'
+ # turn it into dfs.namenode.[property].[dfs.nameservices].[nn_unique_id]
+ # ie dfs.namenode.http-address.hacluster.nn1
+ nn_unique_ids = hdfs_site[nn_unique_ids_key].split(',')
+ for nn_unique_id in nn_unique_ids:
+ key = namenode_http_fragment.format(name_service,nn_unique_id)
+
+ if key in hdfs_site:
+ # use str() to ensure that unicode strings do not have the u' in them
+ value = str(hdfs_site[key])
+
+ try:
+ jmx_uri = jmx_uri_fragment.format(value)
+ state = get_value_from_jmx(jmx_uri,'State')
+
+ if state == HDFS_NN_STATE_ACTIVE:
+ active_namenodes.append(value)
+ elif state == HDFS_NN_STATE_STANDBY:
+ standby_namenodes.append(value)
+ else:
+ unknown_namenodes.append(value)
+ except:
+ unknown_namenodes.append(value)
+
+ # now that the request is done, determine if this host is the host that
+ # should report the status of the HA topology
+ is_active_namenode = False
+ for active_namenode in active_namenodes:
+ if active_namenode.startswith(host_name):
+ is_active_namenode = True
+
+ # there's only one scenario here; there is exactly 1 active and 1 standby
+ is_topology_healthy = len(active_namenodes) == 1 and len(standby_namenodes) == 1
+
+ result_label = 'Active{0}, Standby{1}, Unknown{2}'.format(str(active_namenodes),
+ str(standby_namenodes), str(unknown_namenodes))
+
+ # Healthy Topology:
+ # - Active NN reports the alert, standby does not
+ #
+ # Unhealthy Topology:
+ # - Report the alert if this is the first named host
+ # - Report the alert if not the first named host, but the other host
+ # could not report its status
+ if is_topology_healthy:
+ if is_active_namenode is True:
+ return (RESULT_STATE_OK, [result_label])
+ else:
+ return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
+ else:
+ # dfs.namenode.rpc-address.service.alias is guaranteed in HA mode
+ first_listed_host_key = 'dfs.namenode.rpc-address.{0}.{1}'.format(
+ name_service, nn_unique_ids[0])
+
+ first_listed_host = ''
+ if first_listed_host_key in hdfs_site:
+ first_listed_host = hdfs_site[first_listed_host_key]
+
+ is_first_listed_host = False
+ if first_listed_host.startswith(host_name):
+ is_first_listed_host = True
+
+ if is_first_listed_host:
+ return (RESULT_STATE_CRITICAL, [result_label])
+ else:
+ # not the first listed host, but the first host might be in the unknown
+ return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
+
+
+def get_value_from_jmx(qry, property):
+ response = urllib2.urlopen(qry)
+ data=response.read()
+ data_dict = json.loads(data)
+ return data_dict["beans"][0][property]
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_checkpoint_time.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_checkpoint_time.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_checkpoint_time.py
deleted file mode 100644
index 410608f..0000000
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_checkpoint_time.py
+++ /dev/null
@@ -1,136 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import time
-import urllib2
-import json
-
-LABEL = 'Last Checkpoint: [{h} hours, {m} minutes, {tx} transactions]'
-
-NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
-NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
-NN_HTTP_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
-NN_CHECKPOINT_TX_KEY = '{{hdfs-site/dfs.namenode.checkpoint.txns}}'
-NN_CHECKPOINT_PERIOD_KEY = '{{hdfs-site/dfs.namenode.checkpoint.period}}'
-
-PERCENT_WARNING = 200
-PERCENT_CRITICAL = 200
-
-CHECKPOINT_TX_DEFAULT = 1000000
-CHECKPOINT_PERIOD_DEFAULT = 21600
-
-def get_tokens():
- """
- Returns a tuple of tokens in the format {{site/property}} that will be used
- to build the dictionary passed into execute
- """
- return (NN_HTTP_ADDRESS_KEY, NN_HTTPS_ADDRESS_KEY, NN_HTTP_POLICY_KEY,
- NN_CHECKPOINT_TX_KEY, NN_CHECKPOINT_PERIOD_KEY)
-
-
-def execute(parameters=None, host_name=None):
- """
- Returns a tuple containing the result code and a pre-formatted result label
-
- Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
- host_name (string): the name of this host where the alert is running
- """
-
- if parameters is None:
- return (('UNKNOWN', ['There were no parameters supplied to the script.']))
-
- uri = None
- scheme = 'http'
- http_uri = None
- https_uri = None
- http_policy = 'HTTP_ONLY'
- percent_warning = PERCENT_WARNING
- percent_critical = PERCENT_CRITICAL
- checkpoint_tx = CHECKPOINT_TX_DEFAULT
- checkpoint_period = CHECKPOINT_PERIOD_DEFAULT
-
- if NN_HTTP_ADDRESS_KEY in parameters:
- http_uri = parameters[NN_HTTP_ADDRESS_KEY]
-
- if NN_HTTPS_ADDRESS_KEY in parameters:
- https_uri = parameters[NN_HTTPS_ADDRESS_KEY]
-
- if NN_HTTP_POLICY_KEY in parameters:
- http_policy = parameters[NN_HTTP_POLICY_KEY]
-
- if NN_CHECKPOINT_TX_KEY in parameters:
- checkpoint_tx = parameters[NN_CHECKPOINT_TX_KEY]
-
- if NN_CHECKPOINT_PERIOD_KEY in parameters:
- checkpoint_period = parameters[NN_CHECKPOINT_PERIOD_KEY]
-
- # determine the right URI and whether to use SSL
- uri = http_uri
- if http_policy == 'HTTPS_ONLY':
- scheme = 'https'
-
- if https_uri is not None:
- uri = https_uri
-
- current_time = int(round(time.time() * 1000))
-
- last_checkpoint_time_qry = "{0}://{1}/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem".format(scheme,uri)
- journal_transaction_info_qry = "{0}://{1}/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo".format(scheme,uri)
-
- # start out assuming an OK status
- label = None
- result_code = "OK"
-
- try:
- last_checkpoint_time = int(get_value_from_jmx(last_checkpoint_time_qry,"LastCheckpointTime"))
- journal_transaction_info = get_value_from_jmx(journal_transaction_info_qry,"JournalTransactionInfo")
- journal_transaction_info_dict = json.loads(journal_transaction_info)
-
- last_tx = int(journal_transaction_info_dict['LastAppliedOrWrittenTxId'])
- most_recent_tx = int(journal_transaction_info_dict['MostRecentCheckpointTxId'])
- transaction_difference = last_tx - most_recent_tx
-
- delta = (current_time - last_checkpoint_time)/1000
-
- label = LABEL.format(h=get_time(delta)['h'], m=get_time(delta)['m'], tx=transaction_difference)
-
- if (transaction_difference > int(checkpoint_tx)) and (float(delta) / int(checkpoint_period)*100 >= int(percent_critical)):
- result_code = 'CRITICAL'
- elif (transaction_difference > int(checkpoint_tx)) and (float(delta) / int(checkpoint_period)*100 >= int(percent_warning)):
- result_code = 'WARNING'
-
- except Exception, e:
- label = str(e)
- result_code = 'UNKNOWN'
-
- return ((result_code, [label]))
-
-def get_time(delta):
- h = int(delta/3600)
- m = int((delta % 3600)/60)
- return {'h':h, 'm':m}
-
-
-def get_value_from_jmx(qry, property):
- response = urllib2.urlopen(qry)
- data=response.read()
- data_dict = json.loads(data)
- return data_dict["beans"][0][property]
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_ha_namenode_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_ha_namenode_health.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_ha_namenode_health.py
deleted file mode 100644
index fc1541d..0000000
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_ha_namenode_health.py
+++ /dev/null
@@ -1,166 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import urllib2
-import json
-
-RESULT_STATE_OK = 'OK'
-RESULT_STATE_CRITICAL = 'CRITICAL'
-RESULT_STATE_UNKNOWN = 'UNKNOWN'
-RESULT_STATE_SKIPPED = 'SKIPPED'
-
-HDFS_NN_STATE_ACTIVE = 'active'
-HDFS_NN_STATE_STANDBY = 'standby'
-
-HDFS_SITE_KEY = '{{hdfs-site}}'
-NAMESERVICE_KEY = '{{hdfs-site/dfs.nameservices}}'
-NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
-NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
-DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
-
-def get_tokens():
- """
- Returns a tuple of tokens in the format {{site/property}} that will be used
- to build the dictionary passed into execute
- """
- return (HDFS_SITE_KEY, NAMESERVICE_KEY, NN_HTTP_ADDRESS_KEY,
- NN_HTTPS_ADDRESS_KEY, DFS_POLICY_KEY)
-
-
-def execute(parameters=None, host_name=None):
- """
- Returns a tuple containing the result code and a pre-formatted result label
-
- Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
- host_name (string): the name of this host where the alert is running
- """
- if parameters is None:
- return (RESULT_STATE_UNKNOWN, ['There were no parameters supplied to the script.'])
-
- # if not in HA mode, then SKIP
- if not NAMESERVICE_KEY in parameters:
- return (RESULT_STATE_SKIPPED, ['NameNode HA is not enabled'])
-
- # hdfs-site is required
- if not HDFS_SITE_KEY in parameters:
- return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(HDFS_SITE_KEY)])
-
- # determine whether or not SSL is enabled
- is_ssl_enabled = False
- if DFS_POLICY_KEY in parameters:
- dfs_policy = parameters[DFS_POLICY_KEY]
- if dfs_policy == "HTTPS_ONLY":
- is_ssl_enabled = True
-
- name_service = parameters[NAMESERVICE_KEY]
- hdfs_site = parameters[HDFS_SITE_KEY]
-
- # look for dfs.ha.namenodes.foo
- nn_unique_ids_key = 'dfs.ha.namenodes.' + name_service
- if not nn_unique_ids_key in hdfs_site:
- return (RESULT_STATE_UNKNOWN, ['Unable to find unique namenode alias key {0}'.format(nn_unique_ids_key)])
-
- namenode_http_fragment = 'dfs.namenode.http-address.{0}.{1}'
- jmx_uri_fragment = "http://{0}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"
-
- if is_ssl_enabled:
- namenode_http_fragment = 'dfs.namenode.https-address.{0}.{1}'
- jmx_uri_fragment = "https://{0}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"
-
-
- active_namenodes = []
- standby_namenodes = []
- unknown_namenodes = []
-
- # now we have something like 'nn1,nn2,nn3,nn4'
- # turn it into dfs.namenode.[property].[dfs.nameservices].[nn_unique_id]
- # ie dfs.namenode.http-address.hacluster.nn1
- nn_unique_ids = hdfs_site[nn_unique_ids_key].split(',')
- for nn_unique_id in nn_unique_ids:
- key = namenode_http_fragment.format(name_service,nn_unique_id)
-
- if key in hdfs_site:
- # use str() to ensure that unicode strings do not have the u' in them
- value = str(hdfs_site[key])
-
- try:
- jmx_uri = jmx_uri_fragment.format(value)
- state = get_value_from_jmx(jmx_uri,'State')
-
- if state == HDFS_NN_STATE_ACTIVE:
- active_namenodes.append(value)
- elif state == HDFS_NN_STATE_STANDBY:
- standby_namenodes.append(value)
- else:
- unknown_namenodes.append(value)
- except:
- unknown_namenodes.append(value)
-
- # now that the request is done, determine if this host is the host that
- # should report the status of the HA topology
- is_active_namenode = False
- for active_namenode in active_namenodes:
- if active_namenode.startswith(host_name):
- is_active_namenode = True
-
- # there's only one scenario here; there is exactly 1 active and 1 standby
- is_topology_healthy = len(active_namenodes) == 1 and len(standby_namenodes) == 1
-
- result_label = 'Active{0}, Standby{1}, Unknown{2}'.format(str(active_namenodes),
- str(standby_namenodes), str(unknown_namenodes))
-
- # Healthy Topology:
- # - Active NN reports the alert, standby does not
- #
- # Unhealthy Topology:
- # - Report the alert if this is the first named host
- # - Report the alert if not the first named host, but the other host
- # could not report its status
- if is_topology_healthy:
- if is_active_namenode is True:
- return (RESULT_STATE_OK, [result_label])
- else:
- return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
- else:
- # dfs.namenode.rpc-address.service.alias is guaranteed in HA mode
- first_listed_host_key = 'dfs.namenode.rpc-address.{0}.{1}'.format(
- name_service, nn_unique_ids[0])
-
- first_listed_host = ''
- if first_listed_host_key in hdfs_site:
- first_listed_host = hdfs_site[first_listed_host_key]
-
- is_first_listed_host = False
- if first_listed_host.startswith(host_name):
- is_first_listed_host = True
-
- if is_first_listed_host:
- return (RESULT_STATE_CRITICAL, [result_label])
- else:
- # not the first listed host, but the first host might be in the unknown
- return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
-
-
-def get_value_from_jmx(qry, property):
- response = urllib2.urlopen(qry)
- data=response.read()
- data_dict = json.loads(data)
- return data_dict["beans"][0][property]
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/alerts.json b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/alerts.json
index 2dedbe9..750f586 100644
--- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/alerts.json
+++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/alerts.json
@@ -38,7 +38,7 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "HIVE/0.12.0.2.0/package/files/alert_hive_thrift_port.py"
+ "path": "HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py"
}
}
],
@@ -52,7 +52,7 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "HIVE/0.12.0.2.0/package/files/alert_webhcat_server.py"
+ "path": "HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py"
}
}
]
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py
new file mode 100644
index 0000000..ebfbf55
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_hive_thrift_port.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+import socket
+import time
+import traceback
+import urllib2
+from resource_management.libraries.functions import hive_check
+from resource_management.libraries.functions import format
+from resource_management.libraries.functions import get_kinit_path
+
+OK_MESSAGE = "TCP OK - %.4f response on port %s"
+CRITICAL_MESSAGE = "Connection failed on host {0}:{1}"
+
+HIVE_SERVER_THRIFT_PORT_KEY = '{{hive-site/hive.server2.thrift.port}}'
+SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}'
+HIVE_SERVER2_AUTHENTICATION_KEY = '{{hive-site/hive.server2.authentication}}'
+HIVE_SERVER_PRINCIPAL_KEY = '{{hive-site/hive.server2.authentication.kerberos.principal}}'
+SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}'
+SMOKEUSER_KEY = '{{cluster-env/smokeuser}}'
+
+PERCENT_WARNING = 200
+PERCENT_CRITICAL = 200
+
+THRIFT_PORT_DEFAULT = 10000
+HIVE_SERVER_PRINCIPAL_DEFAULT = 'hive/_HOST@EXAMPLE.COM'
+HIVE_SERVER2_AUTHENTICATION_DEFAULT = 'NOSASL'
+SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab'
+SMOKEUSER_DEFAULT = 'ambari-qa'
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (HIVE_SERVER_THRIFT_PORT_KEY,SECURITY_ENABLED_KEY,HIVE_SERVER2_AUTHENTICATION_KEY,HIVE_SERVER_PRINCIPAL_KEY,SMOKEUSER_KEYTAB_KEY,SMOKEUSER_KEY)
+
+
+def execute(parameters=None, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ parameters (dictionary): a mapping of parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+
+ if parameters is None:
+ return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+
+ thrift_port = THRIFT_PORT_DEFAULT
+ if HIVE_SERVER_THRIFT_PORT_KEY in parameters:
+ thrift_port = int(parameters[HIVE_SERVER_THRIFT_PORT_KEY])
+
+ security_enabled = False
+ if SECURITY_ENABLED_KEY in parameters:
+ security_enabled = str(parameters[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
+
+ hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT
+ if HIVE_SERVER2_AUTHENTICATION_KEY in parameters:
+ hive_server2_authentication = parameters[HIVE_SERVER2_AUTHENTICATION_KEY]
+
+ smokeuser = SMOKEUSER_DEFAULT
+ if SMOKEUSER_KEY in parameters:
+ smokeuser = parameters[SMOKEUSER_KEY]
+
+ result_code = None
+
+ if security_enabled:
+ hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT
+ if HIVE_SERVER_PRINCIPAL_KEY in parameters:
+ hive_server_principal = parameters[HIVE_SERVER_PRINCIPAL_KEY]
+ smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
+ if SMOKEUSER_KEYTAB_KEY in parameters:
+ smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]
+ kinit_path_local = get_kinit_path(["/usr/bin", "/usr/kerberos/bin", "/usr/sbin"])
+ kinitcmd=format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser}; ")
+ else:
+ hive_server_principal = None
+ kinitcmd=None
+
+ try:
+ if host_name is None:
+ host_name = socket.getfqdn()
+
+ start_time = time.time()
+ try:
+ hive_check.check_thrift_port_sasl(host_name, thrift_port, hive_server2_authentication,
+ hive_server_principal, kinitcmd, smokeuser)
+ is_thrift_port_ok = True
+ except:
+ is_thrift_port_ok = False
+
+ if is_thrift_port_ok == True:
+ result_code = 'OK'
+ total_time = time.time() - start_time
+ label = OK_MESSAGE % (total_time, thrift_port)
+ else:
+ result_code = 'CRITICAL'
+ label = CRITICAL_MESSAGE.format(host_name,thrift_port)
+
+ except Exception, e:
+ label = str(e)
+ result_code = 'UNKNOWN'
+
+ return ((result_code, [label]))
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
new file mode 100644
index 0000000..44840de
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/alerts/alert_webhcat_server.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+import socket
+import time
+import urllib2
+
+RESULT_CODE_OK = 'OK'
+RESULT_CODE_CRITICAL = 'CRITICAL'
+RESULT_CODE_UNKNOWN = 'UNKNOWN'
+
+OK_MESSAGE = 'TCP OK - {0:.4f} response on port {1}'
+CRITICAL_CONNECTION_MESSAGE = 'Connection failed on host {0}:{1}'
+CRITICAL_TEMPLETON_STATUS_MESSAGE = 'WebHCat returned an unexpected status of "{0}"'
+CRITICAL_TEMPLETON_UNKNOWN_JSON_MESSAGE = 'Unable to determine WebHCat health from unexpected JSON response'
+
+TEMPLETON_PORT_KEY = '{{webhcat-site/templeton.port}}'
+SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}'
+
+TEMPLETON_OK_RESPONSE = 'ok'
+TEMPLETON_PORT_DEFAULT = 50111
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (TEMPLETON_PORT_KEY,SECURITY_ENABLED_KEY)
+
+
+def execute(parameters=None, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ parameters (dictionary): a mapping of parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+
+ result_code = RESULT_CODE_UNKNOWN
+
+ if parameters is None:
+ return (result_code, ['There were no parameters supplied to the script.'])
+
+ templeton_port = TEMPLETON_PORT_DEFAULT
+ if TEMPLETON_PORT_KEY in parameters:
+ templeton_port = int(parameters[TEMPLETON_PORT_KEY])
+
+ security_enabled = False
+ if SECURITY_ENABLED_KEY in parameters:
+ security_enabled = parameters[SECURITY_ENABLED_KEY].lower() == 'true'
+
+ scheme = 'http'
+ if security_enabled is True:
+ scheme = 'https'
+
+ label = ''
+ url_response = None
+ templeton_status = ''
+ total_time = 0
+
+ try:
+ # the alert will always run on the webhcat host
+ if host_name is None:
+ host_name = socket.getfqdn()
+
+ query = "{0}://{1}:{2}/templeton/v1/status".format(scheme, host_name,
+ templeton_port)
+
+ # execute the query for the JSON that includes templeton status
+ start_time = time.time()
+ url_response = urllib2.urlopen(query)
+ total_time = time.time() - start_time
+ except:
+ label = CRITICAL_CONNECTION_MESSAGE.format(host_name,templeton_port)
+ return (RESULT_CODE_CRITICAL, [label])
+
+ # URL response received, parse it
+ try:
+ json_response = json.loads(url_response.read())
+ templeton_status = json_response['status']
+ except:
+ return (RESULT_CODE_CRITICAL, [CRITICAL_TEMPLETON_UNKNOWN_JSON_MESSAGE])
+
+ # proper JSON received, compare against known value
+ if templeton_status.lower() == TEMPLETON_OK_RESPONSE:
+ result_code = RESULT_CODE_OK
+ label = OK_MESSAGE.format(total_time, templeton_port)
+ else:
+ result_code = RESULT_CODE_CRITICAL
+ label = CRITICAL_TEMPLETON_STATUS_MESSAGE.format(templeton_status)
+
+ return (result_code, [label])
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/files/alert_hive_thrift_port.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/files/alert_hive_thrift_port.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/files/alert_hive_thrift_port.py
deleted file mode 100644
index 499640f..0000000
--- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/files/alert_hive_thrift_port.py
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import json
-import socket
-import time
-import traceback
-import urllib2
-from resource_management.libraries.functions import hive_check
-from resource_management.libraries.functions import format
-from resource_management.libraries.functions import get_kinit_path
-from resource_management.core.environment import Environment
-
-OK_MESSAGE = "TCP OK - %.4f response on port %s"
-CRITICAL_MESSAGE = "Connection failed on host {0}:{1}"
-
-HIVE_SERVER_THRIFT_PORT_KEY = '{{hive-site/hive.server2.thrift.port}}'
-SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}'
-HIVE_SERVER2_AUTHENTICATION_KEY = '{{hive-site/hive.server2.authentication}}'
-HIVE_SERVER_PRINCIPAL_KEY = '{{hive-site/hive.server2.authentication.kerberos.principal}}'
-SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}'
-SMOKEUSER_KEY = '{{cluster-env/smokeuser}}'
-
-PERCENT_WARNING = 200
-PERCENT_CRITICAL = 200
-
-THRIFT_PORT_DEFAULT = 10000
-HIVE_SERVER_PRINCIPAL_DEFAULT = 'hive/_HOST@EXAMPLE.COM'
-HIVE_SERVER2_AUTHENTICATION_DEFAULT = 'NOSASL'
-SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab'
-SMOKEUSER_DEFAULT = 'ambari-qa'
-
-def get_tokens():
- """
- Returns a tuple of tokens in the format {{site/property}} that will be used
- to build the dictionary passed into execute
- """
- return (HIVE_SERVER_THRIFT_PORT_KEY,SECURITY_ENABLED_KEY,HIVE_SERVER2_AUTHENTICATION_KEY,HIVE_SERVER_PRINCIPAL_KEY,SMOKEUSER_KEYTAB_KEY,SMOKEUSER_KEY)
-
-
-def execute(parameters=None, host_name=None):
- """
- Returns a tuple containing the result code and a pre-formatted result label
-
- Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
- host_name (string): the name of this host where the alert is running
- """
-
- if parameters is None:
- return (('UNKNOWN', ['There were no parameters supplied to the script.']))
-
- thrift_port = THRIFT_PORT_DEFAULT
- if HIVE_SERVER_THRIFT_PORT_KEY in parameters:
- thrift_port = int(parameters[HIVE_SERVER_THRIFT_PORT_KEY])
-
- security_enabled = False
- if SECURITY_ENABLED_KEY in parameters:
- security_enabled = str(parameters[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
-
- hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT
- if HIVE_SERVER2_AUTHENTICATION_KEY in parameters:
- hive_server2_authentication = parameters[HIVE_SERVER2_AUTHENTICATION_KEY]
-
- smokeuser = SMOKEUSER_DEFAULT
- if SMOKEUSER_KEY in parameters:
- smokeuser = parameters[SMOKEUSER_KEY]
-
- result_code = None
-
- if security_enabled:
- hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT
- if HIVE_SERVER_PRINCIPAL_KEY in parameters:
- hive_server_principal = parameters[HIVE_SERVER_PRINCIPAL_KEY]
- smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
- if SMOKEUSER_KEYTAB_KEY in parameters:
- smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]
- with Environment() as env:
- kinit_path_local = get_kinit_path(["/usr/bin", "/usr/kerberos/bin", "/usr/sbin"])
- kinitcmd=format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser}; ")
- else:
- hive_server_principal = None
- kinitcmd=None
-
- try:
- if host_name is None:
- host_name = socket.getfqdn()
-
- start_time = time.time()
- try:
- with Environment() as env:
- hive_check.check_thrift_port_sasl(host_name, thrift_port, hive_server2_authentication,
- hive_server_principal, kinitcmd, smokeuser)
- is_thrift_port_ok = True
- except:
- is_thrift_port_ok = False
-
- if is_thrift_port_ok == True:
- result_code = 'OK'
- total_time = time.time() - start_time
- label = OK_MESSAGE % (total_time, thrift_port)
- else:
- result_code = 'CRITICAL'
- label = CRITICAL_MESSAGE.format(host_name,thrift_port)
-
- except Exception, e:
- label = str(e)
- result_code = 'UNKNOWN'
-
- return ((result_code, [label]))
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/files/alert_webhcat_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/files/alert_webhcat_server.py b/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/files/alert_webhcat_server.py
deleted file mode 100644
index 44840de..0000000
--- a/ambari-server/src/main/resources/common-services/HIVE/0.12.0.2.0/package/files/alert_webhcat_server.py
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import json
-import socket
-import time
-import urllib2
-
-RESULT_CODE_OK = 'OK'
-RESULT_CODE_CRITICAL = 'CRITICAL'
-RESULT_CODE_UNKNOWN = 'UNKNOWN'
-
-OK_MESSAGE = 'TCP OK - {0:.4f} response on port {1}'
-CRITICAL_CONNECTION_MESSAGE = 'Connection failed on host {0}:{1}'
-CRITICAL_TEMPLETON_STATUS_MESSAGE = 'WebHCat returned an unexpected status of "{0}"'
-CRITICAL_TEMPLETON_UNKNOWN_JSON_MESSAGE = 'Unable to determine WebHCat health from unexpected JSON response'
-
-TEMPLETON_PORT_KEY = '{{webhcat-site/templeton.port}}'
-SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}'
-
-TEMPLETON_OK_RESPONSE = 'ok'
-TEMPLETON_PORT_DEFAULT = 50111
-
-def get_tokens():
- """
- Returns a tuple of tokens in the format {{site/property}} that will be used
- to build the dictionary passed into execute
- """
- return (TEMPLETON_PORT_KEY,SECURITY_ENABLED_KEY)
-
-
-def execute(parameters=None, host_name=None):
- """
- Returns a tuple containing the result code and a pre-formatted result label
-
- Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
- host_name (string): the name of this host where the alert is running
- """
-
- result_code = RESULT_CODE_UNKNOWN
-
- if parameters is None:
- return (result_code, ['There were no parameters supplied to the script.'])
-
- templeton_port = TEMPLETON_PORT_DEFAULT
- if TEMPLETON_PORT_KEY in parameters:
- templeton_port = int(parameters[TEMPLETON_PORT_KEY])
-
- security_enabled = False
- if SECURITY_ENABLED_KEY in parameters:
- security_enabled = parameters[SECURITY_ENABLED_KEY].lower() == 'true'
-
- scheme = 'http'
- if security_enabled is True:
- scheme = 'https'
-
- label = ''
- url_response = None
- templeton_status = ''
- total_time = 0
-
- try:
- # the alert will always run on the webhcat host
- if host_name is None:
- host_name = socket.getfqdn()
-
- query = "{0}://{1}:{2}/templeton/v1/status".format(scheme, host_name,
- templeton_port)
-
- # execute the query for the JSON that includes templeton status
- start_time = time.time()
- url_response = urllib2.urlopen(query)
- total_time = time.time() - start_time
- except:
- label = CRITICAL_CONNECTION_MESSAGE.format(host_name,templeton_port)
- return (RESULT_CODE_CRITICAL, [label])
-
- # URL response received, parse it
- try:
- json_response = json.loads(url_response.read())
- templeton_status = json_response['status']
- except:
- return (RESULT_CODE_CRITICAL, [CRITICAL_TEMPLETON_UNKNOWN_JSON_MESSAGE])
-
- # proper JSON received, compare against known value
- if templeton_status.lower() == TEMPLETON_OK_RESPONSE:
- result_code = RESULT_CODE_OK
- label = OK_MESSAGE.format(total_time, templeton_port)
- else:
- result_code = RESULT_CODE_CRITICAL
- label = CRITICAL_TEMPLETON_STATUS_MESSAGE.format(templeton_status)
-
- return (result_code, [label])
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/alerts.json b/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/alerts.json
index 9e74cdd..970c9d4 100644
--- a/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/alerts.json
+++ b/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/alerts.json
@@ -34,7 +34,7 @@
"scope": "ANY",
"source": {
"type": "SCRIPT",
- "path": "OOZIE/4.0.0.2.0/package/files/alert_check_oozie_server.py"
+ "path": "OOZIE/4.0.0.2.0/package/alerts/alert_check_oozie_server.py"
}
}
]
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/alerts/alert_check_oozie_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/alerts/alert_check_oozie_server.py b/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/alerts/alert_check_oozie_server.py
new file mode 100644
index 0000000..4e3e6ae
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/alerts/alert_check_oozie_server.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from resource_management import *
+from resource_management.libraries.functions import format
+from resource_management.libraries.functions import get_kinit_path
+from resource_management.core.environment import Environment
+
+RESULT_CODE_OK = 'OK'
+RESULT_CODE_CRITICAL = 'CRITICAL'
+RESULT_CODE_UNKNOWN = 'UNKNOWN'
+
+OOZIE_URL_KEY = '{{oozie-site/oozie.base.url}}'
+SECURITY_ENABLED = '{{cluster-env/security_enabled}}'
+SMOKEUSER_KEY = '{{cluster-env/smokeuser}}'
+SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}'
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (OOZIE_URL_KEY, SMOKEUSER_KEY, SECURITY_ENABLED,SMOKEUSER_KEYTAB_KEY)
+
+def execute(parameters=None, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ parameters (dictionary): a mapping of parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+
+ if parameters is None:
+ return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
+
+ security_enabled = False
+ if set([OOZIE_URL_KEY, SMOKEUSER_KEY, SECURITY_ENABLED]).issubset(parameters):
+ oozie_url = parameters[OOZIE_URL_KEY]
+ smokeuser = parameters[SMOKEUSER_KEY]
+ security_enabled = str(parameters[SECURITY_ENABLED]).upper() == 'TRUE'
+ else:
+ return (RESULT_CODE_UNKNOWN, ['The Oozie URL and Smokeuser are a required parameters.'])
+
+ try:
+ if security_enabled:
+ if set([SMOKEUSER_KEYTAB_KEY]).issubset(parameters):
+ smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]
+ else:
+ return (RESULT_CODE_UNKNOWN, ['The Smokeuser keytab is required when security is enabled.'])
+ kinit_path_local = get_kinit_path(["/usr/bin", "/usr/kerberos/bin", "/usr/sbin"])
+ kinitcmd = format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser}; ")
+
+ Execute(kinitcmd,
+ user=smokeuser,
+ )
+
+ Execute(format("source /etc/oozie/conf/oozie-env.sh ; oozie admin -oozie {oozie_url} -status"),
+ user=smokeuser,
+ )
+ return (RESULT_CODE_OK, ["Oozie check success"])
+
+ except Exception, ex:
+ return (RESULT_CODE_CRITICAL, [str(ex)])
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/files/alert_check_oozie_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/files/alert_check_oozie_server.py b/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/files/alert_check_oozie_server.py
deleted file mode 100644
index 7bf1255..0000000
--- a/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/files/alert_check_oozie_server.py
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import subprocess
-from subprocess import CalledProcessError
-
-RESULT_CODE_OK = 'OK'
-RESULT_CODE_CRITICAL = 'CRITICAL'
-RESULT_CODE_UNKNOWN = 'UNKNOWN'
-
-OOZIE_URL_KEY = '{{oozie-site/oozie.base.url}}'
-
-def get_tokens():
- """
- Returns a tuple of tokens in the format {{site/property}} that will be used
- to build the dictionary passed into execute
- """
- return (OOZIE_URL_KEY)
-
-
-def execute(parameters=None, host_name=None):
- """
- Returns a tuple containing the result code and a pre-formatted result label
-
- Keyword arguments:
- parameters (dictionary): a mapping of parameter key to value
- host_name (string): the name of this host where the alert is running
- """
-
- if parameters is None:
- return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])
-
- oozie_url = None
- if OOZIE_URL_KEY in parameters:
- oozie_url = parameters[OOZIE_URL_KEY]
-
- if oozie_url is None:
- return (RESULT_CODE_UNKNOWN, ['The Oozie URL is a required parameter.'])
-
- try:
- # oozie admin -oozie http://server:11000/oozie -status
- oozie_process = subprocess.Popen(['oozie', 'admin', '-oozie',
- oozie_url, '-status'], stderr=subprocess.PIPE, stdout=subprocess.PIPE)
-
- oozie_output, oozie_error = oozie_process.communicate()
- oozie_return_code = oozie_process.returncode
-
- if oozie_return_code == 0:
- # strip trailing newlines
- oozie_output = str(oozie_output).strip('\n')
- return (RESULT_CODE_OK, [oozie_output])
- else:
- oozie_error = str(oozie_error).strip('\n')
- return (RESULT_CODE_CRITICAL, [oozie_error])
-
- except CalledProcessError, cpe:
- return (RESULT_CODE_CRITICAL, [str(cpe)])
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/alerts.json b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/alerts.json
index a16ef62..ffad3ce 100644
--- a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/alerts.json
+++ b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/alerts.json
@@ -38,7 +38,7 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "HDP/1.3.2/services/HIVE/package/files/alert_hive_thrift_port.py"
+ "path": "HDP/1.3.2/services/HIVE/package/alerts/alert_hive_thrift_port.py"
}
}
],
@@ -52,7 +52,7 @@
"enabled": true,
"source": {
"type": "SCRIPT",
- "path": "HDP/1.3.2/services/HIVE/package/files/alert_webhcat_server.py"
+ "path": "HDP/1.3.2/services/HIVE/package/alerts/alert_webhcat_server.py"
}
}
]
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/alerts/alert_hive_thrift_port.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/alerts/alert_hive_thrift_port.py b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/alerts/alert_hive_thrift_port.py
new file mode 100644
index 0000000..ebfbf55
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/alerts/alert_hive_thrift_port.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+import socket
+import time
+import traceback
+import urllib2
+from resource_management.libraries.functions import hive_check
+from resource_management.libraries.functions import format
+from resource_management.libraries.functions import get_kinit_path
+
+OK_MESSAGE = "TCP OK - %.4f response on port %s"
+CRITICAL_MESSAGE = "Connection failed on host {0}:{1}"
+
+HIVE_SERVER_THRIFT_PORT_KEY = '{{hive-site/hive.server2.thrift.port}}'
+SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}'
+HIVE_SERVER2_AUTHENTICATION_KEY = '{{hive-site/hive.server2.authentication}}'
+HIVE_SERVER_PRINCIPAL_KEY = '{{hive-site/hive.server2.authentication.kerberos.principal}}'
+SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}'
+SMOKEUSER_KEY = '{{cluster-env/smokeuser}}'
+
+PERCENT_WARNING = 200
+PERCENT_CRITICAL = 200
+
+THRIFT_PORT_DEFAULT = 10000
+HIVE_SERVER_PRINCIPAL_DEFAULT = 'hive/_HOST@EXAMPLE.COM'
+HIVE_SERVER2_AUTHENTICATION_DEFAULT = 'NOSASL'
+SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab'
+SMOKEUSER_DEFAULT = 'ambari-qa'
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (HIVE_SERVER_THRIFT_PORT_KEY,SECURITY_ENABLED_KEY,HIVE_SERVER2_AUTHENTICATION_KEY,HIVE_SERVER_PRINCIPAL_KEY,SMOKEUSER_KEYTAB_KEY,SMOKEUSER_KEY)
+
+
+def execute(parameters=None, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ parameters (dictionary): a mapping of parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+
+ if parameters is None:
+ return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+
+ thrift_port = THRIFT_PORT_DEFAULT
+ if HIVE_SERVER_THRIFT_PORT_KEY in parameters:
+ thrift_port = int(parameters[HIVE_SERVER_THRIFT_PORT_KEY])
+
+ security_enabled = False
+ if SECURITY_ENABLED_KEY in parameters:
+ security_enabled = str(parameters[SECURITY_ENABLED_KEY]).upper() == 'TRUE'
+
+ hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT
+ if HIVE_SERVER2_AUTHENTICATION_KEY in parameters:
+ hive_server2_authentication = parameters[HIVE_SERVER2_AUTHENTICATION_KEY]
+
+ smokeuser = SMOKEUSER_DEFAULT
+ if SMOKEUSER_KEY in parameters:
+ smokeuser = parameters[SMOKEUSER_KEY]
+
+ result_code = None
+
+ if security_enabled:
+ hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT
+ if HIVE_SERVER_PRINCIPAL_KEY in parameters:
+ hive_server_principal = parameters[HIVE_SERVER_PRINCIPAL_KEY]
+ smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
+ if SMOKEUSER_KEYTAB_KEY in parameters:
+ smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]
+ kinit_path_local = get_kinit_path(["/usr/bin", "/usr/kerberos/bin", "/usr/sbin"])
+ kinitcmd=format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser}; ")
+ else:
+ hive_server_principal = None
+ kinitcmd=None
+
+ try:
+ if host_name is None:
+ host_name = socket.getfqdn()
+
+ start_time = time.time()
+ try:
+ hive_check.check_thrift_port_sasl(host_name, thrift_port, hive_server2_authentication,
+ hive_server_principal, kinitcmd, smokeuser)
+ is_thrift_port_ok = True
+ except:
+ is_thrift_port_ok = False
+
+ if is_thrift_port_ok == True:
+ result_code = 'OK'
+ total_time = time.time() - start_time
+ label = OK_MESSAGE % (total_time, thrift_port)
+ else:
+ result_code = 'CRITICAL'
+ label = CRITICAL_MESSAGE.format(host_name,thrift_port)
+
+ except Exception, e:
+ label = str(e)
+ result_code = 'UNKNOWN'
+
+ return ((result_code, [label]))
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec37c603/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/alerts/alert_webhcat_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/alerts/alert_webhcat_server.py b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/alerts/alert_webhcat_server.py
new file mode 100644
index 0000000..44840de
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/HIVE/package/alerts/alert_webhcat_server.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+import socket
+import time
+import urllib2
+
+RESULT_CODE_OK = 'OK'
+RESULT_CODE_CRITICAL = 'CRITICAL'
+RESULT_CODE_UNKNOWN = 'UNKNOWN'
+
+OK_MESSAGE = 'TCP OK - {0:.4f} response on port {1}'
+CRITICAL_CONNECTION_MESSAGE = 'Connection failed on host {0}:{1}'
+CRITICAL_TEMPLETON_STATUS_MESSAGE = 'WebHCat returned an unexpected status of "{0}"'
+CRITICAL_TEMPLETON_UNKNOWN_JSON_MESSAGE = 'Unable to determine WebHCat health from unexpected JSON response'
+
+TEMPLETON_PORT_KEY = '{{webhcat-site/templeton.port}}'
+SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}'
+
+TEMPLETON_OK_RESPONSE = 'ok'
+TEMPLETON_PORT_DEFAULT = 50111
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (TEMPLETON_PORT_KEY,SECURITY_ENABLED_KEY)
+
+
+def execute(parameters=None, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ parameters (dictionary): a mapping of parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+
+ result_code = RESULT_CODE_UNKNOWN
+
+ if parameters is None:
+ return (result_code, ['There were no parameters supplied to the script.'])
+
+ templeton_port = TEMPLETON_PORT_DEFAULT
+ if TEMPLETON_PORT_KEY in parameters:
+ templeton_port = int(parameters[TEMPLETON_PORT_KEY])
+
+ security_enabled = False
+ if SECURITY_ENABLED_KEY in parameters:
+ security_enabled = parameters[SECURITY_ENABLED_KEY].lower() == 'true'
+
+ scheme = 'http'
+ if security_enabled is True:
+ scheme = 'https'
+
+ label = ''
+ url_response = None
+ templeton_status = ''
+ total_time = 0
+
+ try:
+ # the alert will always run on the webhcat host
+ if host_name is None:
+ host_name = socket.getfqdn()
+
+ query = "{0}://{1}:{2}/templeton/v1/status".format(scheme, host_name,
+ templeton_port)
+
+ # execute the query for the JSON that includes templeton status
+ start_time = time.time()
+ url_response = urllib2.urlopen(query)
+ total_time = time.time() - start_time
+ except:
+ label = CRITICAL_CONNECTION_MESSAGE.format(host_name,templeton_port)
+ return (RESULT_CODE_CRITICAL, [label])
+
+ # URL response received, parse it
+ try:
+ json_response = json.loads(url_response.read())
+ templeton_status = json_response['status']
+ except:
+ return (RESULT_CODE_CRITICAL, [CRITICAL_TEMPLETON_UNKNOWN_JSON_MESSAGE])
+
+ # proper JSON received, compare against known value
+ if templeton_status.lower() == TEMPLETON_OK_RESPONSE:
+ result_code = RESULT_CODE_OK
+ label = OK_MESSAGE.format(total_time, templeton_port)
+ else:
+ result_code = RESULT_CODE_CRITICAL
+ label = CRITICAL_TEMPLETON_STATUS_MESSAGE.format(templeton_status)
+
+ return (result_code, [label])
\ No newline at end of file