You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ma...@apache.org on 2013/02/07 08:46:45 UTC
svn commit: r1443336 - in /incubator/ambari/trunk: ./
ambari-agent/src/main/puppet/modules/hdp-nagios/templates/
ambari-server/src/main/java/org/apache/ambari/server/configuration/
contrib/addons/src/addOns/nagios/plugins/ contrib/addons/src/addOns/nagios/scripts/
Author: mahadev
Date: Thu Feb 7 07:46:45 2013
New Revision: 1443336
URL: http://svn.apache.org/viewvc?rev=1443336&view=rev
Log:
AMBARI-1358. Clean up alert messages. (Yusaku Sako via mahadev)
Modified:
incubator/ambari/trunk/CHANGES.txt
incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_hive_metastore_status.sh
incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_name_dir_status.php
incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_oozie_status.sh
incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_webui.sh
incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/sys_logger.py
incubator/ambari/trunk/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php
Modified: incubator/ambari/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/CHANGES.txt?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/CHANGES.txt (original)
+++ incubator/ambari/trunk/CHANGES.txt Thu Feb 7 07:46:45 2013
@@ -439,6 +439,9 @@ Trunk (unreleased changes):
AMBARI-1330. Cluster missing hosts after successful install and restart.
(mahadev)
+ AMBARI-1358. Clean up alert messages. (Yusaku Sako via mahadev)
+
+
AMBARI-1.2.0 branch:
INCOMPATIBLE CHANGES
Modified: incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb (original)
+++ incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb Thu Feb 7 07:46:45 2013
@@ -47,7 +47,7 @@ define service {
use hadoop-service
service_description HDFS::Percent DataNodes storage full
servicegroups HDFS
- check_command check_aggregate!"DATANODE::Storage full"!10%!30%
+ check_command check_aggregate!"DATANODE::DataNode storage full"!10%!30%
normal_check_interval 2
retry_check_interval 1
max_check_attempts 1
@@ -58,7 +58,7 @@ define service {
use hadoop-service
service_description HDFS::Percent DataNodes down
servicegroups HDFS
- check_command check_aggregate!"DATANODE::Process down"!10%!30%
+ check_command check_aggregate!"DATANODE::DataNode process down"!10%!30%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
@@ -70,7 +70,7 @@ define service {
use hadoop-service
service_description MAPREDUCE::Percent TaskTrackers down
servicegroups MAPREDUCE
- check_command check_aggregate!"TASKTRACKER::Process down"!10%!30%
+ check_command check_aggregate!"TASKTRACKER::TaskTracker process down"!10%!30%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
@@ -81,9 +81,9 @@ define service {
define service {
hostgroup_name nagios-server
use hadoop-service
- service_description ZOOKEEPER::Percent zookeeper servers down
+ service_description ZOOKEEPER::Percent ZooKeeper Servers down
servicegroups ZOOKEEPER
- check_command check_aggregate!"ZKSERVERS::ZKSERVERS Process down"!35%!70%
+ check_command check_aggregate!"ZOOKEEPER::ZooKeeper Server process down"!35%!70%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
@@ -95,9 +95,9 @@ define service {
define service {
hostgroup_name nagios-server
use hadoop-service
- service_description HBASE::Percent region servers down
+ service_description HBASE::Percent RegionServers down
servicegroups HBASE
- check_command check_aggregate!"REGIONSERVER::Process down"!10%!30%
+ check_command check_aggregate!"REGIONSERVER::RegionServer process down"!10%!30%
normal_check_interval 0.5
retry_check_interval 0.25
max_check_attempts 3
@@ -110,7 +110,7 @@ define service {
define service {
hostgroup_name ganglia-server
use hadoop-service
- service_description GANGLIA::Ganglia [gmetad] Process down
+ service_description GANGLIA::Ganglia [gmetad] process down
servicegroups GANGLIA
check_command check_tcp!8651!-w 1 -c 1
normal_check_interval 0.25
@@ -121,7 +121,7 @@ define service {
define service {
hostgroup_name ganglia-server
use hadoop-service
- service_description GANGLIA::Ganglia collector [gmond] Process down alert for slaves
+ service_description GANGLIA::Ganglia Collector [gmond] process down alert for slaves
servicegroups GANGLIA
check_command check_tcp!8660!-w 1 -c 1
normal_check_interval 0.25
@@ -132,7 +132,7 @@ define service {
define service {
hostgroup_name ganglia-server
use hadoop-service
- service_description GANGLIA::Ganglia collector [gmond] Process down alert for namenode
+ service_description GANGLIA::Ganglia Collector [gmond] process down alert for NameNode
servicegroups GANGLIA
check_command check_tcp!8661!-w 1 -c 1
normal_check_interval 0.25
@@ -143,7 +143,7 @@ define service {
define service {
hostgroup_name ganglia-server
use hadoop-service
- service_description GANGLIA::Ganglia collector [gmond] Process down alert for jobtracker
+ service_description GANGLIA::Ganglia Collector [gmond] process down alert for JobTracker
servicegroups GANGLIA
check_command check_tcp!8662!-w 1 -c 1
normal_check_interval 0.25
@@ -155,7 +155,7 @@ define service {
define service {
hostgroup_name ganglia-server
use hadoop-service
- service_description GANGLIA::Ganglia collector [gmond] Process down alert for hbasemaster
+ service_description GANGLIA::Ganglia Collector [gmond] process down alert for HBase Master
servicegroups GANGLIA
check_command check_tcp!8663!-w 1 -c 1
normal_check_interval 0.25
@@ -170,7 +170,7 @@ define service {
define service {
hostgroup_name snamenode
use hadoop-service
- service_description NAMENODE::Secondary Namenode Process down
+ service_description NAMENODE::Secondary NameNode process down
servicegroups HDFS
check_command check_tcp!50090!-w 1 -c 1
normal_check_interval 0.5
@@ -183,7 +183,7 @@ define service {
define service {
hostgroup_name namenode
use hadoop-service
- service_description NAMENODE::Namenode Web UI down
+ service_description NAMENODE::NameNode Web UI down
servicegroups HDFS
check_command check_webui!namenode
normal_check_interval 1
@@ -194,7 +194,7 @@ define service {
define service {
hostgroup_name namenode
use hadoop-service
- service_description NAMENODE::Namenode Edit logs directory status
+ service_description NAMENODE::NameNode edit logs directory status
servicegroups HDFS
check_command check_name_dir_status!50070
normal_check_interval 0.5
@@ -205,7 +205,7 @@ define service {
define service {
hostgroup_name namenode
use hadoop-service
- service_description NAMENODE::Namenode Host CPU utilization
+ service_description NAMENODE::NameNode host CPU utilization
servicegroups HDFS
check_command check_cpu!200%!250%
normal_check_interval 5
@@ -217,7 +217,7 @@ define service {
define service {
hostgroup_name namenode
use hadoop-service
- service_description NAMENODE::Namenode Process down
+ service_description NAMENODE::NameNode process down
servicegroups HDFS
check_command check_tcp!8020!-w 1 -c 1
normal_check_interval 0.5
@@ -239,7 +239,7 @@ define service {
define service {
hostgroup_name namenode
use hadoop-service
- service_description HDFS::HDFS Capacity utilization
+ service_description HDFS::HDFS capacity utilization
servicegroups HDFS
check_command check_hdfs_capacity!50070!80%!90%
normal_check_interval 10
@@ -250,7 +250,7 @@ define service {
define service {
hostgroup_name namenode
use hadoop-service
- service_description HDFS::Namenode RPC Latency
+ service_description HDFS::NameNode RPC latency
servicegroups HDFS
check_command check_rpcq_latency!NameNode!50070!3000!5000
normal_check_interval 5
@@ -286,7 +286,7 @@ define service {
define service {
hostgroup_name jobtracker
use hadoop-service
- service_description JOBTRACKER::Jobtracker CPU utilization
+ service_description JOBTRACKER::JobTracker CPU utilization
servicegroups MAPREDUCE
check_command check_cpu!200%!250%
normal_check_interval 5
@@ -298,7 +298,7 @@ define service {
define service {
hostgroup_name jobtracker
use hadoop-service
- service_description JOBTRACKER::Jobtracker Process down
+ service_description JOBTRACKER::JobTracker process down
servicegroups MAPREDUCE
check_command check_tcp!50030!-w 1 -c 1
normal_check_interval 0.5
@@ -309,7 +309,7 @@ define service {
define service {
hostgroup_name jobtracker
use hadoop-service
- service_description MAPREDUCE::JobTracker RPC Latency
+ service_description MAPREDUCE::JobTracker RPC latency
servicegroups MAPREDUCE
check_command check_rpcq_latency!JobTracker!50030!3000!5000
normal_check_interval 5
@@ -323,7 +323,7 @@ define service {
define service {
hostgroup_name slaves
use hadoop-service
- service_description DATANODE::Process down
+ service_description DATANODE::DataNode process down
servicegroups HDFS
check_command check_tcp!<%=scope.function_hdp_template_var("dfs_datanode_address")%>!-w 1 -c 1
normal_check_interval 1
@@ -334,7 +334,7 @@ define service {
define service {
hostgroup_name slaves
use hadoop-service
- service_description DATANODE::Storage full
+ service_description DATANODE::DataNode storage full
servicegroups HDFS
check_command check_datanode_storage!<%=scope.function_hdp_template_var("dfs_datanode_http_address")%>!90%!90%
normal_check_interval 5
@@ -346,7 +346,7 @@ define service {
define service {
hostgroup_name slaves
use hadoop-service
- service_description TASKTRACKER::Process down
+ service_description TASKTRACKER::TaskTracker process down
servicegroups MAPREDUCE
check_command check_tcp!50060!-w 1 -c 1
normal_check_interval 1
@@ -360,7 +360,7 @@ define service {
define service {
hostgroup_name zookeeper-servers
use hadoop-service
- service_description ZKSERVERS::ZKSERVERS Process down
+ service_description ZOOKEEPER::ZooKeeper Server process down
servicegroups ZOOKEEPER
check_command check_tcp!2181!-w 1 -c 1
normal_check_interval 1
@@ -374,7 +374,7 @@ define service {
define service {
hostgroup_name region-servers
use hadoop-service
- service_description REGIONSERVER::Process down
+ service_description REGIONSERVER::RegionServer process down
servicegroups HBASE
check_command check_tcp!60020!-w 1 -c 1
normal_check_interval 1
@@ -386,7 +386,7 @@ define service {
define service {
hostgroup_name hbasemaster
use hadoop-service
- service_description HBASEMASTER::HBase Web UI down
+ service_description HBASEMASTER::HBase Master Web UI down
servicegroups HBASE
check_command check_webui!hbase
normal_check_interval 1
@@ -397,7 +397,7 @@ define service {
define service {
hostgroup_name hbasemaster
use hadoop-service
- service_description HBASEMASTER::HBaseMaster CPU utilization
+ service_description HBASEMASTER::HBase Master CPU utilization
servicegroups HBASE
check_command check_cpu!200%!250%
normal_check_interval 5
@@ -408,7 +408,7 @@ define service {
define service {
hostgroup_name hbasemaster
use hadoop-service
- service_description HBASEMASTER::HBaseMaster Process down
+ service_description HBASEMASTER::HBase Master process down
servicegroups HBASE
check_command check_tcp!60000!-w 1 -c 1
normal_check_interval 0.5
@@ -422,7 +422,7 @@ define service {
define service {
hostgroup_name hiveserver
use hadoop-service
- service_description HIVE-METASTORE::HIVE-METASTORE status check
+ service_description HIVE-METASTORE::Hive Metastore status check
servicegroups HIVE-METASTORE
<%if scope.function_hdp_template_var("security_enabled")-%>
check_command check_hive_metastore_status!9083!<%=scope.function_hdp_template_var("java64_home")%>!true!<%=scope.function_hdp_template_var("keytab_path")%>/<%=scope.function_hdp_template_var("nagios_user")%>.headless.keytab!<%=scope.function_hdp_template_var("nagios_user")%>
@@ -439,7 +439,7 @@ define service {
define service {
hostgroup_name oozie-server
use hadoop-service
- service_description OOZIE::Oozie status check
+ service_description OOZIE::Oozie Server status check
servicegroups OOZIE
<%if scope.function_hdp_template_var("security_enabled")-%>
check_command check_oozie_status!11000!<%=scope.function_hdp_template_var("java64_home")%>!true!<%=scope.function_hdp_template_var("keytab_path")%>/<%=scope.function_hdp_template_var("nagios_user")%>.headless.keytab!<%=scope.function_hdp_template_var("nagios_user")%>
@@ -456,7 +456,7 @@ define service {
define service {
hostgroup_name webhcat-server
use hadoop-service
- service_description WEBHCAT::WEBHCAT status check
+ service_description WEBHCAT::WebHCat Server status check
servicegroups WEBHCAT
<%if scope.function_hdp_template_var("security_enabled")-%>
check_command check_templeton_status!50111!v1!true!<%=scope.function_hdp_template_var("keytab_path")%>/<%=scope.function_hdp_template_var("nagios_user")%>.headless.keytab!<%=scope.function_hdp_template_var("nagios_user")%>
Modified: incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java (original)
+++ incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java Thu Feb 7 07:46:45 2013
@@ -109,6 +109,10 @@ public class Configuration {
public static final String SRVR_HOSTS_MAPPING =
"server.hosts.mapping";
+ public static final String SSL_TRUSTSTORE_PATH_KEY = "ssl.trustStore.path";
+ public static final String SSL_TRUSTSTORE_PASSWORD_KEY = "ssl.trustStore.password";
+ public static final String SSL_TRUSTSTORE_TYPE_KEY = "ssl.trustStore.type";
+
private static final String SRVR_KSTR_DIR_DEFAULT = ".";
public static final String SRVR_CRT_NAME_DEFAULT = "ca.crt";
public static final String SRVR_KEY_NAME_DEFAULT = "ca.key";
@@ -209,6 +213,23 @@ public class Configuration {
}
}
configsMap.put(SRVR_CRT_PASS_KEY, randStr);
+
+ loadSSLParams();
+ }
+
+ /**
+ * Loads trusted certificates store properties
+ */
+ private void loadSSLParams(){
+ if (properties.getProperty(SSL_TRUSTSTORE_PATH_KEY) != null) {
+ System.setProperty("javax.net.ssl.trustStore", properties.getProperty(SSL_TRUSTSTORE_PATH_KEY));
+ }
+ if (properties.getProperty(SSL_TRUSTSTORE_PASSWORD_KEY) != null) {
+ System.setProperty("javax.net.ssl.trustStorePassword", properties.getProperty(SSL_TRUSTSTORE_PASSWORD_KEY));
+ }
+ if (properties.getProperty(SSL_TRUSTSTORE_TYPE_KEY) != null) {
+ System.setProperty("javax.net.ssl.trustStoreType", properties.getProperty(SSL_TRUSTSTORE_TYPE_KEY));
+ }
}
Modified: incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_hive_metastore_status.sh
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_hive_metastore_status.sh?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_hive_metastore_status.sh (original)
+++ incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_hive_metastore_status.sh Thu Feb 7 07:46:45 2013
@@ -25,8 +25,8 @@ PORT=$2
HCAT_URL=-Dhive.metastore.uris="thrift://$HOST:$PORT"
out=`hcat $HCAT_URL -e "show databases" 2>&1`
if [[ "$?" -ne 0 ]]; then
- echo "CRITICAL: Error accessing hive-metaserver status [$out]";
+ echo "CRITICAL: Error accessing Hive Metastore status [$out]";
exit 2;
fi
-echo "OK: Hive metaserver status OK";
+echo "OK: Hive Metastore status OK";
exit 0;
Modified: incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_name_dir_status.php
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_name_dir_status.php?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_name_dir_status.php (original)
+++ incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_name_dir_status.php Thu Feb 7 07:46:45 2013
@@ -36,12 +36,12 @@
$json_array = json_decode($json_string, true);
$object = $json_array['beans'][0];
if ($object['NameDirStatuses'] == "") {
- echo "UNKNOWN: Namenode directory status not available via http://<nn_host>:port/jmx url" . "\n";
+ echo "UNKNOWN: NameNode directory status not available via http://<nn_host>:port/jmx url" . "\n";
exit(3);
}
$NameDirStatuses = json_decode($object['NameDirStatuses'], true);
$failed_dir_count = count($NameDirStatuses['failed']);
- $out_msg = "CRITICAL: Offline Namenode directories: ";
+ $out_msg = "CRITICAL: Offline NameNode directories: ";
if ($failed_dir_count > 0) {
foreach ($NameDirStatuses['failed'] as $key => $value) {
$out_msg = $out_msg . $key . ":" . $value . ", ";
@@ -49,7 +49,7 @@
echo $out_msg . "\n";
exit (2);
}
- echo "OK: All Namenode directories are active" . "\n";
+ echo "OK: All NameNode directories are active" . "\n";
exit(0);
/* print usage */
Modified: incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_oozie_status.sh
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_oozie_status.sh?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_oozie_status.sh (original)
+++ incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_oozie_status.sh Thu Feb 7 07:46:45 2013
@@ -20,7 +20,7 @@
#
#
# OOZIE_URL is of the form http://<hostname>:<port>/oozie
-# OOZIE_URL: http://hortonworks-sandbox.localdomain:11000/oozie
+# OOZIE_URL: http://host1.localdomain:11000/oozie
HOST=$1
PORT=$2
JAVA_HOME=$3
@@ -28,8 +28,8 @@ OOZIE_URL="http://$HOST:$PORT/oozie"
export JAVA_HOME=$JAVA_HOME
out=`oozie admin -oozie ${OOZIE_URL} -status 2>&1`
if [[ "$?" -ne 0 ]]; then
- echo "CRITICAL: Error accessing oozie server status [$out]";
+ echo "CRITICAL: Error accessing Oozie Server status [$out]";
exit 2;
fi
-echo "OK: Oozie server status [$out]";
+echo "OK: Oozie Server status [$out]";
exit 0;
Modified: incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_webui.sh
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_webui.sh?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_webui.sh (original)
+++ incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_webui.sh Thu Feb 7 07:46:45 2013
@@ -45,14 +45,14 @@ jobtracker)
namenode)
nnweburl="http://$host:50070"
if [[ `checkurl "$nnweburl"` -ne 0 ]] ; then
- echo "WARNING: NameNode web UI not accessible : $nnweburl";
+ echo "WARNING: NameNode Web UI not accessible : $nnweburl";
exit 1;
fi
;;
jobhistory)
jhweburl="http://$host:51111/jobhistoryhome.jsp"
if [[ `checkurl "$jhweburl"` -ne 0 ]]; then
- echo "WARNING: Jobhistory web UI not accessible : $jhweburl";
+ echo "WARNING: JobHistory Web UI not accessible : $jhweburl";
exit 1;
fi
;;
@@ -60,7 +60,7 @@ hbase)
hbaseweburl="http://$host:60010/master-status"
jhweburl="http://domU-12-31-39-16-DC-FB.compute-1.internal:51111/jobhistoryhome.jsp"
if [[ `checkurl "$hbaseweburl"` -ne 0 ]]; then
- echo "WARNING: Hbase Master web UI not accessible : $hbaseweburl";
+ echo "WARNING: HBase Master Web UI not accessible : $hbaseweburl";
exit 1;
fi
;;
Modified: incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/sys_logger.py
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/sys_logger.py?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/sys_logger.py (original)
+++ incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/sys_logger.py Thu Feb 7 07:46:45 2013
@@ -21,35 +21,35 @@ severities = {'UP':'OK', 'DOWN':'Critica
'WARNING':'Warning', 'UNKNOWN':'Warning', 'CRITICAL':'Critical'}
# List of services which can result in events at the Degraded severity
-degraded_alert_services = ['HBASEMASTER::HBaseMaster CPU utilization',
- 'HDFS::Namenode RPC Latency',
- 'MAPREDUCE::JobTracker RPC Latency',
- 'JOBTRACKER::Jobtracker CPU utilization']
+degraded_alert_services = ['HBASEMASTER::HBase Master CPU utilization',
+ 'HDFS::NameNode RPC latency',
+ 'MAPREDUCE::JobTracker RPC latency',
+ 'JOBTRACKER::JobTracker CPU utilization']
# List of services which can result in events at the Fatal severity
-fatal_alert_services = ['NAMENODE::Namenode Process down']
+fatal_alert_services = ['NAMENODE::NameNode process down']
# dictionary of service->msg_id mappings
-msg_ids = {'Host::Ping':'host_down', 'HBASEMASTER::HBaseMaster CPU utilization':'master_cpu_utilization',
- 'HDFS::HDFS Capacity utilization':'hdfs_percent_capacity', 'HDFS::Corrupt/Missing blocks':'hdfs_block',
- 'NAMENODE::Namenode Edit logs directory status':'namenode_edit_log_write', 'HDFS::Percent DataNodes down':'datanode_down',
- 'DATANODE::Process down':'datanode_process_down', 'HDFS::Percent DataNodes storage full':'datanodes_percent_storage_full',
- 'NAMENODE::Namenode Process down':'namenode_process_down', 'HDFS::Namenode RPC Latency':'namenode_rpc_latency',
- 'DATANODE::Storage full':'datanodes_storage_full', 'JOBTRACKER::Jobtracker Process down':'jobtracker_process_down',
- 'MAPREDUCE::JobTracker RPC Latency':'jobtracker_rpc_latency', 'MAPREDUCE::Percent TaskTrackers down':'tasktrackers_down',
- 'TASKTRACKER::Process down':'tasktracker_process_down', 'HBASEMASTER::HBaseMaster Process down':'hbasemaster_process_down',
- 'REGIONSERVER::Process down':'regionserver_process_down', 'HBASE::Percent region servers down':'regionservers_down',
- 'HIVE-METASTORE::HIVE-METASTORE status check':'hive_metastore_process_down', 'ZOOKEEPER::Percent zookeeper servers down':'zookeepers_down',
- 'ZKSERVERS::ZKSERVERS Process down':'zookeeper_process_down', 'OOZIE::Oozie status check':'oozie_down',
- 'TEMPLETON::Templeton status check':'templeton_down', 'PUPPET::Puppet agent down':'puppet_down',
- 'NAGIOS::Nagios status log staleness':'nagios_status_log_stale', 'GANGLIA::Ganglia [gmetad] Process down':'ganglia_process_down',
- 'GANGLIA::Ganglia collector [gmond] Process down alert for hbasemaster':'ganglia_collector_process_down',
- 'GANGLIA::Ganglia collector [gmond] Process down alert for jobtracker':'ganglia_collector_process_down',
- 'GANGLIA::Ganglia collector [gmond] Process down alert for namenode':'ganglia_collector_process_down',
- 'GANGLIA::Ganglia collector [gmond] Process down alert for slaves':'ganglia_collector_process_down',
- 'NAMENODE::Secondary Namenode Process down':'secondary_namenode_process_down',
- 'JOBTRACKER::Jobtracker CPU utilization':'jobtracker_cpu_utilization',
- 'HBASEMASTER::HBase Web UI down':'hbase_ui_down', 'NAMENODE::Namenode Web UI down':'namenode_ui_down',
+msg_ids = {'Host::Ping':'host_down', 'HBASEMASTER::HBase Master CPU utilization':'master_cpu_utilization',
+ 'HDFS::HDFS capacity utilization':'hdfs_percent_capacity', 'HDFS::Corrupt/Missing blocks':'hdfs_block',
+ 'NAMENODE::NameNode edit logs directory status':'namenode_edit_log_write', 'HDFS::Percent DataNodes down':'datanode_down',
+ 'DATANODE::DataNode process down':'datanode_process_down', 'HDFS::Percent DataNodes storage full':'datanodes_percent_storage_full',
+ 'NAMENODE::NameNode process down':'namenode_process_down', 'HDFS::NameNode RPC latency':'namenode_rpc_latency',
+ 'DATANODE::DataNode storage full':'datanodes_storage_full', 'JOBTRACKER::JobTracker process down':'jobtracker_process_down',
+ 'MAPREDUCE::JobTracker RPC latency':'jobtracker_rpc_latency', 'MAPREDUCE::Percent TaskTrackers down':'tasktrackers_down',
+ 'TASKTRACKER::TaskTracker process down':'tasktracker_process_down', 'HBASEMASTER::HBase Master process down':'hbasemaster_process_down',
+ 'REGIONSERVER::RegionServer process down':'regionserver_process_down', 'HBASE::Percent RegionServers down':'regionservers_down',
+ 'HIVE-METASTORE::Hive Metastore status check':'hive_metastore_process_down', 'ZOOKEEPER::Percent ZooKeeper Servers down':'zookeepers_down',
+ 'ZOOKEEPER::ZooKeeper Server process down':'zookeeper_process_down', 'OOZIE::Oozie Server status check':'oozie_down',
+ 'WEBHCAT::WebHCat Server status check':'templeton_down', 'PUPPET::Puppet agent down':'puppet_down',
+ 'NAGIOS::Nagios status log staleness':'nagios_status_log_stale', 'GANGLIA::Ganglia [gmetad] process down':'ganglia_process_down',
+ 'GANGLIA::Ganglia Collector [gmond] process down alert for HBase Master':'ganglia_collector_process_down',
+ 'GANGLIA::Ganglia Collector [gmond] process down alert for JobTracker':'ganglia_collector_process_down',
+ 'GANGLIA::Ganglia Collector [gmond] process down alert for NameNode':'ganglia_collector_process_down',
+ 'GANGLIA::Ganglia Collector [gmond] process down alert for slaves':'ganglia_collector_process_down',
+ 'NAMENODE::Secondary NameNode process down':'secondary_namenode_process_down',
+ 'JOBTRACKER::JobTracker CPU utilization':'jobtracker_cpu_utilization',
+ 'HBASEMASTER::HBase Master Web UI down':'hbase_ui_down', 'NAMENODE::NameNode Web UI down':'namenode_ui_down',
'JOBTRACKER::JobHistory Web UI down':'jobhistory_ui_down', 'JOBTRACKER::JobTracker Web UI down':'jobtracker_ui_down'}
Modified: incubator/ambari/trunk/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php (original)
+++ incubator/ambari/trunk/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php Thu Feb 7 07:46:45 2013
@@ -91,13 +91,13 @@ function hdp_mon_generate_response( $res
define ("warn", "1");
define ("critical", "2");
- define ("HDFS_SERVICE_CHECK", "NAMENODE::Namenode Process down");
- define ("MAPREDUCE_SERVICE_CHECK", "JOBTRACKER::Jobtracker Process down");
- define ("HBASE_SERVICE_CHECK", "HBASEMASTER::HBaseMaster Process down");
- define ("ZOOKEEPER_SERVICE_CHECK", "ZOOKEEPER::Percent zookeeper servers down");
- define ("HIVE_METASTORE_SERVICE_CHECK", "HIVE-METASTORE::HIVE-METASTORE status check");
- define ("OOZIE_SERVICE_CHECK", "OOZIE::Oozie status check");
- define ("TEMPLETON_SERVICE_CHECK", "TEMPLETON::Templeton status check");
+ define ("HDFS_SERVICE_CHECK", "NAMENODE::NameNode process down");
+ define ("MAPREDUCE_SERVICE_CHECK", "JOBTRACKER::JobTracker process down");
+ define ("HBASE_SERVICE_CHECK", "HBASEMASTER::HBase Master process down");
+ define ("ZOOKEEPER_SERVICE_CHECK", "ZOOKEEPER::Percent ZooKeeper Servers down");
+ define ("HIVE_METASTORE_SERVICE_CHECK", "HIVE-METASTORE::Hive Metastore status check");
+ define ("OOZIE_SERVICE_CHECK", "OOZIE::Oozie Server status check");
+ define ("WEBHCAT_SERVICE_CHECK", "WEBHCAT::WebHCat Server status check");
define ("PUPPET_SERVICE_CHECK", "PUPPET::Puppet agent down");
/* If SUSE, status file is under /var/lib/nagios */
@@ -196,10 +196,10 @@ function hdp_mon_generate_response( $res
}
continue;
}
- if (getParameter($object, "service_description") == TEMPLETON_SERVICE_CHECK) {
- $services_object["TEMPLETON"] = getParameter($object, "last_hard_state");
- if ($services_object["TEMPLETON"] >= 1) {
- $services_object["TEMPLETON"] = 1;
+ if (getParameter($object, "service_description") == WEBHCAT_SERVICE_CHECK) {
+ $services_object["WEBHCAT"] = getParameter($object, "last_hard_state");
+ if ($services_object["WEBHCAT"] >= 1) {
+ $services_object["WEBHCAT"] = 1;
}
continue;
}
@@ -302,16 +302,7 @@ function hdp_mon_generate_response( $res
$servicestatus['service_type'] = get_service_type($servicestatus['service_description']);
$srv_desc = explode ("::",$servicestatus['service_description'],2);
- switch ($srv_desc[0]) {
- case "DATANODE":
- case "TASKTRACKER":
- case "REGIONSERVER":
- $servicestatus['service_description'] = $srv_desc[0] . ' ' . $srv_desc[1];
- break;
- default:
- $servicestatus['service_description'] = $srv_desc[1];
- }
- $servicestatus['service_description'] = format_description($servicestatus['service_description']);
+ $servicestatus['service_description'] = $srv_desc[1];
}
break;
case "nok":
@@ -368,43 +359,6 @@ function hdp_mon_generate_response( $res
return $services_objects;
}
- function format_description ($service_description)
- {
- $patterns[0] = "/tasktracker/i";
- $patterns[1] = "/datanode/i";
- $patterns[2] = "/namenode/i";
- $patterns[3] = "/jobtracker/i";
- $patterns[4] = "/hbaseMaster/i";
- $patterns[5] = "/hive-metastore/i";
- $patterns[6] = "/webhcat/i";
- $patterns[7] = "/zookeeper/i";
- $patterns[8] = "/zkserver/i";
- $patterns[9] = "/oozie/i";
- $patterns[10] = "/region server/i";
- $patterns[11] = "/region/i";
- $patterns[12] = "/server/i";
- $patterns[13] = "/servers/i";
-
- $replacements[0] = "TaskTracker";
- $replacements[1] = "DataNode";
- $replacements[2] = "NameNode";
- $replacements[3] = "JobTracker";
- $replacements[4] = "HBase Master";
- $replacements[5] = "Hive Metastore";
- $replacements[6] = "WebHCat Server";
- $replacements[7] = "ZooKeeper";
- $replacements[8] = "ZooKeeper Server";
- $replacements[9] = "Oozie Server";
- $replacements[10] = "RegionServer";
- $replacements[11] = "Region";
- $replacements[12] = "Server";
- $replacements[13] = "Servers";
-
- $result = preg_replace($patterns, $replacements, $service_description);
-
- return $result;
- }
-
function get_service_type($service_description)
{
$pieces = explode("::", $service_description);