You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ma...@apache.org on 2014/02/26 19:47:56 UTC

git commit: AMBARI-4714. Alerts for Storm. (Arsen Babych via mahadev)

Repository: ambari
Updated Branches:
  refs/heads/trunk f959b3768 -> dc4709ead


AMBARI-4714. Alerts for Storm. (Arsen Babych via mahadev)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/dc4709ea
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/dc4709ea
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/dc4709ea

Branch: refs/heads/trunk
Commit: dc4709ead97f0fcdf623f28b3a84786bbbdd7f1c
Parents: f959b37
Author: Mahadev Konar <ma...@apache.org>
Authored: Wed Feb 26 10:47:46 2014 -0800
Committer: Mahadev Konar <ma...@apache.org>
Committed: Wed Feb 26 10:47:46 2014 -0800

----------------------------------------------------------------------
 .../nagios/NagiosPropertyProvider.java          |  2 +-
 .../NAGIOS/package/files/check_aggregate.php    |  1 +
 .../NAGIOS/package/files/check_webui.sh         |  9 +-
 .../services/NAGIOS/package/scripts/params.py   | 16 +++-
 .../templates/hadoop-servicegroups.cfg.j2       | 10 +++
 .../package/templates/hadoop-services.cfg.j2    | 94 ++++++++++++++++++++
 .../python/stacks/2.1.1/configs/default.json    |  2 +-
 .../python/stacks/2.1.1/configs/secured.json    |  2 +-
 ambari-web/app/models/alert.js                  |  4 +-
 .../src/addOns/nagios/scripts/nagios_alerts.php |  8 ++
 10 files changed, 141 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/dc4709ea/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java
index b520202..90c0e4e 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java
@@ -77,7 +77,7 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv
   private static final List<String> IGNORABLE_FOR_SERVICES = new ArrayList<String>(
       Arrays.asList("NodeManager health", "NodeManager process", "TaskTracker process",
       "RegionServer process", "DataNode process", "DataNode space",
-      "ZooKeeper Server process"));
+      "ZooKeeper Server process", "Supervisors process"));
   
   private static final List<String> IGNORABLE_FOR_HOSTS = new ArrayList<String>(
       Arrays.asList("percent"));

http://git-wip-us.apache.org/repos/asf/ambari/blob/dc4709ea/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_aggregate.php
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_aggregate.php b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_aggregate.php
index db2a20f..792b25b 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_aggregate.php
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_aggregate.php
@@ -127,6 +127,7 @@ define("PASSIVE_MODE_STR", "AMBARIPASSIVE=");
       case "HDFS":
       case "MAPREDUCE":
       case "HBASE":
+      case "STORM":
         break;
       default:
         $pieces[0] = "UNKNOWN";

http://git-wip-us.apache.org/repos/asf/ambari/blob/dc4709ea/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui.sh
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui.sh b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui.sh
index 87e6aa6..054677f 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui.sh
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui.sh
@@ -78,6 +78,13 @@ historyserver2)
       exit 1;
     fi
     ;;
+storm_ui)
+    rmweburl="http://$host:$port"
+    if [[ `checkurl "$rmweburl"` -ne 0 ]]; then
+      echo "WARNING: Storm Web UI not accessible : $rmweburl";
+      exit 1;
+    fi
+    ;;
 falconserver)
     hsweburl="http://$host:$port/"
     if [[ `checkurl "$hsweburl"` -ne 0 ]]; then
@@ -85,7 +92,7 @@ falconserver)
       exit 1;
     fi
     ;;
-*) echo "UNKNOWN: Invalid service name [$service], valid options [jobtracker|jobhistory|hbase|namenode|resourcemanager|historyserver2|falconserver]"
+*) echo "UNKNOWN: Invalid service name [$service], valid options [jobtracker|jobhistory|hbase|namenode|resourcemanager|historyserver2|falconserver|storm_ui]"
    exit 3
    ;;
 esac

http://git-wip-us.apache.org/repos/asf/ambari/blob/dc4709ea/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py
index 3b21d74..3b0cd1e 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py
@@ -65,6 +65,11 @@ flume_port = "4159"
 hive_metastore_port = config['configurations']['global']['hive_metastore_port'] #"9083"
 templeton_port = config['configurations']['webhcat-site']['templeton.port'] #"50111"
 hbase_rs_port = "60030"
+storm_ui_port = config['configurations']['storm-site']['ui.port']
+drpc_port = config['configurations']['storm-site']['drpc.port']
+nimbus_port = config['configurations']['storm-site']['nimbus.thrift.port']
+logviewer_port = config['configurations']['storm-site']['logviewer.port']
+supervisor_port = "56431"
 falcon_port = config['configurations']['global']['falcon_port']
 ahs_port = get_port_from_url(config['configurations']['yarn-site']['yarn.timeline-service.webapp.address'])
 
@@ -129,7 +134,11 @@ _flume_hosts = default("/clusterHostInfo/flume_hosts", None)
 _nagios_server_host = default("/clusterHostInfo/nagios_server_host",None)
 _ganglia_server_host = default("/clusterHostInfo/ganglia_server_host",None)
 _app_timeline_server_hosts = default("/clusterHostInfo/app_timeline_server_hosts",None)
-
+_nimbus_host = default("/clusterHostInfo/nimbus_hosts",None)
+_drpc_host = default("/clusterHostInfo/drpc_server_hosts",None)
+_logwier_host = default("/clusterHostInfo/logviewer_server_hosts",None)
+_supervisor_hosts = default("/clusterHostInfo/supervisor_hosts",None)
+_storm_ui_host = default("/clusterHostInfo/storm_ui_server_hosts",None)
 hbase_master_hosts = default("/clusterHostInfo/hbase_master_hosts",None)
 _hive_server_host = default("/clusterHostInfo/hive_server_host",None)
 _oozie_server = default("/clusterHostInfo/oozie_server",None)
@@ -165,6 +174,11 @@ hostgroup_defs = {
     'nodemanagers' : _nm_hosts,
     'historyserver2' : _hs_host,
     'journalnodes' : _journalnode_hosts,
+    'nimbus' : _nimbus_host,
+    'drpc-server' : _drpc_host,
+    'logviewer-server' : _logwier_host,
+    'storm_ui' : _storm_ui_host,
+    'supervisors' : _supervisor_hosts,
     'falcon-server' : _falcon_host,
     'ats-servers' : _app_timeline_server_hosts
 }

http://git-wip-us.apache.org/repos/asf/ambari/blob/dc4709ea/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2
index b07dbe8..a22f208 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-servicegroups.cfg.j2
@@ -78,6 +78,16 @@ define servicegroup {
   alias  HUE Checks
 }
 {% endif %}
+{% if hostgroup_defs['nimbus'] or
+  hostgroup_defs['drpc-server'] or
+  hostgroup_defs['logviewer-server'] or
+  hostgroup_defs['storm_ui'] or
+  hostgroup_defs['supervisors']%}
+define servicegroup {
+  servicegroup_name  STORM
+  alias  STORM Checks
+}
+{% endif %}
 {% if hostgroup_defs['falcon-server'] %}
 define servicegroup {
   servicegroup_name  FALCON

http://git-wip-us.apache.org/repos/asf/ambari/blob/dc4709ea/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
index 5e339dc..edc36c6 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
@@ -232,6 +232,100 @@ define service {
 }
 {% endif %}
 
+{% if hostgroup_defs['storm_ui'] %}
+# STORM UI Checks
+define service {
+        hostgroup_name          storm_ui
+        use                     hadoop-service
+        service_description     STORM_UI_SERVER::Storm Web UI on {{ hostgroup_defs['storm_ui'][0] }}
+        servicegroups           STORM
+        check_command           check_webui!storm_ui!{{ storm_ui_port }}
+        normal_check_interval   1
+        retry_check_interval    1
+        max_check_attempts      3
+}
+{% endif %}
+
+{% if hostgroup_defs['storm_ui'] %}
+# STORM UI process Checks
+define service {
+        hostgroup_name          storm_ui
+        use                     hadoop-service
+        service_description     STORM_UI_SERVER::Storm Web UI process
+        servicegroups           STORM
+        check_command           check_tcp_wrapper!{{ storm_ui_port }}!-w 1 -c 1
+        normal_check_interval   0.5
+        retry_check_interval    0.25
+        max_check_attempts      3
+}
+{% endif %}
+
+{% if hostgroup_defs['nimbus'] %}
+# Nimbus Checks
+define service {
+        hostgroup_name          nimbus
+        use                     hadoop-service
+        service_description     NIMBUS::Nimbus process
+        servicegroups           STORM
+        check_command           check_tcp_wrapper!{{ nimbus_port }}!-w 1 -c 1
+        normal_check_interval   0.5
+        retry_check_interval    0.25
+        max_check_attempts      3
+}
+{% endif %}
+
+{% if hostgroup_defs['drpc-server'] %}
+# DRPC Checks
+define service {
+        hostgroup_name          drpc-server
+        use                     hadoop-service
+        service_description     DRPC_SERVER::DRPC server process
+        servicegroups           STORM
+        check_command           check_tcp_wrapper!{{ drpc_port }}!-w 1 -c 1
+        normal_check_interval   0.5
+        retry_check_interval    0.25
+        max_check_attempts      3
+}
+{% endif %}
+
+{% if hostgroup_defs['logviewer-server'] %}
+# logviewer Checks
+define service {
+        hostgroup_name          logviewer-server
+        use                     hadoop-service
+        service_description     LOGVIEWER_SERVER::Logviewer server process
+        servicegroups           STORM
+        check_command           check_tcp_wrapper!{{ logviewer_port }}!-w 1 -c 1
+        normal_check_interval   0.5
+        retry_check_interval    0.25
+        max_check_attempts      3
+}
+{% endif %}
+
+# NAGIOS SERVER Supervisor Checks
+{% if hostgroup_defs['supervisors'] %}
+define service {
+        hostgroup_name          nagios-server
+        use                     hadoop-service
+        service_description     SUPERVISOR::Percent Supervisors live
+        servicegroups           STORM
+        check_command           check_aggregate!"SUPERVISOR::Supervisors process"!10%!30%
+        normal_check_interval   0.5
+        retry_check_interval    0.25
+        max_check_attempts      3
+}
+
+define service {
+        hostgroup_name          supervisors
+        use                     hadoop-service
+        service_description     SUPERVISOR::Supervisors process
+        servicegroups           STORM
+        check_command           check_tcp_wrapper!{{ supervisor_port }}!-w 1 -c 1
+        normal_check_interval   1
+        retry_check_interval    0.5
+        max_check_attempts      3
+}
+{%  endif %}
 
 {% if hostgroup_defs['namenode'] %}
 # HDFS Checks

http://git-wip-us.apache.org/repos/asf/ambari/blob/dc4709ea/ambari-server/src/test/python/stacks/2.1.1/configs/default.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.1.1/configs/default.json b/ambari-server/src/test/python/stacks/2.1.1/configs/default.json
index 1530293..562c073 100644
--- a/ambari-server/src/test/python/stacks/2.1.1/configs/default.json
+++ b/ambari-server/src/test/python/stacks/2.1.1/configs/default.json
@@ -208,7 +208,7 @@
             "topology.executor.receive.buffer.size": "1024",
             "topology.stats.sample.rate": "0.05",
             "topology.fall.back.on.java.serialization": "true",
-            "supervisor.childopts": "-javaagent:/usr/lib/storm/contrib/storm-jmxetric/lib/jmxetric-1.0.4.jar=host=c6402.ambari.apache.org,port=8650,wireformat31x=true,mode=multicast,config=/usr/lib/storm/contrib/storm-jmxetric/conf/jmxetric-conf.xml,process=Supervisor_JVM -Xmx256m",
+            "supervisor.childopts": "-Xmx256m -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=56431 -javaagent:/usr/lib/storm/contrib/storm-jmxetric/lib/jmxetric-1.0.4.jar=host=c6402.ambari.apache.org,port=8650,wireformat31x=true,mode=multicast,config=/usr/lib/storm/contrib/storm-jmxetric/conf/jmxetric-conf.xml,process=Supervisor_JVM",
             "topology.enable.message.timeouts": "true",
             "storm.messaging.netty.max_wait_ms": "1000",
             "nimbus.topology.validator": "backtype.storm.nimbus.DefaultTopologyValidator",

http://git-wip-us.apache.org/repos/asf/ambari/blob/dc4709ea/ambari-server/src/test/python/stacks/2.1.1/configs/secured.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.1.1/configs/secured.json b/ambari-server/src/test/python/stacks/2.1.1/configs/secured.json
index e7e9eb6..e4dd536 100644
--- a/ambari-server/src/test/python/stacks/2.1.1/configs/secured.json
+++ b/ambari-server/src/test/python/stacks/2.1.1/configs/secured.json
@@ -229,7 +229,7 @@
             "topology.executor.receive.buffer.size": "1024",
             "topology.stats.sample.rate": "0.05",
             "topology.fall.back.on.java.serialization": "true",
-            "supervisor.childopts": "-javaagent:/usr/lib/storm/contrib/storm-jmxetric/lib/jmxetric-1.0.4.jar=host=c6402.ambari.apache.org,port=8650,wireformat31x=true,mode=multicast,config=/usr/lib/storm/contrib/storm-jmxetric/conf/jmxetric-conf.xml,process=Supervisor_JVM -Xmx256m",
+            "supervisor.childopts": "-Xmx256m -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=56431 -javaagent:/usr/lib/storm/contrib/storm-jmxetric/lib/jmxetric-1.0.4.jar=host=c6402.ambari.apache.org,port=8650,wireformat31x=true,mode=multicast,config=/usr/lib/storm/contrib/storm-jmxetric/conf/jmxetric-conf.xml,process=Supervisor_JVM",
             "topology.enable.message.timeouts": "true",
             "storm.messaging.netty.max_wait_ms": "1000",
             "nimbus.topology.validator": "backtype.storm.nimbus.DefaultTopologyValidator",

http://git-wip-us.apache.org/repos/asf/ambari/blob/dc4709ea/ambari-web/app/models/alert.js
----------------------------------------------------------------------
diff --git a/ambari-web/app/models/alert.js b/ambari-web/app/models/alert.js
index c75239a..cd3e92a 100644
--- a/ambari-web/app/models/alert.js
+++ b/ambari-web/app/models/alert.js
@@ -67,7 +67,7 @@ App.Alert = Em.Object.extend({
    * Used to show only required alerts at the service level
    */
   ignoredForServices: function() {
-    return ['NodeManager health', 'NodeManager process', 'TaskTracker process', 'RegionServer process', 'DataNode process', 'DataNode space', 'ZooKeeper Server process'].contains(this.get('title'));
+    return ['NodeManager health', 'NodeManager process', 'TaskTracker process', 'RegionServer process', 'DataNode process', 'DataNode space', 'ZooKeeper Server process', 'Supervisors process'].contains(this.get('title'));
   }.property('title'),
 
   /**
@@ -187,4 +187,4 @@ App.Alert = Em.Object.extend({
     }
     return null;
   }.property('serviceType')
-});
\ No newline at end of file
+});

http://git-wip-us.apache.org/repos/asf/ambari/blob/dc4709ea/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php
----------------------------------------------------------------------
diff --git a/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php b/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php
index ab4227c..ddae4c0 100644
--- a/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php
+++ b/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php
@@ -399,6 +399,13 @@ function hdp_mon_generate_response( $response_data )
       case "NODEMANAGER":
         $pieces[0] = "YARN";
         break;
+      case "STORM_UI_SERVER":
+      case "NIMBUS":
+      case "DRPC_SERVER":
+      case "LOGVIEWER_SERVER":
+      case "SUPERVISOR":
+        $pieces[0] = "STORM";
+        break;
       case "NAGIOS":
       case "HDFS":
       case "MAPREDUCE":
@@ -407,6 +414,7 @@ function hdp_mon_generate_response( $response_data )
       case "OOZIE":
       case "WEBHCAT":
       case "GANGLIA":
+      case "STORM":
       case "FALCON":
       case "PUPPET":
         break;