You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jl...@apache.org on 2014/12/17 21:05:43 UTC
[11/37] ambari git commit: AMBARI-8695: Common Services: Refactor
HDP-2.0.6 HDFS, ZOOKEEPER services (Jayush Luniya)
http://git-wip-us.apache.org/repos/asf/ambari/blob/17b71553/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_checkpoint_time.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_checkpoint_time.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_checkpoint_time.py
new file mode 100644
index 0000000..410608f
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_checkpoint_time.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import time
+import urllib2
+import json
+
+# template for the alert text; execute() fills in the hours and minutes since
+# the last checkpoint and the number of transactions accumulated since then
+LABEL = 'Last Checkpoint: [{h} hours, {m} minutes, {tx} transactions]'
+
+# tokens in the {{site/property}} format; execute() looks its settings up in
+# the parameters dictionary under these exact strings
+NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
+NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
+NN_HTTP_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
+NN_CHECKPOINT_TX_KEY = '{{hdfs-site/dfs.namenode.checkpoint.txns}}'
+NN_CHECKPOINT_PERIOD_KEY = '{{hdfs-site/dfs.namenode.checkpoint.period}}'
+
+# thresholds for the time since the last checkpoint, expressed as a percentage
+# of the checkpoint period.
+# NOTE(review): both values are 200 and execute() tests the critical threshold
+# first, so the WARNING result can never be produced - confirm this is intended
+PERCENT_WARNING = 200
+PERCENT_CRITICAL = 200
+
+# used by execute() when the matching hdfs-site value is not supplied; the
+# period is compared against an elapsed time measured in seconds
+CHECKPOINT_TX_DEFAULT = 1000000
+CHECKPOINT_PERIOD_DEFAULT = 21600
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (NN_HTTP_ADDRESS_KEY, NN_HTTPS_ADDRESS_KEY, NN_HTTP_POLICY_KEY,
+ NN_CHECKPOINT_TX_KEY, NN_CHECKPOINT_PERIOD_KEY)
+
+
+def execute(parameters=None, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ parameters (dictionary): a mapping of parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+
+ if parameters is None:
+ return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+
+ uri = None
+ scheme = 'http'
+ http_uri = None
+ https_uri = None
+ http_policy = 'HTTP_ONLY'
+ percent_warning = PERCENT_WARNING
+ percent_critical = PERCENT_CRITICAL
+ checkpoint_tx = CHECKPOINT_TX_DEFAULT
+ checkpoint_period = CHECKPOINT_PERIOD_DEFAULT
+
+ if NN_HTTP_ADDRESS_KEY in parameters:
+ http_uri = parameters[NN_HTTP_ADDRESS_KEY]
+
+ if NN_HTTPS_ADDRESS_KEY in parameters:
+ https_uri = parameters[NN_HTTPS_ADDRESS_KEY]
+
+ if NN_HTTP_POLICY_KEY in parameters:
+ http_policy = parameters[NN_HTTP_POLICY_KEY]
+
+ if NN_CHECKPOINT_TX_KEY in parameters:
+ checkpoint_tx = parameters[NN_CHECKPOINT_TX_KEY]
+
+ if NN_CHECKPOINT_PERIOD_KEY in parameters:
+ checkpoint_period = parameters[NN_CHECKPOINT_PERIOD_KEY]
+
+ # determine the right URI and whether to use SSL
+ uri = http_uri
+ if http_policy == 'HTTPS_ONLY':
+ scheme = 'https'
+
+ if https_uri is not None:
+ uri = https_uri
+
+ current_time = int(round(time.time() * 1000))
+
+ last_checkpoint_time_qry = "{0}://{1}/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem".format(scheme,uri)
+ journal_transaction_info_qry = "{0}://{1}/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo".format(scheme,uri)
+
+ # start out assuming an OK status
+ label = None
+ result_code = "OK"
+
+ try:
+ last_checkpoint_time = int(get_value_from_jmx(last_checkpoint_time_qry,"LastCheckpointTime"))
+ journal_transaction_info = get_value_from_jmx(journal_transaction_info_qry,"JournalTransactionInfo")
+ journal_transaction_info_dict = json.loads(journal_transaction_info)
+
+ last_tx = int(journal_transaction_info_dict['LastAppliedOrWrittenTxId'])
+ most_recent_tx = int(journal_transaction_info_dict['MostRecentCheckpointTxId'])
+ transaction_difference = last_tx - most_recent_tx
+
+ delta = (current_time - last_checkpoint_time)/1000
+
+ label = LABEL.format(h=get_time(delta)['h'], m=get_time(delta)['m'], tx=transaction_difference)
+
+ if (transaction_difference > int(checkpoint_tx)) and (float(delta) / int(checkpoint_period)*100 >= int(percent_critical)):
+ result_code = 'CRITICAL'
+ elif (transaction_difference > int(checkpoint_tx)) and (float(delta) / int(checkpoint_period)*100 >= int(percent_warning)):
+ result_code = 'WARNING'
+
+ except Exception, e:
+ label = str(e)
+ result_code = 'UNKNOWN'
+
+ return ((result_code, [label]))
+
+def get_time(delta):
+ h = int(delta/3600)
+ m = int((delta % 3600)/60)
+ return {'h':h, 'm':m}
+
+
+def get_value_from_jmx(qry, property):
+ response = urllib2.urlopen(qry)
+ data=response.read()
+ data_dict = json.loads(data)
+ return data_dict["beans"][0][property]
http://git-wip-us.apache.org/repos/asf/ambari/blob/17b71553/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_ha_namenode_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_ha_namenode_health.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_ha_namenode_health.py
new file mode 100644
index 0000000..fc1541d
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_ha_namenode_health.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import urllib2
+import json
+
+# result codes returned by execute() as the first element of its tuple
+RESULT_STATE_OK = 'OK'
+RESULT_STATE_CRITICAL = 'CRITICAL'
+RESULT_STATE_UNKNOWN = 'UNKNOWN'
+RESULT_STATE_SKIPPED = 'SKIPPED'
+
+# values of the 'State' attribute of the NameNodeStatus JMX bean that
+# execute() recognizes; any other value is counted as unknown
+HDFS_NN_STATE_ACTIVE = 'active'
+HDFS_NN_STATE_STANDBY = 'standby'
+
+# tokens requested through get_tokens(); execute() looks its settings up in
+# the parameters dictionary under these exact strings.
+# NOTE(review): execute() does not read NN_HTTP_ADDRESS_KEY or
+# NN_HTTPS_ADDRESS_KEY; it takes the per-NameNode addresses from hdfs-site
+HDFS_SITE_KEY = '{{hdfs-site}}'
+NAMESERVICE_KEY = '{{hdfs-site/dfs.nameservices}}'
+NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
+NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
+DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (HDFS_SITE_KEY, NAMESERVICE_KEY, NN_HTTP_ADDRESS_KEY,
+ NN_HTTPS_ADDRESS_KEY, DFS_POLICY_KEY)
+
+
+def execute(parameters=None, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ parameters (dictionary): a mapping of parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+ if parameters is None:
+ return (RESULT_STATE_UNKNOWN, ['There were no parameters supplied to the script.'])
+
+ # if not in HA mode, then SKIP
+ if not NAMESERVICE_KEY in parameters:
+ return (RESULT_STATE_SKIPPED, ['NameNode HA is not enabled'])
+
+ # hdfs-site is required
+ if not HDFS_SITE_KEY in parameters:
+ return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(HDFS_SITE_KEY)])
+
+ # determine whether or not SSL is enabled
+ is_ssl_enabled = False
+ if DFS_POLICY_KEY in parameters:
+ dfs_policy = parameters[DFS_POLICY_KEY]
+ if dfs_policy == "HTTPS_ONLY":
+ is_ssl_enabled = True
+
+ name_service = parameters[NAMESERVICE_KEY]
+ hdfs_site = parameters[HDFS_SITE_KEY]
+
+ # look for dfs.ha.namenodes.foo
+ nn_unique_ids_key = 'dfs.ha.namenodes.' + name_service
+ if not nn_unique_ids_key in hdfs_site:
+ return (RESULT_STATE_UNKNOWN, ['Unable to find unique namenode alias key {0}'.format(nn_unique_ids_key)])
+
+ namenode_http_fragment = 'dfs.namenode.http-address.{0}.{1}'
+ jmx_uri_fragment = "http://{0}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"
+
+ if is_ssl_enabled:
+ namenode_http_fragment = 'dfs.namenode.https-address.{0}.{1}'
+ jmx_uri_fragment = "https://{0}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"
+
+
+ active_namenodes = []
+ standby_namenodes = []
+ unknown_namenodes = []
+
+ # now we have something like 'nn1,nn2,nn3,nn4'
+ # turn it into dfs.namenode.[property].[dfs.nameservices].[nn_unique_id]
+ # ie dfs.namenode.http-address.hacluster.nn1
+ nn_unique_ids = hdfs_site[nn_unique_ids_key].split(',')
+ for nn_unique_id in nn_unique_ids:
+ key = namenode_http_fragment.format(name_service,nn_unique_id)
+
+ if key in hdfs_site:
+ # use str() to ensure that unicode strings do not have the u' in them
+ value = str(hdfs_site[key])
+
+ try:
+ jmx_uri = jmx_uri_fragment.format(value)
+ state = get_value_from_jmx(jmx_uri,'State')
+
+ if state == HDFS_NN_STATE_ACTIVE:
+ active_namenodes.append(value)
+ elif state == HDFS_NN_STATE_STANDBY:
+ standby_namenodes.append(value)
+ else:
+ unknown_namenodes.append(value)
+ except:
+ unknown_namenodes.append(value)
+
+ # now that the request is done, determine if this host is the host that
+ # should report the status of the HA topology
+ is_active_namenode = False
+ for active_namenode in active_namenodes:
+ if active_namenode.startswith(host_name):
+ is_active_namenode = True
+
+ # there's only one scenario here; there is exactly 1 active and 1 standby
+ is_topology_healthy = len(active_namenodes) == 1 and len(standby_namenodes) == 1
+
+ result_label = 'Active{0}, Standby{1}, Unknown{2}'.format(str(active_namenodes),
+ str(standby_namenodes), str(unknown_namenodes))
+
+ # Healthy Topology:
+ # - Active NN reports the alert, standby does not
+ #
+ # Unhealthy Topology:
+ # - Report the alert if this is the first named host
+ # - Report the alert if not the first named host, but the other host
+ # could not report its status
+ if is_topology_healthy:
+ if is_active_namenode is True:
+ return (RESULT_STATE_OK, [result_label])
+ else:
+ return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
+ else:
+ # dfs.namenode.rpc-address.service.alias is guaranteed in HA mode
+ first_listed_host_key = 'dfs.namenode.rpc-address.{0}.{1}'.format(
+ name_service, nn_unique_ids[0])
+
+ first_listed_host = ''
+ if first_listed_host_key in hdfs_site:
+ first_listed_host = hdfs_site[first_listed_host_key]
+
+ is_first_listed_host = False
+ if first_listed_host.startswith(host_name):
+ is_first_listed_host = True
+
+ if is_first_listed_host:
+ return (RESULT_STATE_CRITICAL, [result_label])
+ else:
+ # not the first listed host, but the first host might be in the unknown
+ return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
+
+
+def get_value_from_jmx(qry, property):
+ response = urllib2.urlopen(qry)
+ data=response.read()
+ data_dict = json.loads(data)
+ return data_dict["beans"][0][property]
http://git-wip-us.apache.org/repos/asf/ambari/blob/17b71553/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkForFormat.sh
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkForFormat.sh b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkForFormat.sh
new file mode 100644
index 0000000..54405f6
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkForFormat.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+
+# Formats the NameNode unless it is already marked as formatted or one of the
+# NameNode directories already holds files.
+#
+# Positional arguments:
+#   1  hdfs_user     user that runs the format command
+#   2  conf_dir      directory passed to "hdfs --config"
+#   3  bin_dir       directory appended to PATH for the format command
+#   4  old_mark_dir  earlier marker directory; moved to mark_dir when present
+#   5  mark_dir      marker directory; when it exists nothing is formatted
+#   6+ name_dirs     NameNode directories, separated by commas and/or spaces
+#
+# Exit status: the number of non-empty NameNode directories, or the status of
+# the format command when all of them are empty, or 0 when nothing is done.
+export hdfs_user=$1
+shift
+export conf_dir=$1
+shift
+export bin_dir=$1
+shift
+export old_mark_dir=$1
+shift
+export mark_dir=$1
+shift
+export name_dirs=$*
+
+export EXIT_CODE=0
+export command="namenode -format"
+export list_of_non_empty_dirs=""
+
+# a marker file at this fixed path is replaced by the marker directory
+mark_file=/var/run/hadoop/hdfs/namenode-formatted
+if [[ -f ${mark_file} ]] ; then
+  sudo rm -f "${mark_file}"
+  sudo mkdir -p "${mark_dir}"
+fi
+
+if [[ -d $old_mark_dir ]] ; then
+  mv "${old_mark_dir}" "${mark_dir}"
+fi
+
+if [[ ! -d $mark_dir ]] ; then
+  for dir in `echo $name_dirs | tr ',' ' '` ; do
+    echo "NameNode Dirname = $dir"
+    # any listing output means the directory is not empty; the name is quoted
+    # and tested directly rather than being pasted into a string for eval.
+    # A directory that cannot be listed still counts as empty, as before.
+    if [[ -n "$(ls "$dir")" ]] ; then
+      (( EXIT_CODE = $EXIT_CODE + 1 ))
+      list_of_non_empty_dirs="$list_of_non_empty_dirs $dir"
+    fi
+  done
+
+  if [[ $EXIT_CODE == 0 ]] ; then
+    sudo su ${hdfs_user} - -s /bin/bash -c "export PATH=$PATH:${bin_dir} ; yes Y | hdfs --config ${conf_dir} ${command}"
+    # must stay directly after the format command: $? is its exit status
+    (( EXIT_CODE = $EXIT_CODE | $? ))
+  else
+    echo "ERROR: Namenode directory(s) is non empty. Will not format the namenode. List of non-empty namenode dirs ${list_of_non_empty_dirs}"
+  fi
+else
+  echo "${mark_dir} exists. Namenode DFS already formatted"
+fi
+
+exit $EXIT_CODE
+
http://git-wip-us.apache.org/repos/asf/ambari/blob/17b71553/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkWebUI.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkWebUI.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkWebUI.py
new file mode 100644
index 0000000..f8e9c1a
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkWebUI.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+import optparse
+import httplib
+
+#
+# Main.
+#
+def main():
+ parser = optparse.OptionParser(usage="usage: %prog [options] component ")
+ parser.add_option("-m", "--hosts", dest="hosts", help="Comma separated hosts list for WEB UI to check it availability")
+ parser.add_option("-p", "--port", dest="port", help="Port of WEB UI to check it availability")
+
+ (options, args) = parser.parse_args()
+
+ hosts = options.hosts.split(',')
+ port = options.port
+
+ for host in hosts:
+ try:
+ conn = httplib.HTTPConnection(host, port)
+ # This can be modified to get a partial url part to be sent with request
+ conn.request("GET", "/")
+ httpCode = conn.getresponse().status
+ conn.close()
+ except Exception:
+ httpCode = 404
+
+ if httpCode != 200:
+ print "Cannot access WEB UI on: http://" + host + ":" + port
+ exit(1)
+
+
+# run the check only when this file is executed as a script
+if __name__ == "__main__":
+ main()
http://git-wip-us.apache.org/repos/asf/ambari/blob/17b71553/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/__init__.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/__init__.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/__init__.py
new file mode 100644
index 0000000..35de4bb
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/__init__.py
@@ -0,0 +1,20 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Ambari Agent
+
+"""