You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jl...@apache.org on 2014/12/17 21:05:43 UTC

[11/37] ambari git commit: AMBARI-8695: Common Services: Refactor HDP-2.0.6 HDFS, ZOOKEEPER services (Jayush Luniya)

http://git-wip-us.apache.org/repos/asf/ambari/blob/17b71553/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_checkpoint_time.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_checkpoint_time.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_checkpoint_time.py
new file mode 100644
index 0000000..410608f
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_checkpoint_time.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import time
+import urllib2
+import json
+
+# Template for the alert text; execute() fills in the hours and minutes since
+# the last checkpoint and the number of transactions since that checkpoint.
+LABEL = 'Last Checkpoint: [{h} hours, {m} minutes, {tx} transactions]'
+
+# Tokens in the {{site/property}} format.  get_tokens() returns them and
+# execute() uses the same strings as keys into its parameters dictionary.
+NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
+NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
+NN_HTTP_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
+NN_CHECKPOINT_TX_KEY = '{{hdfs-site/dfs.namenode.checkpoint.txns}}'
+NN_CHECKPOINT_PERIOD_KEY = '{{hdfs-site/dfs.namenode.checkpoint.period}}'
+
+# Thresholds for the time since the last checkpoint, expressed as a percentage
+# of the checkpoint period.
+# NOTE(review): both thresholds are 200 and execute() tests the critical one
+# first, so the WARNING result can never be selected - confirm this is intended.
+PERCENT_WARNING = 200
+PERCENT_CRITICAL = 200
+
+# Fallbacks used by execute() when the matching parameter is not supplied.
+# The period is compared against an elapsed time in seconds (21600 s = 6 h).
+CHECKPOINT_TX_DEFAULT = 1000000
+CHECKPOINT_PERIOD_DEFAULT = 21600
+
+def get_tokens():
+  """
+  Returns a tuple of tokens in the format {{site/property}} that will be used
+  to build the dictionary passed into execute
+  """
+  return (NN_HTTP_ADDRESS_KEY, NN_HTTPS_ADDRESS_KEY, NN_HTTP_POLICY_KEY, 
+      NN_CHECKPOINT_TX_KEY, NN_CHECKPOINT_PERIOD_KEY)      
+  
+
+def execute(parameters=None, host_name=None):
+  """
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  parameters (dictionary): a mapping of parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+
+  if parameters is None:
+    return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+  
+  uri = None
+  scheme = 'http'  
+  http_uri = None
+  https_uri = None
+  http_policy = 'HTTP_ONLY'
+  percent_warning = PERCENT_WARNING
+  percent_critical = PERCENT_CRITICAL
+  checkpoint_tx = CHECKPOINT_TX_DEFAULT
+  checkpoint_period = CHECKPOINT_PERIOD_DEFAULT
+  
+  if NN_HTTP_ADDRESS_KEY in parameters:
+    http_uri = parameters[NN_HTTP_ADDRESS_KEY]
+
+  if NN_HTTPS_ADDRESS_KEY in parameters:
+    https_uri = parameters[NN_HTTPS_ADDRESS_KEY]
+
+  if NN_HTTP_POLICY_KEY in parameters:
+    http_policy = parameters[NN_HTTP_POLICY_KEY]
+
+  if NN_CHECKPOINT_TX_KEY in parameters:
+    checkpoint_tx = parameters[NN_CHECKPOINT_TX_KEY]
+
+  if NN_CHECKPOINT_PERIOD_KEY in parameters:
+    checkpoint_period = parameters[NN_CHECKPOINT_PERIOD_KEY]
+    
+  # determine the right URI and whether to use SSL
+  uri = http_uri
+  if http_policy == 'HTTPS_ONLY':
+    scheme = 'https'
+    
+    if https_uri is not None:
+      uri = https_uri 
+  
+  current_time = int(round(time.time() * 1000))
+
+  last_checkpoint_time_qry = "{0}://{1}/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem".format(scheme,uri)
+  journal_transaction_info_qry = "{0}://{1}/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo".format(scheme,uri)
+
+  # start out assuming an OK status
+  label = None
+  result_code = "OK"
+
+  try:
+    last_checkpoint_time = int(get_value_from_jmx(last_checkpoint_time_qry,"LastCheckpointTime"))
+    journal_transaction_info = get_value_from_jmx(journal_transaction_info_qry,"JournalTransactionInfo")
+    journal_transaction_info_dict = json.loads(journal_transaction_info)
+  
+    last_tx = int(journal_transaction_info_dict['LastAppliedOrWrittenTxId'])
+    most_recent_tx = int(journal_transaction_info_dict['MostRecentCheckpointTxId'])
+    transaction_difference = last_tx - most_recent_tx
+    
+    delta = (current_time - last_checkpoint_time)/1000
+
+    label = LABEL.format(h=get_time(delta)['h'], m=get_time(delta)['m'], tx=transaction_difference)
+    
+    if (transaction_difference > int(checkpoint_tx)) and (float(delta) / int(checkpoint_period)*100 >= int(percent_critical)):
+      result_code = 'CRITICAL'
+    elif (transaction_difference > int(checkpoint_tx)) and (float(delta) / int(checkpoint_period)*100 >= int(percent_warning)):
+      result_code = 'WARNING'
+
+  except Exception, e:
+    label = str(e)
+    result_code = 'UNKNOWN'
+        
+  return ((result_code, [label]))
+
+def get_time(delta):
+  h = int(delta/3600)
+  m = int((delta % 3600)/60)
+  return {'h':h, 'm':m}
+
+
+def get_value_from_jmx(qry, property):
+  response = urllib2.urlopen(qry)
+  data=response.read()
+  data_dict = json.loads(data)
+  return data_dict["beans"][0][property]

http://git-wip-us.apache.org/repos/asf/ambari/blob/17b71553/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_ha_namenode_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_ha_namenode_health.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_ha_namenode_health.py
new file mode 100644
index 0000000..fc1541d
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/alert_ha_namenode_health.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import urllib2
+import json
+
+# Result codes returned by execute() as the first element of its result tuple.
+RESULT_STATE_OK = 'OK'
+RESULT_STATE_CRITICAL = 'CRITICAL'
+RESULT_STATE_UNKNOWN = 'UNKNOWN'
+RESULT_STATE_SKIPPED = 'SKIPPED'
+
+# Values of the 'State' JMX property that execute() recognizes; any other
+# value puts the NameNode into the unknown list.
+HDFS_NN_STATE_ACTIVE = 'active'
+HDFS_NN_STATE_STANDBY = 'standby'
+
+# Tokens in the {{site/property}} format.  get_tokens() returns them and
+# execute() uses the same strings as keys into its parameters dictionary.
+# The value stored under HDFS_SITE_KEY is used as a mapping of hdfs-site
+# property name to value.
+HDFS_SITE_KEY = '{{hdfs-site}}'
+NAMESERVICE_KEY = '{{hdfs-site/dfs.nameservices}}'
+NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
+NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
+DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
+
+def get_tokens():
+  """
+  Returns a tuple of tokens in the format {{site/property}} that will be used
+  to build the dictionary passed into execute
+  """
+  return (HDFS_SITE_KEY, NAMESERVICE_KEY, NN_HTTP_ADDRESS_KEY,
+  NN_HTTPS_ADDRESS_KEY, DFS_POLICY_KEY)
+  
+
+def execute(parameters=None, host_name=None):
+  """
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  parameters (dictionary): a mapping of parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+  if parameters is None:
+    return (RESULT_STATE_UNKNOWN, ['There were no parameters supplied to the script.'])
+
+  # if not in HA mode, then SKIP
+  if not NAMESERVICE_KEY in parameters:
+    return (RESULT_STATE_SKIPPED, ['NameNode HA is not enabled'])
+
+  # hdfs-site is required
+  if not HDFS_SITE_KEY in parameters:
+    return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(HDFS_SITE_KEY)])
+
+  # determine whether or not SSL is enabled
+  is_ssl_enabled = False
+  if DFS_POLICY_KEY in parameters:
+    dfs_policy = parameters[DFS_POLICY_KEY]
+    if dfs_policy == "HTTPS_ONLY":
+      is_ssl_enabled = True
+
+  name_service = parameters[NAMESERVICE_KEY]
+  hdfs_site = parameters[HDFS_SITE_KEY]
+
+  # look for dfs.ha.namenodes.foo
+  nn_unique_ids_key = 'dfs.ha.namenodes.' + name_service
+  if not nn_unique_ids_key in hdfs_site:
+    return (RESULT_STATE_UNKNOWN, ['Unable to find unique namenode alias key {0}'.format(nn_unique_ids_key)])
+
+  namenode_http_fragment = 'dfs.namenode.http-address.{0}.{1}'
+  jmx_uri_fragment = "http://{0}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"
+
+  if is_ssl_enabled:
+    namenode_http_fragment = 'dfs.namenode.https-address.{0}.{1}'
+    jmx_uri_fragment = "https://{0}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"
+
+
+  active_namenodes = []
+  standby_namenodes = []
+  unknown_namenodes = []
+
+  # now we have something like 'nn1,nn2,nn3,nn4'
+  # turn it into dfs.namenode.[property].[dfs.nameservices].[nn_unique_id]
+  # ie dfs.namenode.http-address.hacluster.nn1
+  nn_unique_ids = hdfs_site[nn_unique_ids_key].split(',')
+  for nn_unique_id in nn_unique_ids:
+    key = namenode_http_fragment.format(name_service,nn_unique_id)
+
+    if key in hdfs_site:
+      # use str() to ensure that unicode strings do not have the u' in them
+      value = str(hdfs_site[key])
+
+      try:
+        jmx_uri = jmx_uri_fragment.format(value)
+        state = get_value_from_jmx(jmx_uri,'State')
+
+        if state == HDFS_NN_STATE_ACTIVE:
+          active_namenodes.append(value)
+        elif state == HDFS_NN_STATE_STANDBY:
+          standby_namenodes.append(value)
+        else:
+          unknown_namenodes.append(value)
+      except:
+        unknown_namenodes.append(value)
+
+  # now that the request is done, determine if this host is the host that
+  # should report the status of the HA topology
+  is_active_namenode = False
+  for active_namenode in active_namenodes:
+    if active_namenode.startswith(host_name):
+      is_active_namenode = True
+
+  # there's only one scenario here; there is exactly 1 active and 1 standby
+  is_topology_healthy = len(active_namenodes) == 1 and len(standby_namenodes) == 1
+
+  result_label = 'Active{0}, Standby{1}, Unknown{2}'.format(str(active_namenodes),
+    str(standby_namenodes), str(unknown_namenodes))
+
+  # Healthy Topology:
+  #   - Active NN reports the alert, standby does not
+  #
+  # Unhealthy Topology:
+  #   - Report the alert if this is the first named host
+  #   - Report the alert if not the first named host, but the other host
+  #   could not report its status
+  if is_topology_healthy:
+    if is_active_namenode is True:
+      return (RESULT_STATE_OK, [result_label])
+    else:
+      return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
+  else:
+    # dfs.namenode.rpc-address.service.alias is guaranteed in HA mode
+    first_listed_host_key = 'dfs.namenode.rpc-address.{0}.{1}'.format(
+      name_service, nn_unique_ids[0])
+
+    first_listed_host = ''
+    if first_listed_host_key in hdfs_site:
+      first_listed_host = hdfs_site[first_listed_host_key]
+
+    is_first_listed_host = False
+    if first_listed_host.startswith(host_name):
+      is_first_listed_host = True
+
+    if is_first_listed_host:
+      return (RESULT_STATE_CRITICAL, [result_label])
+    else:
+      # not the first listed host, but the first host might be in the unknown
+      return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
+
+
+def get_value_from_jmx(qry, property):
+  response = urllib2.urlopen(qry)
+  data=response.read()
+  data_dict = json.loads(data)
+  return data_dict["beans"][0][property]

http://git-wip-us.apache.org/repos/asf/ambari/blob/17b71553/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkForFormat.sh
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkForFormat.sh b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkForFormat.sh
new file mode 100644
index 0000000..54405f6
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkForFormat.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+
+# Formats the NameNode unless it has been formatted before or any of its name
+# directories already has content.
+#
+# Positional arguments, in order:
+#   hdfs_user     user the format command is run as
+#   conf_dir      directory passed to "hdfs --config"
+#   bin_dir       directory appended to PATH before "hdfs" is invoked
+#   old_mark_dir  previous location of the "already formatted" marker directory
+#   mark_dir      current location of the "already formatted" marker directory
+#   name_dirs     all remaining arguments; name directories, comma separated
+#
+# The exit status is 0 when the format succeeded or was not needed, otherwise
+# the number of non-empty name directories or the status of the format command.
+export hdfs_user=$1
+shift
+export conf_dir=$1
+shift
+export bin_dir=$1
+shift
+export old_mark_dir=$1
+shift
+export mark_dir=$1
+shift
+export name_dirs=$*
+
+export EXIT_CODE=0
+export command="namenode -format"
+export list_of_non_empty_dirs=""
+
+# A marker *file* at this fixed path is replaced by the marker *directory*.
+mark_file=/var/run/hadoop/hdfs/namenode-formatted
+if [[ -f ${mark_file} ]] ; then
+  sudo rm -f ${mark_file}
+  sudo mkdir -p ${mark_dir}
+fi
+
+# Carry a marker directory over from its old location.
+# NOTE(review): if mark_dir already exists, mv places old_mark_dir inside it
+# rather than replacing it - confirm that is acceptable.
+if [[ -d $old_mark_dir ]] ; then
+  mv ${old_mark_dir} ${mark_dir}
+fi
+
+# Without a marker directory the NameNode is treated as not yet formatted.
+if [[ ! -d $mark_dir ]] ; then
+  # Count the entries of every name directory; any directory with content
+  # blocks the format.  EXIT_CODE counts the non-empty directories.
+  for dir in `echo $name_dirs | tr ',' ' '` ; do
+    echo "NameNode Dirname = $dir"
+    # grep succeeds only when "ls | wc -l" printed exactly 0.  A directory
+    # that cannot be listed also prints 0 and is therefore treated as empty.
+    cmd="ls $dir | wc -l  | grep -q ^0$"
+    eval $cmd
+    if [[ $? -ne 0 ]] ; then
+      (( EXIT_CODE = $EXIT_CODE + 1 ))
+      list_of_non_empty_dirs="$list_of_non_empty_dirs $dir"
+    fi
+  done
+
+  if [[ $EXIT_CODE == 0 ]] ; then
+    # Run the format as hdfs_user; "yes Y" answers its confirmation prompts.
+    # $PATH, bin_dir, conf_dir and command are expanded by this shell before
+    # su runs.  The status of the su invocation is OR-ed into EXIT_CODE.
+    # NOTE(review): nothing in this script creates mark_dir after a successful
+    # format - presumably the caller does; verify.
+    sudo su ${hdfs_user} - -s /bin/bash -c "export PATH=$PATH:${bin_dir} ; yes Y | hdfs --config ${conf_dir} ${command}"
+    (( EXIT_CODE = $EXIT_CODE | $? ))
+  else
+    echo "ERROR: Namenode directory(s) is non empty. Will not format the namenode. List of non-empty namenode dirs ${list_of_non_empty_dirs}"
+  fi
+else
+  echo "${mark_dir} exists. Namenode DFS already formatted"
+fi
+
+exit $EXIT_CODE
+

http://git-wip-us.apache.org/repos/asf/ambari/blob/17b71553/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkWebUI.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkWebUI.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkWebUI.py
new file mode 100644
index 0000000..f8e9c1a
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/files/checkWebUI.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+import optparse
+import httplib
+
+#
+# Main.
+#
+def main():
+  parser = optparse.OptionParser(usage="usage: %prog [options] component ")
+  parser.add_option("-m", "--hosts", dest="hosts", help="Comma separated hosts list for WEB UI to check it availability")
+  parser.add_option("-p", "--port", dest="port", help="Port of WEB UI to check it availability")
+
+  (options, args) = parser.parse_args()
+  
+  hosts = options.hosts.split(',')
+  port = options.port
+
+  for host in hosts:
+    try:
+      conn = httplib.HTTPConnection(host, port)
+      # This can be modified to get a partial url part to be sent with request
+      conn.request("GET", "/")
+      httpCode = conn.getresponse().status
+      conn.close()
+    except Exception:
+      httpCode = 404
+
+    if httpCode != 200:
+      print "Cannot access WEB UI on: http://" + host + ":" + port
+      exit(1)
+      
+
+if __name__ == "__main__":
+  main()

http://git-wip-us.apache.org/repos/asf/ambari/blob/17b71553/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/__init__.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/__init__.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/__init__.py
new file mode 100644
index 0000000..35de4bb
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/__init__.py
@@ -0,0 +1,20 @@
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Ambari Agent
+
+"""