You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by jo...@apache.org on 2016/06/23 02:10:16 UTC
[2/2] ambari git commit: AMBARI-17380 - The DataNode Unmounted Alert Produces False Alerts When file:// URIs Are Used (jonathanhurley)
AMBARI-17380 - The DataNode Unmounted Alert Produces False Alerts When file:// URIs Are Used (jonathanhurley)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/ec942a1a
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/ec942a1a
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/ec942a1a
Branch: refs/heads/branch-2.4
Commit: ec942a1ac1d314d4006a0aa52fb07b3b67cec722
Parents: 4720e51
Author: Jonathan Hurley <jh...@hortonworks.com>
Authored: Wed Jun 22 17:09:19 2016 -0400
Committer: Jonathan Hurley <jh...@hortonworks.com>
Committed: Wed Jun 22 22:09:04 2016 -0400
----------------------------------------------------------------------
.../alerts/alert_datanode_unmounted_data_dir.py | 41 +++++++++++++-------
.../test_alert_datanode_unmounted_data_dir.py | 34 +++++++++++++---
2 files changed, 56 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec942a1a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py
index df85002..765831d 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py
@@ -20,6 +20,7 @@ limitations under the License.
import os
import logging
+import urlparse
from resource_management.libraries.functions import file_system
from resource_management.libraries.functions import mounted_dirs_helper
@@ -52,6 +53,11 @@ def execute(configurations={}, parameters={}, host_name=None):
configurations (dictionary): a mapping of configuration key to value
parameters (dictionary): a mapping of script parameter key to value
host_name (string): the name of this host where the alert is running
+
+ DataNode directories can be of the following formats and each needs to be supported:
+ /grid/dn/archive0
+ [SSD]/grid/dn/archive0
+ [ARCHIVE]file:///grid/dn/archive0
"""
warnings = []
errors = []
@@ -68,33 +74,40 @@ def execute(configurations={}, parameters={}, host_name=None):
if dfs_data_dir is None:
return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script and the value is null'.format(DFS_DATA_DIR)])
- data_dir_mount_file_exists = True
# This follows symlinks and will return False for a broken link (even in the middle of the linked list)
+ data_dir_mount_file_exists = True
if not os.path.exists(DATA_DIR_MOUNT_FILE):
data_dir_mount_file_exists = False
- warnings.append("File not found, {0} .".format(DATA_DIR_MOUNT_FILE))
+ warnings.append("{0} was not found.".format(DATA_DIR_MOUNT_FILE))
- valid_data_dirs = set() # data dirs that have been normalized
+ normalized_data_dirs = set() # data dirs that have been normalized
data_dirs_not_exist = set() # data dirs that do not exist
data_dirs_unknown = set() # data dirs for which could not determine mount
data_dirs_on_root = set() # set of data dirs that are on root mount
data_dirs_on_mount = set() # set of data dirs that are mounted on a device
data_dirs_unmounted = [] # list of data dirs that are known to have become unmounted
+ # transform each data directory into something that we can use
for data_dir in dfs_data_dir.split(","):
if data_dir is None or data_dir.strip() == "":
continue
+
data_dir = data_dir.strip()
+
# filter out data storage tags
for tag in DATA_STORAGE_TAGS:
if data_dir.startswith(tag):
data_dir = data_dir.replace(tag, "")
continue
- valid_data_dirs.add(data_dir)
+
+ # parse the path in case it contains a URI scheme
+ data_dir = urlparse.urlparse(data_dir).path
+
+ normalized_data_dirs.add(data_dir)
# Sort the data dirs, which is needed for deterministic behavior when running the unit tests.
- valid_data_dirs = sorted(valid_data_dirs)
- for data_dir in valid_data_dirs:
+ normalized_data_dirs = sorted(normalized_data_dirs)
+ for data_dir in normalized_data_dirs:
# This follows symlinks and will return False for a broken link (even in the middle of the linked list)
if os.path.isdir(data_dir):
curr_mount_point = file_system.get_mount_point_for_dir(data_dir)
@@ -111,16 +124,16 @@ def execute(configurations={}, parameters={}, host_name=None):
data_dirs_not_exist.add(data_dir)
# To keep the messages consistent for all hosts, sort the sets into lists
- valid_data_dirs = sorted(valid_data_dirs)
+ normalized_data_dirs = sorted(normalized_data_dirs)
data_dirs_not_exist = sorted(data_dirs_not_exist)
data_dirs_unknown = sorted(data_dirs_unknown)
data_dirs_on_root = sorted(data_dirs_on_root)
if data_dirs_not_exist:
- errors.append("Data dir(s) not found: {0} .".format(", ".join(data_dirs_not_exist)))
+ errors.append("The following data dir(s) were not found: {0}\n".format("\n".join(data_dirs_not_exist)))
if data_dirs_unknown:
- errors.append("Cannot find mount point for data dir(s): {0} .".format(", ".join(data_dirs_unknown)))
+ errors.append("Cannot find the mount point for the following data dir(s):\n{0}".format("\n".join(data_dirs_unknown)))
if data_dir_mount_file_exists:
# This dictionary contains the expected values of <data_dir, mount_point>
@@ -135,13 +148,13 @@ def execute(configurations={}, parameters={}, host_name=None):
data_dirs_unmounted.append(data_dir)
if len(data_dirs_unmounted) > 0:
- errors.append("Detected data dir(s) that became unmounted and are now writing to the root partition: {0} .".format(", ".join(data_dirs_unmounted)))
+ errors.append("Detected data dir(s) that became unmounted and are now writing to the root partition:\n{0}".format("\n".join(data_dirs_unmounted)))
else:
# Couldn't make guarantees about the expected value of mount points, so rely on this strategy that is likely to work.
# It will report false positives (aka false alarms) if the user actually intended to have
# 1+ data dirs on a mount and 1+ data dirs on the root partition.
if len(data_dirs_on_mount) >= 1 and len(data_dirs_on_root) >= 1:
- errors.append("Detected at least one data dir on a mount point, but these are writing to the root partition: {0} .".format(", ".join(data_dirs_on_root)))
+ errors.append("Detected at least one data dir on a mount point, but these are writing to the root partition:\n{0}".format("\n".join(data_dirs_on_root)))
# Determine the status based on warnings and errors.
if len(errors) == 0:
@@ -153,10 +166,10 @@ def execute(configurations={}, parameters={}, host_name=None):
status = RESULT_STATE_WARNING
messages += warnings
- if len(valid_data_dirs) > 0:
- messages.append("Data dir(s) are fine, {0} .".format(", ".join(valid_data_dirs)))
+ if len(normalized_data_dirs) > 0:
+ messages.append("The following data dir(s) are valid:\n{0}".format("\n".join(normalized_data_dirs)))
else:
- messages.append("No data dirs to analyze.")
+ messages.append("There are no data directories to analyze.")
return (status, ["\n".join(messages)])
else:
http://git-wip-us.apache.org/repos/asf/ambari/blob/ec942a1a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py
index c9bd187..c7dd47c 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py
@@ -91,7 +91,7 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
[status, messages] = alert.execute(configurations=configs)
self.assertEqual(status, RESULT_STATE_WARNING)
self.assertTrue(messages is not None and len(messages) == 1)
- self.assertTrue("File not found, {0}".format(DATA_DIR_MOUNT_HIST_FILE_PATH) in messages[0])
+ self.assertTrue("{0} was not found".format(DATA_DIR_MOUNT_HIST_FILE_PATH) in messages[0])
@patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
@patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
@@ -117,7 +117,7 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
[status, messages] = alert.execute(configurations=configs)
self.assertEqual(status, RESULT_STATE_OK)
self.assertTrue(messages is not None and len(messages) == 1)
- self.assertTrue("Data dir(s) are fine" in messages[0])
+ self.assertTrue("The following data dir(s) are valid" in messages[0])
@patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
@patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
@@ -142,7 +142,7 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
[status, messages] = alert.execute(configurations=configs)
self.assertEqual(status, RESULT_STATE_OK)
self.assertTrue(messages is not None and len(messages) == 1)
- self.assertTrue("Data dir(s) are fine" in messages[0])
+ self.assertTrue("The following data dir(s) are valid" in messages[0])
@patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
@patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
@@ -166,7 +166,7 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
[status, messages] = alert.execute(configurations=configs)
self.assertEqual(status, RESULT_STATE_CRITICAL)
self.assertTrue(messages is not None and len(messages) == 1)
- self.assertTrue("Detected at least one data dir on a mount point, but these are writing to the root partition: /grid/0/data, /grid/1/data" in messages[0])
+ self.assertTrue("Detected at least one data dir on a mount point, but these are writing to the root partition:\n/grid/0/data\n/grid/1/data" in messages[0])
@patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
@patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
@@ -193,4 +193,28 @@ class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
[status, messages] = alert.execute(configurations=configs)
self.assertEqual(status, RESULT_STATE_CRITICAL)
self.assertTrue(messages is not None and len(messages) == 1)
- self.assertTrue("Detected data dir(s) that became unmounted and are now writing to the root partition: /grid/1/data ." in messages[0])
\ No newline at end of file
+ self.assertTrue("Detected data dir(s) that became unmounted and are now writing to the root partition:\n/grid/1/data" in messages[0])
+
+
+ @patch("resource_management.libraries.functions.mounted_dirs_helper.get_dir_to_mount_from_file")
+ @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
+ @patch("os.path.exists")
+ @patch("os.path.isdir")
+ def test_file_uri_and_meta_tags(self, is_dir_mock, exists_mock, get_mount_mock, get_data_dir_to_mount_from_file_mock):
+ """
+ Test that the status is OK when the locations include file:// schemes and meta tags.
+ """
+ configs = {
+ "{{hdfs-site/dfs.datanode.data.dir}}":"[SSD]file:///grid/0/data"
+ }
+
+ # Mock calls
+ exists_mock.return_value = True
+ is_dir_mock.return_value = True
+ get_mount_mock.return_value = "/"
+ get_data_dir_to_mount_from_file_mock.return_value = {"/grid/0/data":"/"}
+
+ [status, messages] = alert.execute(configurations = configs)
+ self.assertEqual(status, RESULT_STATE_OK)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertEqual("The following data dir(s) are valid:\n/grid/0/data", messages[0])
\ No newline at end of file