You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by al...@apache.org on 2015/09/23 23:37:33 UTC
ambari git commit: AMBARI-13194. Alert definition when DataNode data
dirs become unmounted (alejandro)
Repository: ambari
Updated Branches:
refs/heads/trunk b2b88ee89 -> 51620c6e2
AMBARI-13194. Alert definition when DataNode data dirs become unmounted (alejandro)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/51620c6e
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/51620c6e
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/51620c6e
Branch: refs/heads/trunk
Commit: 51620c6e2c7c846df26294d52c83fab9f141d202
Parents: b2b88ee
Author: Alejandro Fernandez <af...@hortonworks.com>
Authored: Wed Sep 23 09:44:29 2015 -0700
Committer: Alejandro Fernandez <af...@hortonworks.com>
Committed: Wed Sep 23 14:37:22 2015 -0700
----------------------------------------------------------------------
.../resource_management/TestDatanodeHelper.py | 4 +-
.../core/providers/system.py | 26 ++-
.../libraries/functions/dfs_datanode_helper.py | 14 +-
.../common-services/HDFS/2.1.0.2.0/alerts.json | 14 +-
.../alerts/alert_datanode_unmounted_data_dir.py | 172 +++++++++++++++
.../test_alert_datanode_unmounted_data_dir.py | 218 +++++++++++++++++++
6 files changed, 430 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/51620c6e/ambari-agent/src/test/python/resource_management/TestDatanodeHelper.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/resource_management/TestDatanodeHelper.py b/ambari-agent/src/test/python/resource_management/TestDatanodeHelper.py
index e348cc4..a74cc0b 100644
--- a/ambari-agent/src/test/python/resource_management/TestDatanodeHelper.py
+++ b/ambari-agent/src/test/python/resource_management/TestDatanodeHelper.py
@@ -117,7 +117,7 @@ class TestDatanodeHelper(TestCase):
@patch.object(Logger, "info")
@patch.object(Logger, "error")
- @patch.object(dfs_datanode_helper, "_get_data_dir_to_mount_from_file")
+ @patch.object(dfs_datanode_helper, "get_data_dir_to_mount_from_file")
@patch.object(dfs_datanode_helper, "_write_data_dir_to_mount_in_file")
@patch.object(dfs_datanode_helper, "get_mount_point_for_dir")
@patch.object(os.path, "isdir")
@@ -157,7 +157,7 @@ class TestDatanodeHelper(TestCase):
@patch.object(Logger, "info")
@patch.object(Logger, "error")
- @patch.object(dfs_datanode_helper, "_get_data_dir_to_mount_from_file")
+ @patch.object(dfs_datanode_helper, "get_data_dir_to_mount_from_file")
@patch.object(dfs_datanode_helper, "_write_data_dir_to_mount_in_file")
@patch.object(dfs_datanode_helper, "get_mount_point_for_dir")
@patch.object(os.path, "isdir")
http://git-wip-us.apache.org/repos/asf/ambari/blob/51620c6e/ambari-common/src/main/python/resource_management/core/providers/system.py
----------------------------------------------------------------------
diff --git a/ambari-common/src/main/python/resource_management/core/providers/system.py b/ambari-common/src/main/python/resource_management/core/providers/system.py
index 213adc5..d53779c 100644
--- a/ambari-common/src/main/python/resource_management/core/providers/system.py
+++ b/ambari-common/src/main/python/resource_management/core/providers/system.py
@@ -131,22 +131,23 @@ class FileProvider(Provider):
class DirectoryProvider(Provider):
def action_create(self):
path = self.resource.path
-
+
if not sudo.path_exists(path):
- Logger.info("Creating directory %s" % self.resource)
+ Logger.info("Creating directory %s since it doesn't exist." % self.resource)
# Dead links should be followed, otherwise we will get failures when trying to create directories on top of them.
if self.resource.follow:
- followed_links = []
+ # Follow symlink until the tail.
+ followed_links = set()
while sudo.path_lexists(path):
if path in followed_links:
raise Fail("Applying %s failed, looped symbolic links found while resolving %s" % (self.resource, path))
- followed_links.append(path)
+ followed_links.add(path)
path = sudo.readlink(path)
if path != self.resource.path:
Logger.info("Following the link {0} to {1} to create the directory".format(self.resource.path, path))
-
+
if self.resource.recursive:
if self.resource.recursive_permission:
DirectoryProvider.makedirs_and_set_permission_recursively(path, self.resource.owner,
@@ -168,17 +169,18 @@ class DirectoryProvider(Provider):
@staticmethod
def makedirs_and_set_permission_recursively(path, owner, group, mode):
- folders=[]
- path,folder=os.path.split(path)
- while folder!="":
+ folders = []
+ path, folder = os.path.split(path)
+ while folder != "":
folders.append(folder)
- path,folder=os.path.split(path)
- if path!="":
+ path, folder = os.path.split(path)
+ if path != "":
folders.append(path)
+
folders.reverse()
- dir_prefix=""
+ dir_prefix = ""
for folder in folders:
- dir_prefix=os.path.join(dir_prefix, folder)
+ dir_prefix = os.path.join(dir_prefix, folder)
if not sudo.path_exists(dir_prefix):
sudo.makedir(dir_prefix, mode or 0755)
_ensure_metadata(dir_prefix, None, None, mode)
http://git-wip-us.apache.org/repos/asf/ambari/blob/51620c6e/ambari-common/src/main/python/resource_management/libraries/functions/dfs_datanode_helper.py
----------------------------------------------------------------------
diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/dfs_datanode_helper.py b/ambari-common/src/main/python/resource_management/libraries/functions/dfs_datanode_helper.py
index a05e162..778d869 100644
--- a/ambari-common/src/main/python/resource_management/libraries/functions/dfs_datanode_helper.py
+++ b/ambari-common/src/main/python/resource_management/libraries/functions/dfs_datanode_helper.py
@@ -51,7 +51,7 @@ def _write_data_dir_to_mount_in_file(params, new_data_dir_to_mount_point):
return True
-def _get_data_dir_to_mount_from_file(params):
+def get_data_dir_to_mount_from_file(params):
"""
:return: Returns a dictionary by parsing the data_dir_mount_file file,
where the key is each DFS data dir, and the value is its last known mount point.
@@ -99,7 +99,7 @@ def handle_dfs_data_dir(func, params, update_cache=True):
"""
# Get the data dirs that Ambari knows about and their last known mount point
- prev_data_dir_to_mount_point = _get_data_dir_to_mount_from_file(params)
+ prev_data_dir_to_mount_point = get_data_dir_to_mount_from_file(params)
# Dictionary from data dir to the mount point that will be written to the history file.
# If a data dir becomes unmounted, we should still keep its original value.
@@ -107,7 +107,15 @@ def handle_dfs_data_dir(func, params, update_cache=True):
data_dir_to_mount_point = prev_data_dir_to_mount_point.copy()
# This should typically be False for customers, but True the first time.
- allowed_to_create_any_dir = params.data_dir_mount_file is None or not os.path.exists(params.data_dir_mount_file)
+ allowed_to_create_any_dir = False
+
+ if params.data_dir_mount_file is None:
+ allowed_to_create_any_dir = True
+ Logger.warning("DataNode is allowed to create any data directory since dfs.datanode.data.dir.mount.file property is null.")
+ else:
+ if not os.path.exists(params.data_dir_mount_file):
+ allowed_to_create_any_dir = True
+ Logger.warning("DataNode is allowed to create any data directory since dfs.datanode.data.dir.mount.file property has file %s and it does not exist." % params.data_dir_mount_file)
valid_data_dirs = [] # data dirs that have been normalized
error_messages = [] # list of error messages to report at the end
http://git-wip-us.apache.org/repos/asf/ambari/blob/51620c6e/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
index 2fcacc8..2ea9446 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/alerts.json
@@ -708,7 +708,19 @@
"value": "({1} - {0})/{1} * 100.0"
}
}
- }
+ },
+ {
+ "name": "datanode_unmounted_data_dir",
+ "label": "DataNode Unmounted Data Dir",
+ "description": "This host-level alert is triggered if one of the data directories on a host was previously on a mount point and became unmounted. If the mount history file does not exist, then report an error if a host has one or more mounted data directories as well as one or more unmounted data directories on the root partition. This may indicate that a data directory is writing to the root partition, which is undesirable.",
+ "interval": 2,
+ "scope": "HOST",
+ "enabled": true,
+ "source": {
+ "type": "SCRIPT",
+ "path": "HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py"
+ }
+ }
],
"ZKFC": [
{
http://git-wip-us.apache.org/repos/asf/ambari/blob/51620c6e/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py
new file mode 100644
index 0000000..2912406
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_datanode_unmounted_data_dir.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import os
+import logging
+
+from resource_management.libraries.functions import file_system
+from resource_management.libraries.functions import dfs_datanode_helper
+
+RESULT_STATE_OK = 'OK'  # status strings returned as the first element of execute's result
+RESULT_STATE_WARNING = 'WARNING'
+RESULT_STATE_CRITICAL = 'CRITICAL'
+RESULT_STATE_UNKNOWN = 'UNKNOWN'
+
+DFS_DATA_DIR = '{{hdfs-site/dfs.datanode.data.dir}}'  # token whose value is the comma-separated list of data dirs
+DATA_DIR_MOUNT_FILE = '{{hadoop-env/dfs.datanode.data.dir.mount.file}}'  # token whose value is the path of the mount history file
+
+logger = logging.getLogger()  # root logger (no name is passed)
+
+
+def get_tokens():
+ """
+ Returns a tuple of tokens in the format {{site/property}} that will be used
+ to build the dictionary passed into execute
+ """
+ return (DFS_DATA_DIR, DATA_DIR_MOUNT_FILE)
+
+
+def execute(configurations={}, parameters={}, host_name=None):
+ """
+ Returns a tuple containing the result code and a pre-formatted result label
+
+ Keyword arguments:
+ configurations (dictionary): a mapping of configuration key to value
+ parameters (dictionary): a mapping of script parameter key to value
+ host_name (string): the name of this host where the alert is running
+ """
+ warnings = []
+ errors = []
+
+ if configurations is None:
+ return (RESULT_STATE_UNKNOWN, ['There were no configurations supplied to the script.'])
+
+ # Check required properties
+ if DFS_DATA_DIR not in configurations:
+ return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(DFS_DATA_DIR)])
+
+ if DATA_DIR_MOUNT_FILE not in configurations:
+ return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(DATA_DIR_MOUNT_FILE)])
+
+ dfs_data_dir = configurations[DFS_DATA_DIR]
+ data_dir_mount_file = configurations[DATA_DIR_MOUNT_FILE]
+
+ if dfs_data_dir is None:
+ return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script and the value is null'.format(DFS_DATA_DIR)])
+
+ if data_dir_mount_file is None:
+ return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script and the value is null'.format(DATA_DIR_MOUNT_FILE)])
+
+ data_dir_mount_file_exists = True
+ # This follows symlinks and will return False for a broken link (even in the middle of the linked list)
+ if not os.path.exists(data_dir_mount_file):
+ data_dir_mount_file_exists = False
+ warnings.append("File not found, {0} .".format(data_dir_mount_file))
+
+ valid_data_dirs = set() # data dirs that have been normalized
+ data_dirs_not_exist = set() # data dirs that do not exist
+ data_dirs_unknown = set() # data dirs for which could not determine mount
+ data_dirs_on_root = set() # set of data dirs that are on root mount
+ data_dirs_on_mount = set() # set of data dirs that are mounted on a device
+ data_dirs_unmounted = [] # list of data dirs that are known to have become unmounted
+
+ for data_dir in dfs_data_dir.split(","):
+ if data_dir is None or data_dir.strip() == "":
+ continue
+ data_dir = data_dir.strip()
+ valid_data_dirs.add(data_dir)
+
+ # Sort the data dirs, which is needed for deterministic behavior when running the unit tests.
+ valid_data_dirs = sorted(valid_data_dirs)
+ for data_dir in valid_data_dirs:
+ # This follows symlinks and will return False for a broken link (even in the middle of the linked list)
+ if os.path.isdir(data_dir):
+ curr_mount_point = file_system.get_mount_point_for_dir(data_dir)
+ curr_mount_point = curr_mount_point.strip() if curr_mount_point else curr_mount_point
+
+ if curr_mount_point is not None and curr_mount_point != "":
+ if curr_mount_point == "/":
+ data_dirs_on_root.add(data_dir)
+ else:
+ data_dirs_on_mount.add(data_dir)
+ else:
+ data_dirs_unknown.add(data_dir)
+ else:
+ data_dirs_not_exist.add(data_dir)
+
+ # To keep the messages consistent for all hosts, sort the sets into lists
+ valid_data_dirs = sorted(valid_data_dirs)
+ data_dirs_not_exist = sorted(data_dirs_not_exist)
+ data_dirs_unknown = sorted(data_dirs_unknown)
+ data_dirs_on_root = sorted(data_dirs_on_root)
+
+ if data_dirs_not_exist:
+ errors.append("Data dir(s) not found: {0} .".format(", ".join(data_dirs_not_exist)))
+
+ if data_dirs_unknown:
+ errors.append("Cannot find mount point for data dir(s): {0} .".format(", ".join(data_dirs_unknown)))
+
+ if data_dir_mount_file_exists:
+ # Make a precise determination on which data dirs have become unmounted.
+
+ class Params:
+ def __init__(self, mount_file):
+ self.data_dir_mount_file = mount_file
+ params = Params(data_dir_mount_file)
+
+ # This dictionary contains the expected values of <data_dir, mount_point>
+ # Hence, we only need to analyze the data dirs that are currently on the root partition
+ # and report an error if they were expected to be on a mount.
+ #
+ # If one of the data dirs is not present in the file, it means that DataNode has not been restarted after
+ # the configuration was changed on the server, so we cannot make any assertions about it.
+ expected_data_dir_to_mount = dfs_datanode_helper.get_data_dir_to_mount_from_file(params)
+ for data_dir in data_dirs_on_root:
+ if data_dir in expected_data_dir_to_mount and expected_data_dir_to_mount[data_dir] != "/":
+ data_dirs_unmounted.append(data_dir)
+
+ if len(data_dirs_unmounted) > 0:
+ errors.append("Detected data dir(s) that became unmounted and are now writing to the root partition: {0} .".format(", ".join(data_dirs_unmounted)))
+ else:
+ # Couldn't make guarantees about the expected value of mount points, so rely on this strategy that is likely to work.
+ # It will report false positives (aka false alarms) if the user actually intended to have
+ # 1+ data dirs on a mount and 1+ data dirs on the root partition.
+ if len(data_dirs_on_mount) >= 1 and len(data_dirs_on_root) >= 1:
+ errors.append("Detected at least one data dir on a mount point, but these are writing to the root partition: {0} .".format(", ".join(data_dirs_on_root)))
+
+ # Determine the status based on warnings and errors.
+ if len(errors) == 0:
+ status = RESULT_STATE_OK
+ messages = []
+
+ # Check for warnings
+ if len(warnings) > 0:
+ status = RESULT_STATE_WARNING
+ messages += warnings
+
+ if len(valid_data_dirs) > 0:
+ messages.append("Data dir(s) are fine, {0} .".format(", ".join(valid_data_dirs)))
+ else:
+ messages.append("No data dirs to analyze.")
+
+ return (status, ["\n".join(messages)])
+ else:
+ # Report errors
+ return (RESULT_STATE_CRITICAL, ["\n".join(errors)])
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/ambari/blob/51620c6e/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py
new file mode 100644
index 0000000..4406231
--- /dev/null
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_alert_datanode_unmounted_data_dir.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+# System imports
+import os
+import sys
+import logging
+
+from mock.mock import patch
+
+# Local imports
+from stacks.utils.RMFTestCase import *
+import resource_management.libraries.functions.file_system
+
+COMMON_SERVICES_ALERTS_DIR = "HDFS/2.1.0.2.0/package/alerts"  # location of the alert script below common-services
+DATA_DIR_MOUNT_HIST_FILE_PATH = "/etc/hadoop/conf/dfs_data_dir_mount.hist"  # mount history file path handed to the alert in the test configs
+
+file_path = os.path.dirname(os.path.abspath(__file__))  # directory that contains this test module
+file_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(file_path)))))  # climb five directory levels
+file_path = os.path.join(file_path, "main", "resources", "common-services", COMMON_SERVICES_ALERTS_DIR)  # directory of the alert script; put on sys.path in setUp
+
+RESULT_STATE_OK = "OK"  # status strings the tests compare against the status returned by the alert
+RESULT_STATE_WARNING = "WARNING"
+RESULT_STATE_CRITICAL = "CRITICAL"
+RESULT_STATE_UNKNOWN = "UNKNOWN"
+
+class TestAlertDataNodeUnmountedDataDir(RMFTestCase):
+
+ def setUp(self):
+ """
+ Import the class under test.
+ Because the class is present in a different folder, append its dir to the system path.
+ Also, shorten the import name and make it a global so the test functions can access it.
+ :return:
+ """
+ self.logger = logging.getLogger()
+ sys.path.append(file_path)
+ global alert
+ import alert_datanode_unmounted_data_dir as alert
+
+ def test_missing_configs(self):
+ """
+ Check that the status is UNKNOWN when configs are missing.
+ """
+ configs = {}
+ [status, messages] = alert.execute(configurations=configs)
+ self.assertEqual(status, RESULT_STATE_UNKNOWN)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertTrue('is a required parameter for the script' in messages[0])
+
+ configs = {
+ "{{hdfs-site/dfs.datanode.data.dir}}": ""
+ }
+ [status, messages] = alert.execute(configurations=configs)
+ self.assertEqual(status, RESULT_STATE_UNKNOWN)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertTrue('is a required parameter for the script' in messages[0])
+
+ configs = {
+ "{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
+ }
+ [status, messages] = alert.execute(configurations=configs)
+ self.assertEqual(status, RESULT_STATE_UNKNOWN)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertTrue('is a required parameter for the script' in messages[0])
+
+ configs = {
+ "{{hdfs-site/dfs.datanode.data.dir}}": "",
+ "{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
+ }
+ [status, messages] = alert.execute(configurations=configs)
+ self.assertNotEqual(status, RESULT_STATE_UNKNOWN)
+
+ @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
+ @patch("os.path.exists")
+ @patch("os.path.isdir")
+ def test_mount_history_file_does_not_exist(self, is_dir_mock, exists_mock, get_mount_mock):
+ """
+ Test that the status is WARNING when the data dirs are mounted on root, but the mount history file
+ does not exist.
+ """
+ configs = {
+ "{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data",
+ "{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
+ }
+
+ # Mock calls
+ exists_mock.return_value = False
+ is_dir_mock.return_value = True
+ get_mount_mock.return_value = "/"
+
+ [status, messages] = alert.execute(configurations=configs)
+ self.assertEqual(status, RESULT_STATE_WARNING)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertTrue("File not found, {0}".format(DATA_DIR_MOUNT_HIST_FILE_PATH) in messages[0])
+
+ @patch("resource_management.libraries.functions.dfs_datanode_helper.get_data_dir_to_mount_from_file")
+ @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
+ @patch("os.path.exists")
+ @patch("os.path.isdir")
+ def test_all_dirs_on_root(self, is_dir_mock, exists_mock, get_mount_mock, get_data_dir_to_mount_from_file_mock):
+ """
+ Test that the status is OK when all drives are mounted on the root partition
+ and this coincides with the expected values.
+ """
+ configs = {
+ "{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data",
+ "{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
+ }
+
+ # Mock calls
+ exists_mock.return_value = True
+ is_dir_mock.return_value = True
+ get_mount_mock.return_value = "/"
+ get_data_dir_to_mount_from_file_mock.return_value = {"/grid/0/data": "/",
+ "/grid/1/data": "/",
+ "/grid/2/data": "/"}
+
+ [status, messages] = alert.execute(configurations=configs)
+ self.assertEqual(status, RESULT_STATE_OK)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertTrue("Data dir(s) are fine" in messages[0])
+
+ @patch("resource_management.libraries.functions.dfs_datanode_helper.get_data_dir_to_mount_from_file")
+ @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
+ @patch("os.path.exists")
+ @patch("os.path.isdir")
+ def test_match_expected(self, is_dir_mock, exists_mock, get_mount_mock, get_data_dir_to_mount_from_file_mock):
+ """
+ Test that the status is OK when the mount points match the expected values.
+ """
+ configs = {
+ "{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data",
+ "{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
+ }
+
+ # Mock calls
+ exists_mock.return_value = True
+ is_dir_mock.return_value = True
+ get_mount_mock.side_effect = ["/device1", "/device2", "/"]
+ get_data_dir_to_mount_from_file_mock.return_value = {"/grid/0/data": "/device1",
+ "/grid/1/data": "/device2",
+ "/grid/2/data": "/"}
+
+ [status, messages] = alert.execute(configurations=configs)
+ self.assertEqual(status, RESULT_STATE_OK)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertTrue("Data dir(s) are fine" in messages[0])
+
+ @patch("resource_management.libraries.functions.dfs_datanode_helper.get_data_dir_to_mount_from_file")
+ @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
+ @patch("os.path.exists")
+ @patch("os.path.isdir")
+ def test_critical_one_root_one_mounted(self, is_dir_mock, exists_mock, get_mount_mock, get_data_dir_to_mount_from_file_mock):
+ """
+ Test that the status is CRITICAL when the history file is missing
+ and at least one data dir is on a mount and at least one data dir is on the root partition.
+ """
+ configs = {
+ "{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data,/grid/3/data",
+ "{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
+ }
+
+ # Mock calls
+ exists_mock.return_value = False
+ is_dir_mock.return_value = True
+ # The first 2 data dirs will report an error.
+ get_mount_mock.side_effect = ["/", "/", "/device1", "/device2"]
+
+ [status, messages] = alert.execute(configurations=configs)
+ self.assertEqual(status, RESULT_STATE_CRITICAL)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertTrue("Detected at least one data dir on a mount point, but these are writing to the root partition: /grid/0/data, /grid/1/data" in messages[0])
+
+ @patch("resource_management.libraries.functions.dfs_datanode_helper.get_data_dir_to_mount_from_file")
+ @patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
+ @patch("os.path.exists")
+ @patch("os.path.isdir")
+ def test_critical_unmounted(self, is_dir_mock, exists_mock, get_mount_mock, get_data_dir_to_mount_from_file_mock):
+ """
+ Test that the status is CRITICAL when the history file exists and one of the dirs
+ became unmounted.
+ """
+ configs = {
+ "{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data,/grid/3/data",
+ "{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
+ }
+
+ # Mock calls
+ exists_mock.return_value = True
+ is_dir_mock.return_value = True
+ get_mount_mock.side_effect = ["/", "/", "/device3", "/device4"]
+ get_data_dir_to_mount_from_file_mock.return_value = {"/grid/0/data": "/", # remained on /
+ "/grid/1/data": "/device2", # became unmounted
+ "/grid/2/data": "/", # became mounted
+ "/grid/3/data": "/device4"} # remained mounted
+
+ [status, messages] = alert.execute(configurations=configs)
+ self.assertEqual(status, RESULT_STATE_CRITICAL)
+ self.assertTrue(messages is not None and len(messages) == 1)
+ self.assertTrue("Detected data dir(s) that became unmounted and are now writing to the root partition: /grid/1/data ." in messages[0])
\ No newline at end of file