You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/02/26 23:54:26 UTC
[impala] 04/04: IMPALA-8188: Fix DiskInfo::GetDeviceNames() for
NVME disks
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 7ba6aa64a2455df30524ad42b10d7fd9227efacb
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Mon Feb 11 17:07:37 2019 -0800
IMPALA-8188: Fix DiskInfo::GetDeviceNames() for NVME disks
DiskInfo::GetDeviceNames() assumes a device name pattern where
the last digits can be removed to get the main disk device
(i.e. sda3 -> sda). NVME devices have a naming pattern of
nvme{device_id}n{namespace_id}p{partition_id} where the main
disk device removes the p{partition_id} part. This difference
in naming prevents DiskInfo::GetDeviceNames() from looking
up the associated information in /sys/block to find whether
it is a rotational device (and thus use an appropriate
number of DiskIo threads).
This adds a condition to detect an NVME device name and
handle it correctly.
Testing:
- Added unit test for the function used for detection
- Tested this on an AWS m5-4xlarge, which is a Nitro instance
type that exposes the main disk as an NVME device.
Change-Id: I4d5bd93b4b09682a8c8248e7aa123d23d27cfeb4
Reviewed-on: http://gerrit.cloudera.org:8080/12557
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
---
be/src/util/disk-info.cc | 30 +++++++++++++++++++++++++++--
be/src/util/disk-info.h | 7 +++++++
be/src/util/sys-info-test.cc | 45 +++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 79 insertions(+), 3 deletions(-)
diff --git a/be/src/util/disk-info.cc b/be/src/util/disk-info.cc
index 0a383c8..25d9f02 100644
--- a/be/src/util/disk-info.cc
+++ b/be/src/util/disk-info.cc
@@ -17,6 +17,7 @@
#include "util/disk-info.h"
+#include <regex>
#ifdef __APPLE__
#include <sys/mount.h>
#else
@@ -50,6 +51,22 @@ vector<DiskInfo::Disk> DiskInfo::disks_;
map<dev_t, int> DiskInfo::device_id_to_disk_id_;
map<string, int> DiskInfo::disk_name_to_disk_id_;
+bool DiskInfo::TryNVMETrim(const std::string& name_in, std::string* basename_out) {
+ // NVME drives do not follow the typical device naming pattern. The pattern for NVME
+ // drives is nvme{device_id}n{namespace_id}p{partition_id}. The appropriate thing
+ // to do for this pattern is to trim the "p{partition_id}" part.
+ std::regex nvme_regex = std::regex("(nvme[0-9]+n[0-9]+)(p[0-9]+)*");
+ std::smatch nvme_match_result;
+ if (std::regex_match(name_in, nvme_match_result, nvme_regex)) {
+ DCHECK_GE(nvme_match_result.size(), 2);
+ // Match 0 contains the whole string.
+ // Match 1 contains the base nvme device without the partition.
+ *basename_out = nvme_match_result[1];
+ return true;
+ }
+ return false;
+}
+
// Parses /proc/partitions to get the number of disks. A bit of looking around
// seems to indicate this as the best way to do this.
// TODO: is there not something better than this?
@@ -70,8 +87,17 @@ void DiskInfo::GetDeviceNames() {
string name = fields[3];
if (name == "name") continue;
- // Remove the partition# from the name. e.g. sda2 --> sda
- trim_right_if(name, is_any_of("0123456789"));
+ // NVME devices have a special format. Try to detect that, falling back to the normal
+ // method if this is not an NVME device.
+ std::string nvme_basename;
+ if (TryNVMETrim(name, &nvme_basename)) {
+ // This is an NVME device, use the returned basename
+ name = nvme_basename;
+ } else {
+ // Does not follow the NVME pattern, so use the logic for a normal disk device
+ // Remove the partition# from the name. e.g. sda2 --> sda
+ trim_right_if(name, is_any_of("0123456789"));
+ }
// Create a mapping of all device ids (one per partition) to the disk id.
int major_dev_id = atoi(fields[0].c_str());
diff --git a/be/src/util/disk-info.h b/be/src/util/disk-info.h
index edfea21..cb43b15 100644
--- a/be/src/util/disk-info.h
+++ b/be/src/util/disk-info.h
@@ -67,6 +67,8 @@ class DiskInfo {
static std::string DebugString();
private:
+ friend class DiskInfoTest;
+
static bool initialized_;
struct Disk {
@@ -93,6 +95,11 @@ class DiskInfo {
static std::map<std::string, int> disk_name_to_disk_id_;
static void GetDeviceNames();
+
+ /// See if 'name_in' is an NVME device. If it is, set 'basename_out' to the base
+ /// NVME device name (i.e. nvme0n1p1 -> nvme0n1) and return true. Otherwise,
+ /// return false and leave 'basename_out' unmodified.
+ static bool TryNVMETrim(const std::string& name_in, std::string* basename_out);
};
}
#endif
diff --git a/be/src/util/sys-info-test.cc b/be/src/util/sys-info-test.cc
index 7fe9da1..132c330 100644
--- a/be/src/util/sys-info-test.cc
+++ b/be/src/util/sys-info-test.cc
@@ -56,7 +56,22 @@ TEST(CpuInfoTest, InvalidSchedGetCpuValue) {
}
-TEST(DiskInfoTest, Basic) {
+class DiskInfoTest : public ::testing::Test {
+ protected:
+ void TestTryNVMETrimPositive(const string& name, const string& expected_basename) {
+ string nvme_basename;
+ ASSERT_TRUE(DiskInfo::TryNVMETrim(name, &nvme_basename));
+ ASSERT_EQ(nvme_basename, expected_basename);
+ }
+
+ void TestTryNVMETrimNegative(const string& name) {
+ string nvme_basename;
+ ASSERT_FALSE(DiskInfo::TryNVMETrim(name, &nvme_basename));
+ ASSERT_EQ(nvme_basename, "");
+ }
+};
+
+TEST_F(DiskInfoTest, Basic) {
cout << DiskInfo::DebugString();
cout << "Device name for disk 0: " << DiskInfo::device_name(0) << endl;
@@ -69,5 +84,33 @@ TEST(DiskInfoTest, Basic) {
cout << "No device name for /home";
}
}
+
+TEST_F(DiskInfoTest, NVMEDetection) {
+ // Positive case 1: NVME device with a partition specified
+ TestTryNVMETrimPositive("nvme0n1p2", "nvme0n1");
+
+ // Positive case 2: NVME device without partition specified
+ TestTryNVMETrimPositive("nvme0n1", "nvme0n1");
+
+ // Positive case 3: NVME multi-digit device id, namespace id, partition id
+ TestTryNVMETrimPositive("nvme15n13p24", "nvme15n13");
+
+ // Negative case 1: disk drive
+ TestTryNVMETrimNegative("sda3");
+ TestTryNVMETrimNegative("sda");
+
+ // Negative case 2: malformed NVME, missing namespace
+ TestTryNVMETrimNegative("nvme0p1");
+ TestTryNVMETrimNegative("nvme0");
+
+ // Negative case 3: malformed NVME, missing device id
+ TestTryNVMETrimNegative("nvmen1p3");
+ TestTryNVMETrimNegative("nvmen1");
+
+ // Negative case 4: extra garbage before / after
+ TestTryNVMETrimNegative("sdanvme0n1");
+ TestTryNVMETrimNegative("nvme0n1p0blah");
+}
+
}