You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/02/26 23:54:26 UTC

[impala] 04/04: IMPALA-8188: Fix DiskInfo::GetDeviceNames() for NVME disks

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 7ba6aa64a2455df30524ad42b10d7fd9227efacb
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Mon Feb 11 17:07:37 2019 -0800

    IMPALA-8188: Fix DiskInfo::GetDeviceNames() for NVME disks
    
    DiskInfo::GetDeviceNames() assumes a device name pattern where
    the last digits can be removed to get the main disk device
    (i.e. sda3 -> sda). NVME devices have a naming pattern of
    nvme{device_id}n{namespace_id}p{partition_id} where the main
    disk device removes the p{partition_id} part. This difference
    in naming prevents DiskInfo::GetDeviceNames() from looking
    up the associated information in /sys/block to find whether
    it is a rotational device (and thus use an appropriate
    number of DiskIo threads).
    
    This adds a condition to detect an NVME device name and
    handle it correctly.
    
    Testing:
     - Added unit test for the function used for detection
     - Tested this on an AWS m5-4xlarge, which is a Nitro instance
       type that exposes the main disk as an NVME device.
    
    Change-Id: I4d5bd93b4b09682a8c8248e7aa123d23d27cfeb4
    Reviewed-on: http://gerrit.cloudera.org:8080/12557
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
---
 be/src/util/disk-info.cc     | 30 +++++++++++++++++++++++++++--
 be/src/util/disk-info.h      |  7 +++++++
 be/src/util/sys-info-test.cc | 45 +++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/be/src/util/disk-info.cc b/be/src/util/disk-info.cc
index 0a383c8..25d9f02 100644
--- a/be/src/util/disk-info.cc
+++ b/be/src/util/disk-info.cc
@@ -17,6 +17,7 @@
 
 #include "util/disk-info.h"
 
+#include <regex>
 #ifdef __APPLE__
 #include <sys/mount.h>
 #else
@@ -50,6 +51,22 @@ vector<DiskInfo::Disk> DiskInfo::disks_;
 map<dev_t, int> DiskInfo::device_id_to_disk_id_;
 map<string, int> DiskInfo::disk_name_to_disk_id_;
 
+bool DiskInfo::TryNVMETrim(const std::string& name_in, std::string* basename_out) {
+  // NVME drives do not follow the typical device naming pattern. The pattern for NVME
+  // drives is nvme{device_id}n{namespace_id}p{partition_id}. The appropriate thing
+  // to do for this pattern is to trim the "p{partition_id}" part.
+  std::regex nvme_regex = std::regex("(nvme[0-9]+n[0-9]+)(p[0-9]+)*");
+  std::smatch nvme_match_result;
+  if (std::regex_match(name_in, nvme_match_result, nvme_regex)) {
+    DCHECK_GE(nvme_match_result.size(), 2);
+    // Match 0 contains the whole string.
+    // Match 1 contains the base nvme device without the partition.
+    *basename_out = nvme_match_result[1];
+    return true;
+  }
+  return false;
+}
+
 // Parses /proc/partitions to get the number of disks.  A bit of looking around
 // seems to indicate this as the best way to do this.
 // TODO: is there not something better than this?
@@ -70,8 +87,17 @@ void DiskInfo::GetDeviceNames() {
     string name = fields[3];
     if (name == "name") continue;
 
-    // Remove the partition# from the name.  e.g. sda2 --> sda
-    trim_right_if(name, is_any_of("0123456789"));
+    // NVME devices have a special format. Try to detect that, falling back to the normal
+    // method if this is not an NVME device.
+    std::string nvme_basename;
+    if (TryNVMETrim(name, &nvme_basename)) {
+      // This is an NVME device, use the returned basename
+      name = nvme_basename;
+    } else {
+      // Does not follow the NVME pattern, so use the logic for a normal disk device
+      // Remove the partition# from the name.  e.g. sda2 --> sda
+      trim_right_if(name, is_any_of("0123456789"));
+    }
 
     // Create a mapping of all device ids (one per partition) to the disk id.
     int major_dev_id = atoi(fields[0].c_str());
diff --git a/be/src/util/disk-info.h b/be/src/util/disk-info.h
index edfea21..cb43b15 100644
--- a/be/src/util/disk-info.h
+++ b/be/src/util/disk-info.h
@@ -67,6 +67,8 @@ class DiskInfo {
   static std::string DebugString();
 
  private:
+  friend class DiskInfoTest;
+
   static bool initialized_;
 
   struct Disk {
@@ -93,6 +95,11 @@ class DiskInfo {
   static std::map<std::string, int> disk_name_to_disk_id_;
 
   static void GetDeviceNames();
+
+  /// See if 'name_in' is an NVME device. If it is, set 'basename_out' to the base
+  /// NVME device name (i.e. nvme0n1p1 -> nvme0n1) and return true. Otherwise,
+  /// return false and leave 'basename_out' unmodified.
+  static bool TryNVMETrim(const std::string& name_in, std::string* basename_out);
 };
 }
 #endif
diff --git a/be/src/util/sys-info-test.cc b/be/src/util/sys-info-test.cc
index 7fe9da1..132c330 100644
--- a/be/src/util/sys-info-test.cc
+++ b/be/src/util/sys-info-test.cc
@@ -56,7 +56,22 @@ TEST(CpuInfoTest, InvalidSchedGetCpuValue) {
 
 }
 
-TEST(DiskInfoTest, Basic) {
+class DiskInfoTest : public ::testing::Test {
+ protected:
+  void TestTryNVMETrimPositive(const string& name, const string& expected_basename) {
+    string nvme_basename;
+    ASSERT_TRUE(DiskInfo::TryNVMETrim(name, &nvme_basename));
+    ASSERT_EQ(nvme_basename, expected_basename);
+  }
+
+  void TestTryNVMETrimNegative(const string& name) {
+    string nvme_basename;
+    ASSERT_FALSE(DiskInfo::TryNVMETrim(name, &nvme_basename));
+    ASSERT_EQ(nvme_basename, "");
+  }
+};
+
+TEST_F(DiskInfoTest, Basic) {
   cout << DiskInfo::DebugString();
   cout << "Device name for disk 0: " << DiskInfo::device_name(0) << endl;
 
@@ -69,5 +84,33 @@ TEST(DiskInfoTest, Basic) {
     cout << "No device name for /home";
   }
 }
+
+TEST_F(DiskInfoTest, NVMEDetection) {
+  // Positive case 1: NVME device with a partition specified
+  TestTryNVMETrimPositive("nvme0n1p2", "nvme0n1");
+
+  // Positive case 2: NVME device without partition specified
+  TestTryNVMETrimPositive("nvme0n1", "nvme0n1");
+
+  // Positive case 3: NVME multi-digit device id, namespace id, partition id
+  TestTryNVMETrimPositive("nvme15n13p24", "nvme15n13");
+
+  // Negative case 1: disk drive
+  TestTryNVMETrimNegative("sda3");
+  TestTryNVMETrimNegative("sda");
+
+  // Negative case 2: malformed NVME, missing namespace
+  TestTryNVMETrimNegative("nvme0p1");
+  TestTryNVMETrimNegative("nvme0");
+
+  // Negative case 3: malformed NVME, missing device id
+  TestTryNVMETrimNegative("nvmen1p3");
+  TestTryNVMETrimNegative("nvmen1");
+
+  // Negative case 4: extra garbage before / after
+  TestTryNVMETrimNegative("sdanvme0n1");
+  TestTryNVMETrimNegative("nvme0n1p0blah");
+}
+
 }