You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by so...@apache.org on 2021/04/26 10:00:35 UTC

[hadoop] branch trunk updated: HDFS-15621. Datanode DirectoryScanner uses excessive memory (#2849). Contributed by Stephen O'Donnell

This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 605ed85  HDFS-15621. Datanode DirectoryScanner uses excessive memory (#2849). Contributed by Stephen O'Donnell
605ed85 is described below

commit 605ed85c291a6250b077da32a49dbb35f3b78bf7
Author: Stephen O'Donnell <st...@gmail.com>
AuthorDate: Mon Apr 26 11:00:23 2021 +0100

    HDFS-15621. Datanode DirectoryScanner uses excessive memory (#2849). Contributed by Stephen O'Donnell
---
 .../hdfs/server/datanode/DirectoryScanner.java     |   2 +-
 .../server/datanode/fsdataset/FsVolumeSpi.java     | 118 ++++++++++-----------
 .../datanode/fsdataset/impl/FsVolumeImpl.java      |   5 +-
 .../hdfs/server/datanode/TestDirectoryScanner.java |  37 +++----
 .../datanode/fsdataset/impl/TestFsDatasetImpl.java |   4 +-
 5 files changed, 77 insertions(+), 89 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
index 63865f6..a3bceec 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
@@ -582,7 +582,7 @@ public class DirectoryScanner implements Runnable {
       long blockId, FsVolumeSpi vol) {
     statsRecord.missingBlockFile++;
     statsRecord.missingMetaFile++;
-    diffRecord.add(new ScanInfo(blockId, null, null, vol));
+    diffRecord.add(new ScanInfo(blockId, null, null, null, vol));
   }
 
   /**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
index 68d1a15..8ae2043 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
@@ -227,27 +227,27 @@ public interface FsVolumeSpi
    */
   public static class ScanInfo implements Comparable<ScanInfo> {
     private final long blockId;
-
     /**
-     * The block file path, relative to the volume's base directory.
-     * If there was no block file found, this may be null. If 'vol'
-     * is null, then this is the full path of the block file.
+     * The full path to the folder containing the block / meta files.
      */
-    private final String blockSuffix;
-
+    private final File basePath;
     /**
-     * The suffix of the meta file path relative to the block file.
-     * If blockSuffix is null, then this will be the entire path relative
-     * to the volume base directory, or an absolute path if vol is also
-     * null.
+     * The block file name, with no path
      */
-    private final String metaSuffix;
+    private final String blockFile;
+    /**
+     * Holds the meta file name, with no path, only if blockFile is null.
+     * If blockFile is not null, the meta file will be named identically to
+     * the blockFile, but with a suffix like "_1234.meta". If the blockFile
+     * is present, we store only the meta file suffix.
+     */
+    private final String metaFile;
 
     private final FsVolumeSpi volume;
 
     private final FileRegion fileRegion;
     /**
-     * Get the file's length in async block scan
+     * Get the file's length in async block scan.
      */
     private final long blockLength;
 
@@ -258,34 +258,18 @@ public interface FsVolumeSpi
         Matcher.quoteReplacement(File.separator);
 
     /**
-     * Get the most condensed version of the path.
-     *
-     * For example, the condensed version of /foo//bar is /foo/bar
-     * Unlike {@link File#getCanonicalPath()}, this will never perform I/O
-     * on the filesystem.
-     *
-     * @param path the path to condense
-     * @return the condensed path
-     */
-    private static String getCondensedPath(String path) {
-      return CONDENSED_PATH_REGEX.matcher(path).
-          replaceAll(QUOTED_FILE_SEPARATOR);
-    }
-
-    /**
      * Get a path suffix.
      *
-     * @param f            The file to get the suffix for.
+     * @param f            The string to get the suffix for.
      * @param prefix       The prefix we're stripping off.
      *
-     * @return             A suffix such that prefix + suffix = path to f
+     * @return             A suffix such that prefix + suffix = f
      */
-    private static String getSuffix(File f, String prefix) {
-      String fullPath = getCondensedPath(f.getAbsolutePath());
-      if (fullPath.startsWith(prefix)) {
-        return fullPath.substring(prefix.length());
+    private static String getSuffix(String f, String prefix) {
+      if (f.startsWith(prefix)) {
+        return f.substring(prefix.length());
       }
-      throw new RuntimeException(prefix + " is not a prefix of " + fullPath);
+      throw new RuntimeException(prefix + " is not a prefix of " + f);
     }
 
     /**
@@ -293,27 +277,27 @@ public interface FsVolumeSpi
      * the block data and meta-data files.
      *
      * @param blockId the block ID
-     * @param blockFile the path to the block data file
-     * @param metaFile the path to the block meta-data file
+     * @param basePath The full path to the directory the block is stored in
+     * @param blockFile The block filename, with no path
+     * @param metaFile The meta filename, with no path. If blockFile is not null
+     *                 then the metaFile and blockFile should have the same
+     *                 prefix, with the meta file having a suffix like
+     *                 "_1234.meta". To save memory, if the blockFile is present
+     *                 we store only the meta file suffix in the object
      * @param vol the volume that contains the block
      */
-    public ScanInfo(long blockId, File blockFile, File metaFile,
-        FsVolumeSpi vol) {
+    public ScanInfo(long blockId, File basePath, String blockFile,
+        String metaFile, FsVolumeSpi vol) {
       this.blockId = blockId;
-      String condensedVolPath =
-          (vol == null || vol.getBaseURI() == null) ? null :
-              getCondensedPath(new File(vol.getBaseURI()).getAbsolutePath());
-      this.blockSuffix = blockFile == null ? null :
-              getSuffix(blockFile, condensedVolPath);
-      this.blockLength = (blockFile != null) ? blockFile.length() : 0;
-      if (metaFile == null) {
-        this.metaSuffix = null;
-      } else if (blockFile == null) {
-        this.metaSuffix = getSuffix(metaFile, condensedVolPath);
+      this.basePath = basePath;
+      this.blockFile = blockFile;
+      if (blockFile != null && metaFile != null) {
+        this.metaFile = getSuffix(metaFile, blockFile);
       } else {
-        this.metaSuffix = getSuffix(metaFile,
-            condensedVolPath + blockSuffix);
+        this.metaFile = metaFile;
       }
+      this.blockLength = (blockFile != null) ?
+          new File(basePath, blockFile).length() : 0;
       this.volume = vol;
       this.fileRegion = null;
     }
@@ -333,8 +317,9 @@ public interface FsVolumeSpi
       this.blockLength = length;
       this.volume = vol;
       this.fileRegion = fileRegion;
-      this.blockSuffix = null;
-      this.metaSuffix = null;
+      this.basePath = null;
+      this.blockFile = null;
+      this.metaFile = null;
     }
 
     /**
@@ -343,8 +328,8 @@ public interface FsVolumeSpi
      * @return the block data file
      */
     public File getBlockFile() {
-      return (blockSuffix == null) ? null :
-        new File(new File(volume.getBaseURI()).getAbsolutePath(), blockSuffix);
+      return (blockFile == null) ? null :
+          new File(basePath.getAbsolutePath(), blockFile);
     }
 
     /**
@@ -363,15 +348,10 @@ public interface FsVolumeSpi
      * @return the block meta data file
      */
     public File getMetaFile() {
-      if (metaSuffix == null) {
+      if (metaFile == null) {
         return null;
       }
-      String fileSuffix = metaSuffix;
-      if (blockSuffix != null) {
-        fileSuffix = blockSuffix + metaSuffix;
-      }
-      return new File(new File(volume.getBaseURI()).getAbsolutePath(),
-          fileSuffix);
+      return new File(basePath.getAbsolutePath(), fullMetaFile());
     }
 
     /**
@@ -414,14 +394,24 @@ public interface FsVolumeSpi
     }
 
     public long getGenStamp() {
-      return metaSuffix != null ? Block.getGenerationStamp(
-          getMetaFile().getName()) :
-            HdfsConstants.GRANDFATHER_GENERATION_STAMP;
+      return metaFile != null ? Block.getGenerationStamp(fullMetaFile())
+          : HdfsConstants.GRANDFATHER_GENERATION_STAMP;
     }
 
     public FileRegion getFileRegion() {
       return fileRegion;
     }
+
+    private String fullMetaFile() {
+      if (metaFile == null) {
+        return null;
+      }
+      if (blockFile == null) {
+        return metaFile;
+      } else {
+        return blockFile + metaFile;
+      }
+    }
   }
 
   /**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
index 6681f6f..1dda858 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
@@ -1451,7 +1451,7 @@ public class FsVolumeImpl implements FsVolumeSpi {
           long blockId = Block.getBlockId(file.getName());
           verifyFileLocation(file, bpFinalizedDir,
               blockId);
-          report.add(new ScanInfo(blockId, null, file, this));
+          report.add(new ScanInfo(blockId, dir, null, fileNames.get(i), this));
         }
         continue;
       }
@@ -1474,7 +1474,8 @@ public class FsVolumeImpl implements FsVolumeSpi {
         }
       }
       verifyFileLocation(blockFile, bpFinalizedDir, blockId);
-      report.add(new ScanInfo(blockId, blockFile, metaFile, this));
+      report.add(new ScanInfo(blockId, dir, blockFile.getName(),
+          metaFile == null ? null : metaFile.getName(), this));
     }
   }
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
index 44d99a2..e2a15a8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
@@ -1040,19 +1040,21 @@ public class TestDirectoryScanner {
 
   private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231";
 
-  void testScanInfoObject(long blockId, File blockFile, File metaFile)
+  void testScanInfoObject(long blockId, File baseDir, String blockFile,
+                          String metaFile)
       throws Exception {
     FsVolumeSpi.ScanInfo scanInfo =
-        new FsVolumeSpi.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME);
+        new FsVolumeSpi.ScanInfo(blockId, baseDir, blockFile, metaFile,
+            TEST_VOLUME);
     assertEquals(blockId, scanInfo.getBlockId());
     if (blockFile != null) {
-      assertEquals(blockFile.getAbsolutePath(),
+      assertEquals(new File(baseDir, blockFile).getAbsolutePath(),
           scanInfo.getBlockFile().getAbsolutePath());
     } else {
       assertNull(scanInfo.getBlockFile());
     }
     if (metaFile != null) {
-      assertEquals(metaFile.getAbsolutePath(),
+      assertEquals(new File(baseDir, metaFile).getAbsolutePath(),
           scanInfo.getMetaFile().getAbsolutePath());
     } else {
       assertNull(scanInfo.getMetaFile());
@@ -1062,7 +1064,7 @@ public class TestDirectoryScanner {
 
   void testScanInfoObject(long blockId) throws Exception {
     FsVolumeSpi.ScanInfo scanInfo =
-        new FsVolumeSpi.ScanInfo(blockId, null, null, null);
+        new FsVolumeSpi.ScanInfo(blockId, null, null, null, null);
     assertEquals(blockId, scanInfo.getBlockId());
     assertNull(scanInfo.getBlockFile());
     assertNull(scanInfo.getMetaFile());
@@ -1071,24 +1073,19 @@ public class TestDirectoryScanner {
   @Test(timeout = 120000)
   public void TestScanInfo() throws Exception {
     testScanInfoObject(123,
-        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
-            "blk_123"),
-        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
-            "blk_123__1001.meta"));
+        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
+            "blk_123", "blk_123__1001.meta");
     testScanInfoObject(464,
-        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
-            "blk_123"),
-        null);
-    testScanInfoObject(523, null,
-        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
-            "blk_123__1009.meta"));
-    testScanInfoObject(789, null, null);
+        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
+            "blk_123", null);
+    testScanInfoObject(523,
+        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
+            null, "blk_123__1009.meta");
+    testScanInfoObject(789, null, null, null);
     testScanInfoObject(456);
     testScanInfoObject(123,
-        new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
-            "blk_567"),
-        new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
-            "blk_567__1004.meta"));
+        new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath()),
+            "blk_567", "blk_567__1004.meta");
   }
 
   /**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
index 778ef97..fbd9f00 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
@@ -1786,8 +1786,8 @@ public class TestFsDatasetImpl {
       assertFalse(metaFile.exists());
 
       FsVolumeSpi.ScanInfo info = new FsVolumeSpi.ScanInfo(
-          replicaInfo.getBlockId(), blockFile.getAbsoluteFile(),
-          metaFile.getAbsoluteFile(), replicaInfo.getVolume());
+          replicaInfo.getBlockId(), blockFile.getParentFile().getAbsoluteFile(),
+          blockFile.getName(), metaFile.getName(), replicaInfo.getVolume());
       fsdataset.checkAndUpdate(bpid, info);
 
       BlockManager blockManager = cluster.getNameNode().

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org