You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by so...@apache.org on 2021/04/26 11:57:16 UTC

[hadoop] branch branch-3.1 updated: HDFS-15621. Datanode DirectoryScanner uses excessive memory (#2849). Contributed by Stephen O'Donnell

This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 71a9885  HDFS-15621. Datanode DirectoryScanner uses excessive memory (#2849). Contributed by Stephen O'Donnell
71a9885 is described below

commit 71a9885c978c45085997915575bc1ac05729ed65
Author: Stephen O'Donnell <st...@gmail.com>
AuthorDate: Mon Apr 26 11:00:23 2021 +0100

    HDFS-15621. Datanode DirectoryScanner uses excessive memory (#2849). Contributed by Stephen O'Donnell
    
    (cherry picked from commit 605ed85c291a6250b077da32a49dbb35f3b78bf7)
    
     Conflicts:
    	hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
    
    (cherry picked from commit f6efb58b0735fa19a93fa9acae05666cdba8997a)
    
     Conflicts:
    	hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
    	hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
    
    (cherry picked from commit 7a81e50bd2a48d372bd9788aac369078e580e107)
---
 .../hdfs/server/datanode/DirectoryScanner.java     |   2 +-
 .../server/datanode/fsdataset/FsVolumeSpi.java     | 118 ++++++++++-----------
 .../datanode/fsdataset/impl/FsVolumeImpl.java      |   5 +-
 .../hdfs/server/datanode/TestDirectoryScanner.java |  38 +++----
 4 files changed, 74 insertions(+), 89 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
index 02aa00b..f36d03b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
@@ -515,7 +515,7 @@ public class DirectoryScanner implements Runnable {
                              FsVolumeSpi vol) {
     statsRecord.missingBlockFile++;
     statsRecord.missingMetaFile++;
-    diffRecord.add(new ScanInfo(blockId, null, null, vol));
+    diffRecord.add(new ScanInfo(blockId, null, null, null, vol));
   }
 
   /**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
index 20a153d..822be0f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
@@ -224,27 +224,27 @@ public interface FsVolumeSpi
    */
   public static class ScanInfo implements Comparable<ScanInfo> {
     private final long blockId;
-
     /**
-     * The block file path, relative to the volume's base directory.
-     * If there was no block file found, this may be null. If 'vol'
-     * is null, then this is the full path of the block file.
+     * The full path to the folder containing the block / meta files.
      */
-    private final String blockSuffix;
-
+    private final File basePath;
     /**
-     * The suffix of the meta file path relative to the block file.
-     * If blockSuffix is null, then this will be the entire path relative
-     * to the volume base directory, or an absolute path if vol is also
-     * null.
+     * The block file name, with no path
      */
-    private final String metaSuffix;
+    private final String blockFile;
+    /**
+     * Holds the meta file name, with no path, only if blockFile is null.
+     * If blockFile is not null, the meta file will be named identically to
+     * the blockFile, but with a suffix like "_1234.meta". If the blockFile
+     * is present, we store only the meta file suffix.
+     */
+    private final String metaFile;
 
     private final FsVolumeSpi volume;
 
     private final FileRegion fileRegion;
     /**
-     * Get the file's length in async block scan
+     * Get the file's length in async block scan.
      */
     private final long blockLength;
 
@@ -255,34 +255,18 @@ public interface FsVolumeSpi
         Matcher.quoteReplacement(File.separator);
 
     /**
-     * Get the most condensed version of the path.
-     *
-     * For example, the condensed version of /foo//bar is /foo/bar
-     * Unlike {@link File#getCanonicalPath()}, this will never perform I/O
-     * on the filesystem.
-     *
-     * @param path the path to condense
-     * @return the condensed path
-     */
-    private static String getCondensedPath(String path) {
-      return CONDENSED_PATH_REGEX.matcher(path).
-          replaceAll(QUOTED_FILE_SEPARATOR);
-    }
-
-    /**
      * Get a path suffix.
      *
-     * @param f            The file to get the suffix for.
+     * @param f            The string to get the suffix for.
      * @param prefix       The prefix we're stripping off.
      *
-     * @return             A suffix such that prefix + suffix = path to f
+     * @return             A suffix such that prefix + suffix = f
      */
-    private static String getSuffix(File f, String prefix) {
-      String fullPath = getCondensedPath(f.getAbsolutePath());
-      if (fullPath.startsWith(prefix)) {
-        return fullPath.substring(prefix.length());
+    private static String getSuffix(String f, String prefix) {
+      if (f.startsWith(prefix)) {
+        return f.substring(prefix.length());
       }
-      throw new RuntimeException(prefix + " is not a prefix of " + fullPath);
+      throw new RuntimeException(prefix + " is not a prefix of " + f);
     }
 
     /**
@@ -290,27 +274,27 @@ public interface FsVolumeSpi
      * the block data and meta-data files.
      *
      * @param blockId the block ID
-     * @param blockFile the path to the block data file
-     * @param metaFile the path to the block meta-data file
+     * @param basePath The full path to the directory the block is stored in
+     * @param blockFile The block filename, with no path
+     * @param metaFile The meta filename, with no path. If blockFile is not null
+     *                 then the metaFile and blockFile should have the same
+     *                 prefix, with the meta file having a suffix like
+     *                 "_1234.meta". To save memory, if the blockFile is present
+     *                 we store only the meta file suffix in the object
      * @param vol the volume that contains the block
      */
-    public ScanInfo(long blockId, File blockFile, File metaFile,
-        FsVolumeSpi vol) {
+    public ScanInfo(long blockId, File basePath, String blockFile,
+        String metaFile, FsVolumeSpi vol) {
       this.blockId = blockId;
-      String condensedVolPath =
-          (vol == null || vol.getBaseURI() == null) ? null :
-              getCondensedPath(new File(vol.getBaseURI()).getAbsolutePath());
-      this.blockSuffix = blockFile == null ? null :
-              getSuffix(blockFile, condensedVolPath);
-      this.blockLength = (blockFile != null) ? blockFile.length() : 0;
-      if (metaFile == null) {
-        this.metaSuffix = null;
-      } else if (blockFile == null) {
-        this.metaSuffix = getSuffix(metaFile, condensedVolPath);
+      this.basePath = basePath;
+      this.blockFile = blockFile;
+      if (blockFile != null && metaFile != null) {
+        this.metaFile = getSuffix(metaFile, blockFile);
       } else {
-        this.metaSuffix = getSuffix(metaFile,
-            condensedVolPath + blockSuffix);
+        this.metaFile = metaFile;
       }
+      this.blockLength = (blockFile != null) ?
+          new File(basePath, blockFile).length() : 0;
       this.volume = vol;
       this.fileRegion = null;
     }
@@ -330,8 +314,9 @@ public interface FsVolumeSpi
       this.blockLength = length;
       this.volume = vol;
       this.fileRegion = fileRegion;
-      this.blockSuffix = null;
-      this.metaSuffix = null;
+      this.basePath = null;
+      this.blockFile = null;
+      this.metaFile = null;
     }
 
     /**
@@ -340,8 +325,8 @@ public interface FsVolumeSpi
      * @return the block data file
      */
     public File getBlockFile() {
-      return (blockSuffix == null) ? null :
-        new File(new File(volume.getBaseURI()).getAbsolutePath(), blockSuffix);
+      return (blockFile == null) ? null :
+          new File(basePath.getAbsolutePath(), blockFile);
     }
 
     /**
@@ -360,15 +345,10 @@ public interface FsVolumeSpi
      * @return the block meta data file
      */
     public File getMetaFile() {
-      if (metaSuffix == null) {
+      if (metaFile == null) {
         return null;
-      } else if (blockSuffix == null) {
-        return new File(new File(volume.getBaseURI()).getAbsolutePath(),
-            metaSuffix);
-      } else {
-        return new File(new File(volume.getBaseURI()).getAbsolutePath(),
-            blockSuffix + metaSuffix);
       }
+      return new File(basePath.getAbsolutePath(), fullMetaFile());
     }
 
     /**
@@ -417,14 +397,24 @@ public interface FsVolumeSpi
     }
 
     public long getGenStamp() {
-      return metaSuffix != null ? Block.getGenerationStamp(
-          getMetaFile().getName()) :
-            HdfsConstants.GRANDFATHER_GENERATION_STAMP;
+      return metaFile != null ? Block.getGenerationStamp(fullMetaFile())
+          : HdfsConstants.GRANDFATHER_GENERATION_STAMP;
     }
 
     public FileRegion getFileRegion() {
       return fileRegion;
     }
+
+    private String fullMetaFile() {
+      if (metaFile == null) {
+        return null;
+      }
+      if (blockFile == null) {
+        return metaFile;
+      } else {
+        return blockFile + metaFile;
+      }
+    }
   }
 
   /**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
index 8810bfc..921e795 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
@@ -1379,7 +1379,7 @@ public class FsVolumeImpl implements FsVolumeSpi {
           long blockId = Block.getBlockId(file.getName());
           verifyFileLocation(file, bpFinalizedDir,
               blockId);
-          report.add(new ScanInfo(blockId, null, file, this));
+          report.add(new ScanInfo(blockId, dir, null, fileNames.get(i), this));
         }
         continue;
       }
@@ -1402,7 +1402,8 @@ public class FsVolumeImpl implements FsVolumeSpi {
         }
       }
       verifyFileLocation(blockFile, bpFinalizedDir, blockId);
-      report.add(new ScanInfo(blockId, blockFile, metaFile, this));
+      report.add(new ScanInfo(blockId, dir, blockFile.getName(),
+          metaFile == null ? null : metaFile.getName(), this));
     }
     return report;
   }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
index 778707d..0e62a87 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
@@ -1029,19 +1029,21 @@ public class TestDirectoryScanner {
   
   private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231";
       
-  void testScanInfoObject(long blockId, File blockFile, File metaFile)
+  void testScanInfoObject(long blockId, File baseDir, String blockFile,
+                          String metaFile)
       throws Exception {
     FsVolumeSpi.ScanInfo scanInfo =
-        new FsVolumeSpi.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME);
+        new FsVolumeSpi.ScanInfo(blockId, baseDir, blockFile, metaFile,
+            TEST_VOLUME);
     assertEquals(blockId, scanInfo.getBlockId());
     if (blockFile != null) {
-      assertEquals(blockFile.getAbsolutePath(),
+      assertEquals(new File(baseDir, blockFile).getAbsolutePath(),
           scanInfo.getBlockFile().getAbsolutePath());
     } else {
       assertNull(scanInfo.getBlockFile());
     }
     if (metaFile != null) {
-      assertEquals(metaFile.getAbsolutePath(),
+      assertEquals(new File(baseDir, metaFile).getAbsolutePath(),
           scanInfo.getMetaFile().getAbsolutePath());
     } else {
       assertNull(scanInfo.getMetaFile());
@@ -1051,7 +1053,7 @@ public class TestDirectoryScanner {
   
   void testScanInfoObject(long blockId) throws Exception {
     FsVolumeSpi.ScanInfo scanInfo =
-        new FsVolumeSpi.ScanInfo(blockId, null, null, null);
+        new FsVolumeSpi.ScanInfo(blockId, null, null, null, null);
     assertEquals(blockId, scanInfo.getBlockId());
     assertNull(scanInfo.getBlockFile());
     assertNull(scanInfo.getMetaFile());
@@ -1060,27 +1062,19 @@ public class TestDirectoryScanner {
   @Test(timeout=120000)
   public void TestScanInfo() throws Exception {
     testScanInfoObject(123,
-        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
-            "blk_123"),
-        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
-            "blk_123__1001.meta"));
+        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
+            "blk_123", "blk_123__1001.meta");
     testScanInfoObject(464,
-        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
-            "blk_123"),
-        null);
+        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
+            "blk_123", null);
     testScanInfoObject(523,
-        null,
-        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
-            "blk_123__1009.meta"));
-    testScanInfoObject(789,
-        null,
-        null);
+        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
+            null, "blk_123__1009.meta");
+    testScanInfoObject(789, null, null, null);
     testScanInfoObject(456);
     testScanInfoObject(123,
-        new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
-            "blk_567"),
-        new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
-            "blk_567__1004.meta"));
+        new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath()),
+            "blk_567", "blk_567__1004.meta");
   }
 
   /**

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org