You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by so...@apache.org on 2021/04/26 10:25:34 UTC
[hadoop] branch branch-3.3 updated: HDFS-15621. Datanode
DirectoryScanner uses excessive memory (#2849). Contributed by Stephen
O'Donnell
This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new f6efb58 HDFS-15621. Datanode DirectoryScanner uses excessive memory (#2849). Contributed by Stephen O'Donnell
f6efb58 is described below
commit f6efb58b0735fa19a93fa9acae05666cdba8997a
Author: Stephen O'Donnell <st...@gmail.com>
AuthorDate: Mon Apr 26 11:00:23 2021 +0100
HDFS-15621. Datanode DirectoryScanner uses excessive memory (#2849). Contributed by Stephen O'Donnell
(cherry picked from commit 605ed85c291a6250b077da32a49dbb35f3b78bf7)
Conflicts:
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java
---
.../hdfs/server/datanode/DirectoryScanner.java | 2 +-
.../server/datanode/fsdataset/FsVolumeSpi.java | 118 ++++++++++-----------
.../datanode/fsdataset/impl/FsVolumeImpl.java | 5 +-
.../hdfs/server/datanode/TestDirectoryScanner.java | 37 +++----
4 files changed, 75 insertions(+), 87 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
index be7e47a..b48d6ce 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
@@ -582,7 +582,7 @@ public class DirectoryScanner implements Runnable {
long blockId, FsVolumeSpi vol) {
statsRecord.missingBlockFile++;
statsRecord.missingMetaFile++;
- diffRecord.add(new ScanInfo(blockId, null, null, vol));
+ diffRecord.add(new ScanInfo(blockId, null, null, null, vol));
}
/**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
index be978d7..c1043ae 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
@@ -224,27 +224,27 @@ public interface FsVolumeSpi
*/
public static class ScanInfo implements Comparable<ScanInfo> {
private final long blockId;
-
/**
- * The block file path, relative to the volume's base directory.
- * If there was no block file found, this may be null. If 'vol'
- * is null, then this is the full path of the block file.
+ * The full path to the folder containing the block / meta files.
*/
- private final String blockSuffix;
-
+ private final File basePath;
/**
- * The suffix of the meta file path relative to the block file.
- * If blockSuffix is null, then this will be the entire path relative
- * to the volume base directory, or an absolute path if vol is also
- * null.
+ * The block file name, with no path
*/
- private final String metaSuffix;
+ private final String blockFile;
+ /**
+ * Holds the meta file name, with no path, only if blockFile is null.
+ * If blockFile is not null, the meta file will be named identically to
+ * the blockFile, but with a suffix like "_1234.meta". If the blockFile
+ * is present, we store only the meta file suffix.
+ */
+ private final String metaFile;
private final FsVolumeSpi volume;
private final FileRegion fileRegion;
/**
- * Get the file's length in async block scan
+ * Get the file's length in async block scan.
*/
private final long blockLength;
@@ -255,34 +255,18 @@ public interface FsVolumeSpi
Matcher.quoteReplacement(File.separator);
/**
- * Get the most condensed version of the path.
- *
- * For example, the condensed version of /foo//bar is /foo/bar
- * Unlike {@link File#getCanonicalPath()}, this will never perform I/O
- * on the filesystem.
- *
- * @param path the path to condense
- * @return the condensed path
- */
- private static String getCondensedPath(String path) {
- return CONDENSED_PATH_REGEX.matcher(path).
- replaceAll(QUOTED_FILE_SEPARATOR);
- }
-
- /**
* Get a path suffix.
*
- * @param f The file to get the suffix for.
+ * @param f The string to get the suffix for.
* @param prefix The prefix we're stripping off.
*
- * @return A suffix such that prefix + suffix = path to f
+ * @return A suffix such that prefix + suffix = f
*/
- private static String getSuffix(File f, String prefix) {
- String fullPath = getCondensedPath(f.getAbsolutePath());
- if (fullPath.startsWith(prefix)) {
- return fullPath.substring(prefix.length());
+ private static String getSuffix(String f, String prefix) {
+ if (f.startsWith(prefix)) {
+ return f.substring(prefix.length());
}
- throw new RuntimeException(prefix + " is not a prefix of " + fullPath);
+ throw new RuntimeException(prefix + " is not a prefix of " + f);
}
/**
@@ -290,27 +274,27 @@ public interface FsVolumeSpi
* the block data and meta-data files.
*
* @param blockId the block ID
- * @param blockFile the path to the block data file
- * @param metaFile the path to the block meta-data file
+ * @param basePath The full path to the directory the block is stored in
+ * @param blockFile The block filename, with no path
+ * @param metaFile The meta filename, with no path. If blockFile is not null
+ * then the metaFile and blockFile should have the same
+ * prefix, with the meta file having a suffix like
+ * "_1234.meta". To save memory, if the blockFile is present
+ * we store only the meta file suffix in the object
* @param vol the volume that contains the block
*/
- public ScanInfo(long blockId, File blockFile, File metaFile,
- FsVolumeSpi vol) {
+ public ScanInfo(long blockId, File basePath, String blockFile,
+ String metaFile, FsVolumeSpi vol) {
this.blockId = blockId;
- String condensedVolPath =
- (vol == null || vol.getBaseURI() == null) ? null :
- getCondensedPath(new File(vol.getBaseURI()).getAbsolutePath());
- this.blockSuffix = blockFile == null ? null :
- getSuffix(blockFile, condensedVolPath);
- this.blockLength = (blockFile != null) ? blockFile.length() : 0;
- if (metaFile == null) {
- this.metaSuffix = null;
- } else if (blockFile == null) {
- this.metaSuffix = getSuffix(metaFile, condensedVolPath);
+ this.basePath = basePath;
+ this.blockFile = blockFile;
+ if (blockFile != null && metaFile != null) {
+ this.metaFile = getSuffix(metaFile, blockFile);
} else {
- this.metaSuffix = getSuffix(metaFile,
- condensedVolPath + blockSuffix);
+ this.metaFile = metaFile;
}
+ this.blockLength = (blockFile != null) ?
+ new File(basePath, blockFile).length() : 0;
this.volume = vol;
this.fileRegion = null;
}
@@ -330,8 +314,9 @@ public interface FsVolumeSpi
this.blockLength = length;
this.volume = vol;
this.fileRegion = fileRegion;
- this.blockSuffix = null;
- this.metaSuffix = null;
+ this.basePath = null;
+ this.blockFile = null;
+ this.metaFile = null;
}
/**
@@ -340,8 +325,8 @@ public interface FsVolumeSpi
* @return the block data file
*/
public File getBlockFile() {
- return (blockSuffix == null) ? null :
- new File(new File(volume.getBaseURI()).getAbsolutePath(), blockSuffix);
+ return (blockFile == null) ? null :
+ new File(basePath.getAbsolutePath(), blockFile);
}
/**
@@ -360,15 +345,10 @@ public interface FsVolumeSpi
* @return the block meta data file
*/
public File getMetaFile() {
- if (metaSuffix == null) {
+ if (metaFile == null) {
return null;
}
- String fileSuffix = metaSuffix;
- if (blockSuffix != null) {
- fileSuffix = blockSuffix + metaSuffix;
- }
- return new File(new File(volume.getBaseURI()).getAbsolutePath(),
- fileSuffix);
+ return new File(basePath.getAbsolutePath(), fullMetaFile());
}
/**
@@ -411,14 +391,24 @@ public interface FsVolumeSpi
}
public long getGenStamp() {
- return metaSuffix != null ? Block.getGenerationStamp(
- getMetaFile().getName()) :
- HdfsConstants.GRANDFATHER_GENERATION_STAMP;
+ return metaFile != null ? Block.getGenerationStamp(fullMetaFile())
+ : HdfsConstants.GRANDFATHER_GENERATION_STAMP;
}
public FileRegion getFileRegion() {
return fileRegion;
}
+
+ private String fullMetaFile() {
+ if (metaFile == null) {
+ return null;
+ }
+ if (blockFile == null) {
+ return metaFile;
+ } else {
+ return blockFile + metaFile;
+ }
+ }
}
/**
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
index e83b7c9..15048ef 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
@@ -1392,7 +1392,7 @@ public class FsVolumeImpl implements FsVolumeSpi {
long blockId = Block.getBlockId(file.getName());
verifyFileLocation(file, bpFinalizedDir,
blockId);
- report.add(new ScanInfo(blockId, null, file, this));
+ report.add(new ScanInfo(blockId, dir, null, fileNames.get(i), this));
}
continue;
}
@@ -1415,7 +1415,8 @@ public class FsVolumeImpl implements FsVolumeSpi {
}
}
verifyFileLocation(blockFile, bpFinalizedDir, blockId);
- report.add(new ScanInfo(blockId, blockFile, metaFile, this));
+ report.add(new ScanInfo(blockId, dir, blockFile.getName(),
+ metaFile == null ? null : metaFile.getName(), this));
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
index 2547001..f80ad5b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
@@ -1021,19 +1021,21 @@ public class TestDirectoryScanner {
private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231";
- void testScanInfoObject(long blockId, File blockFile, File metaFile)
+ void testScanInfoObject(long blockId, File baseDir, String blockFile,
+ String metaFile)
throws Exception {
FsVolumeSpi.ScanInfo scanInfo =
- new FsVolumeSpi.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME);
+ new FsVolumeSpi.ScanInfo(blockId, baseDir, blockFile, metaFile,
+ TEST_VOLUME);
assertEquals(blockId, scanInfo.getBlockId());
if (blockFile != null) {
- assertEquals(blockFile.getAbsolutePath(),
+ assertEquals(new File(baseDir, blockFile).getAbsolutePath(),
scanInfo.getBlockFile().getAbsolutePath());
} else {
assertNull(scanInfo.getBlockFile());
}
if (metaFile != null) {
- assertEquals(metaFile.getAbsolutePath(),
+ assertEquals(new File(baseDir, metaFile).getAbsolutePath(),
scanInfo.getMetaFile().getAbsolutePath());
} else {
assertNull(scanInfo.getMetaFile());
@@ -1043,7 +1045,7 @@ public class TestDirectoryScanner {
void testScanInfoObject(long blockId) throws Exception {
FsVolumeSpi.ScanInfo scanInfo =
- new FsVolumeSpi.ScanInfo(blockId, null, null, null);
+ new FsVolumeSpi.ScanInfo(blockId, null, null, null, null);
assertEquals(blockId, scanInfo.getBlockId());
assertNull(scanInfo.getBlockFile());
assertNull(scanInfo.getMetaFile());
@@ -1052,24 +1054,19 @@ public class TestDirectoryScanner {
@Test(timeout = 120000)
public void TestScanInfo() throws Exception {
testScanInfoObject(123,
- new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
- "blk_123"),
- new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
- "blk_123__1001.meta"));
+ new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
+ "blk_123", "blk_123__1001.meta");
testScanInfoObject(464,
- new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
- "blk_123"),
- null);
- testScanInfoObject(523, null,
- new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
- "blk_123__1009.meta"));
- testScanInfoObject(789, null, null);
+ new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
+ "blk_123", null);
+ testScanInfoObject(523,
+ new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath()),
+ null, "blk_123__1009.meta");
+ testScanInfoObject(789, null, null, null);
testScanInfoObject(456);
testScanInfoObject(123,
- new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
- "blk_567"),
- new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
- "blk_567__1004.meta"));
+ new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath()),
+ "blk_567", "blk_567__1004.meta");
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org