You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by so...@apache.org on 2021/04/08 11:31:05 UTC
[hadoop] branch branch-3.3 updated: HDFS-15937. Reduce memory used
during datanode layout upgrade. Contributed by Stephen O'Donnell (#2838)
This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new ef95f7a HDFS-15937. Reduce memory used during datanode layout upgrade. Contributed by Stephen O'Donnell (#2838)
ef95f7a is described below
commit ef95f7a96355e93d69eb9a7c9d9dd672974ff22e
Author: Stephen O'Donnell <st...@gmail.com>
AuthorDate: Thu Apr 8 11:59:02 2021 +0100
HDFS-15937. Reduce memory used during datanode layout upgrade. Contributed by Stephen O'Donnell (#2838)
(cherry picked from commit 4c567fcff7af45c75117ee4a75c087aa454a89e5)
---
.../hadoop/hdfs/server/datanode/DataStorage.java | 98 ++++++++++++++--------
1 file changed, 62 insertions(+), 36 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java
index e917b77..1d2f10f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java
@@ -1064,12 +1064,26 @@ public class DataStorage extends Storage {
}
private static class LinkArgs {
- File src;
- File dst;
+ private File srcDir;
+ private File dstDir;
+ private String blockFile;
+
+ LinkArgs(File srcDir, File dstDir, String blockFile) {
+ this.srcDir = srcDir;
+ this.dstDir = dstDir;
+ this.blockFile = blockFile;
+ }
+
+ public File src() {
+ return new File(srcDir, blockFile);
+ }
- LinkArgs(File src, File dst) {
- this.src = src;
- this.dst = dst;
+ public File dst() {
+ return new File(dstDir, blockFile);
+ }
+
+ public String blockFile() {
+ return blockFile;
}
}
@@ -1095,8 +1109,9 @@ public class DataStorage extends Storage {
}
final ArrayList<LinkArgs> idBasedLayoutSingleLinks = Lists.newArrayList();
- linkBlocksHelper(from, to, oldLV, hl, upgradeToIdBasedLayout, to,
- idBasedLayoutSingleLinks);
+ final Map<File, File> pathCache = new HashMap<>();
+ linkBlocksHelper(from, to, hl, upgradeToIdBasedLayout, to,
+ idBasedLayoutSingleLinks, pathCache);
// Detect and remove duplicate entries.
final ArrayList<LinkArgs> duplicates =
@@ -1122,7 +1137,7 @@ public class DataStorage extends Storage {
idBasedLayoutSingleLinks.size());
for (int j = iCopy; j < upperBound; j++) {
LinkArgs cur = idBasedLayoutSingleLinks.get(j);
- HardLink.createHardLink(cur.src, cur.dst);
+ HardLink.createHardLink(cur.src(), cur.dst());
}
return null;
}
@@ -1155,9 +1170,9 @@ public class DataStorage extends Storage {
@Override
public int compare(LinkArgs a, LinkArgs b) {
return ComparisonChain.start().
- compare(a.src.getName(), b.src.getName()).
- compare(a.src, b.src).
- compare(a.dst, b.dst).
+ compare(a.blockFile(), b.blockFile()).
+ compare(a.src(), b.src()).
+ compare(a.dst(), b.dst()).
result();
}
});
@@ -1167,8 +1182,8 @@ public class DataStorage extends Storage {
boolean addedPrev = false;
for (int i = 0; i < all.size(); i++) {
LinkArgs args = all.get(i);
- long blockId = Block.getBlockId(args.src.getName());
- boolean isMeta = Block.isMetaFilename(args.src.getName());
+ long blockId = Block.getBlockId(args.blockFile());
+ boolean isMeta = Block.isMetaFilename(args.blockFile());
if ((prevBlockId == null) ||
(prevBlockId.longValue() != blockId)) {
prevBlockId = blockId;
@@ -1207,10 +1222,10 @@ public class DataStorage extends Storage {
TreeMap<Long, List<LinkArgs>> highestGenstamps =
new TreeMap<Long, List<LinkArgs>>();
for (LinkArgs duplicate : duplicates) {
- if (!Block.isMetaFilename(duplicate.src.getName())) {
+ if (!Block.isMetaFilename(duplicate.blockFile())) {
continue;
}
- long blockId = Block.getBlockId(duplicate.src.getName());
+ long blockId = Block.getBlockId(duplicate.blockFile());
List<LinkArgs> prevHighest = highestGenstamps.get(blockId);
if (prevHighest == null) {
List<LinkArgs> highest = new LinkedList<LinkArgs>();
@@ -1219,8 +1234,8 @@ public class DataStorage extends Storage {
continue;
}
long prevGenstamp =
- Block.getGenerationStamp(prevHighest.get(0).src.getName());
- long genstamp = Block.getGenerationStamp(duplicate.src.getName());
+ Block.getGenerationStamp(prevHighest.get(0).blockFile());
+ long genstamp = Block.getGenerationStamp(duplicate.blockFile());
if (genstamp < prevGenstamp) {
continue;
}
@@ -1234,19 +1249,19 @@ public class DataStorage extends Storage {
// from the duplicates list.
for (Iterator<LinkArgs> iter = duplicates.iterator(); iter.hasNext(); ) {
LinkArgs duplicate = iter.next();
- long blockId = Block.getBlockId(duplicate.src.getName());
+ long blockId = Block.getBlockId(duplicate.blockFile());
List<LinkArgs> highest = highestGenstamps.get(blockId);
if (highest != null) {
boolean found = false;
for (LinkArgs high : highest) {
- if (high.src.getParent().equals(duplicate.src.getParent())) {
+ if (high.src().getParent().equals(duplicate.src().getParent())) {
found = true;
break;
}
}
if (!found) {
LOG.warn("Unexpectedly low genstamp on {}.",
- duplicate.src.getAbsolutePath());
+ duplicate.src().getAbsolutePath());
iter.remove();
}
}
@@ -1257,25 +1272,25 @@ public class DataStorage extends Storage {
// preserving one block file / metadata file pair.
TreeMap<Long, LinkArgs> longestBlockFiles = new TreeMap<Long, LinkArgs>();
for (LinkArgs duplicate : duplicates) {
- if (Block.isMetaFilename(duplicate.src.getName())) {
+ if (Block.isMetaFilename(duplicate.blockFile())) {
continue;
}
- long blockId = Block.getBlockId(duplicate.src.getName());
+ long blockId = Block.getBlockId(duplicate.blockFile());
LinkArgs prevLongest = longestBlockFiles.get(blockId);
if (prevLongest == null) {
longestBlockFiles.put(blockId, duplicate);
continue;
}
- long blockLength = duplicate.src.length();
- long prevBlockLength = prevLongest.src.length();
+ long blockLength = duplicate.src().length();
+ long prevBlockLength = prevLongest.src().length();
if (blockLength < prevBlockLength) {
LOG.warn("Unexpectedly short length on {}.",
- duplicate.src.getAbsolutePath());
+ duplicate.src().getAbsolutePath());
continue;
}
if (blockLength > prevBlockLength) {
LOG.warn("Unexpectedly short length on {}.",
- prevLongest.src.getAbsolutePath());
+ prevLongest.src().getAbsolutePath());
}
longestBlockFiles.put(blockId, duplicate);
}
@@ -1284,21 +1299,22 @@ public class DataStorage extends Storage {
// arbitrarily selected by us.
for (Iterator<LinkArgs> iter = all.iterator(); iter.hasNext(); ) {
LinkArgs args = iter.next();
- long blockId = Block.getBlockId(args.src.getName());
+ long blockId = Block.getBlockId(args.blockFile());
LinkArgs bestDuplicate = longestBlockFiles.get(blockId);
if (bestDuplicate == null) {
continue; // file has no duplicates
}
- if (!bestDuplicate.src.getParent().equals(args.src.getParent())) {
- LOG.warn("Discarding {}.", args.src.getAbsolutePath());
+ if (!bestDuplicate.src().getParent().equals(args.src().getParent())) {
+ LOG.warn("Discarding {}.", args.src().getAbsolutePath());
iter.remove();
}
}
}
- static void linkBlocksHelper(File from, File to, int oldLV, HardLink hl,
- boolean upgradeToIdBasedLayout, File blockRoot,
- List<LinkArgs> idBasedLayoutSingleLinks) throws IOException {
+ static void linkBlocksHelper(File from, File to, HardLink hl,
+ boolean upgradeToIdBasedLayout, File blockRoot,
+ List<LinkArgs> idBasedLayoutSingleLinks, Map<File, File> pathCache)
+ throws IOException {
if (!from.exists()) {
return;
}
@@ -1338,8 +1354,18 @@ public class DataStorage extends Storage {
throw new IOException("Failed to mkdirs " + blockLocation);
}
}
- idBasedLayoutSingleLinks.add(new LinkArgs(new File(from, blockName),
- new File(blockLocation, blockName)));
+ /**
+ * The destination path is 32x32, so 1024 distinct paths. Therefore
+ * we cache the destination path and reuse the same File object on
+ * potentially thousands of blocks located on this volume.
+ * This method is called recursively so the cache is passed through
+ * each recursive call. There is one cache per volume, and it is only
+ * accessed by a single thread so no locking is needed.
+ */
+ File cachedDest = pathCache
+ .computeIfAbsent(blockLocation, k -> blockLocation);
+ idBasedLayoutSingleLinks.add(new LinkArgs(from,
+ cachedDest, blockName));
hl.linkStats.countSingleLinks++;
}
} else {
@@ -1362,8 +1388,8 @@ public class DataStorage extends Storage {
if (otherNames != null) {
for (int i = 0; i < otherNames.length; i++) {
linkBlocksHelper(new File(from, otherNames[i]),
- new File(to, otherNames[i]), oldLV, hl, upgradeToIdBasedLayout,
- blockRoot, idBasedLayoutSingleLinks);
+ new File(to, otherNames[i]), hl, upgradeToIdBasedLayout,
+ blockRoot, idBasedLayoutSingleLinks, pathCache);
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org