You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by cm...@apache.org on 2013/06/19 02:41:49 UTC
svn commit: r1494404 - in
/hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs: ./
src/main/java/org/apache/hadoop/hdfs/server/datanode/
src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/
src/main/java/org/apache/hadoop/...
Author: cmccabe
Date: Wed Jun 19 00:41:49 2013
New Revision: 1494404
URL: http://svn.apache.org/r1494404
Log:
HDFS-4461. DirectoryScanner: volume prefix takes up memory for every block that is scanned (Colin Patrick McCabe)
Modified:
hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
Modified: hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1494404&r1=1494403&r2=1494404&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Wed Jun 19 00:41:49 2013
@@ -55,6 +55,9 @@ Release 2.1.0-beta - UNRELEASED
IMPROVEMENTS
+ HDFS-4461. DirectoryScanner: volume path prefix takes up memory for every
+ block that is scanned (Colin Patrick McCabe)
+
HDFS-4222. NN is unresponsive and loses heartbeats from DNs when
configured to use LDAP and LDAP has issues. (Xiaobo Peng, suresh)
Modified: hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java?rev=1494404&r1=1494403&r2=1494404&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java (original)
+++ hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java Wed Jun 19 00:41:49 2013
@@ -33,6 +33,8 @@ import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -154,30 +156,109 @@ public class DirectoryScanner implements
* Tracks the files and other information related to a block on the disk
* Missing file is indicated by setting the corresponding member
* to null.
+ *
+ * Because millions of these structures may be created, we try to save
+ * memory here. So instead of storing full paths, we store path suffixes.
+ * The block file, if it exists, will have a path like this:
+ * <volume_base_path>/<block_path>
+ * So we don't need to store the volume path, since we already know what the
+ * volume is.
+ *
+ * The metadata file, if it exists, will have a path like this:
+ * <volume_base_path>/<block_path>_<genstamp>.meta
+ * So if we have a block file, there isn't any need to store the block path
+ * again.
+ *
+ * The accessor functions take care of these manipulations.
*/
static class ScanInfo implements Comparable<ScanInfo> {
private final long blockId;
- private final File metaFile;
- private final File blockFile;
+
+ /**
+ * The block file path, relative to the volume's base directory.
+ * If there was no block file found, this may be null. If 'vol'
+ * is null, then this is the full path of the block file.
+ */
+ private final String blockSuffix;
+
+ /**
+ * The suffix of the meta file path relative to the block file.
+ * If blockSuffix is null, then this will be the entire path relative
+ * to the volume base directory, or an absolute path if vol is also
+ * null.
+ */
+ private final String metaSuffix;
+
private final FsVolumeSpi volume;
+ private final static Pattern CONDENSED_PATH_REGEX =
+ Pattern.compile("(?<!^)(\\\\|/){2,}");
+
+ private final static String QUOTED_FILE_SEPARATOR =
+ Matcher.quoteReplacement(File.separator);
+
+ /**
+ * Get the most condensed version of the path.
+ *
+ * For example, the condensed version of /foo//bar is /foo/bar
+ * Unlike {@link File#getCanonicalPath()}, this will never perform I/O
+ * on the filesystem.
+ */
+ private static String getCondensedPath(String path) {
+ return CONDENSED_PATH_REGEX.matcher(path).
+ replaceAll(QUOTED_FILE_SEPARATOR);
+ }
+
+ /**
+ * Get a path suffix.
+ *
+ * @param f The file to get the suffix for.
+ * @param prefix The prefix we're stripping off.
+ *
+ * @return A suffix such that prefix + suffix = path to f
+ */
+ private static String getSuffix(File f, String prefix) {
+ String fullPath = getCondensedPath(f.getAbsolutePath());
+ if (fullPath.startsWith(prefix)) {
+ return fullPath.substring(prefix.length());
+ }
+ throw new RuntimeException(prefix + " is not a prefix of " + fullPath);
+ }
+
ScanInfo(long blockId) {
this(blockId, null, null, null);
}
ScanInfo(long blockId, File blockFile, File metaFile, FsVolumeSpi vol) {
this.blockId = blockId;
- this.metaFile = metaFile;
- this.blockFile = blockFile;
+ String condensedVolPath = vol == null ? null :
+ getCondensedPath(vol.getBasePath());
+ this.blockSuffix = blockFile == null ? null :
+ getSuffix(blockFile, condensedVolPath);
+ if (metaFile == null) {
+ this.metaSuffix = null;
+ } else if (blockFile == null) {
+ this.metaSuffix = getSuffix(metaFile, condensedVolPath);
+ } else {
+ this.metaSuffix = getSuffix(metaFile,
+ condensedVolPath + blockSuffix);
+ }
this.volume = vol;
}
- File getMetaFile() {
- return metaFile;
+ File getBlockFile() {
+ return (blockSuffix == null) ? null :
+ new File(volume.getBasePath(), blockSuffix);
}
- File getBlockFile() {
- return blockFile;
+ File getMetaFile() {
+ if (metaSuffix == null) {
+ return null;
+ } else if (blockSuffix == null) {
+ return new File(volume.getBasePath(), metaSuffix);
+ } else {
+ return new File(volume.getBasePath(), blockSuffix + metaSuffix);
+ }
}
long getBlockId() {
@@ -216,8 +297,9 @@ public class DirectoryScanner implements
}
public long getGenStamp() {
- return metaFile != null ? Block.getGenerationStamp(metaFile.getName()) :
- GenerationStamp.GRANDFATHER_GENERATION_STAMP;
+ return metaSuffix != null ? Block.getGenerationStamp(
+ getMetaFile().getName()) :
+ GenerationStamp.GRANDFATHER_GENERATION_STAMP;
}
}
Modified: hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java?rev=1494404&r1=1494403&r2=1494404&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java (original)
+++ hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java Wed Jun 19 00:41:49 2013
@@ -30,6 +30,9 @@ public interface FsVolumeSpi {
/** @return the available storage space in bytes. */
public long getAvailable() throws IOException;
+ /** @return the base path to the volume */
+ public String getBasePath();
+
/** @return the path to the volume */
public String getPath(String bpid) throws IOException;
Modified: hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java?rev=1494404&r1=1494403&r2=1494404&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java (original)
+++ hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java Wed Jun 19 00:41:49 2013
@@ -125,6 +125,11 @@ class FsVolumeImpl implements FsVolumeSp
}
@Override
+ public String getBasePath() {
+ return currentDir.getParent();
+ }
+
+ @Override
public String getPath(String bpid) throws IOException {
return getBlockPoolSlice(bpid).getDirectory().getAbsolutePath();
}
Modified: hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java?rev=1494404&r1=1494403&r2=1494404&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java (original)
+++ hadoop/common/branches/branch-2.1-beta/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java Wed Jun 19 00:41:49 2013
@@ -380,4 +380,93 @@ public class TestDirectoryScanner {
assertNotNull(memBlock);
assertEquals(genStamp, memBlock.getGenerationStamp());
}
+
+ private static class TestFsVolumeSpi implements FsVolumeSpi {
+ @Override
+ public String[] getBlockPoolList() {
+ return new String[0];
+ }
+
+ @Override
+ public long getAvailable() throws IOException {
+ return 0;
+ }
+
+ @Override
+ public String getBasePath() {
+ return "/base";
+ }
+
+ @Override
+ public String getPath(String bpid) throws IOException {
+ return "/base/current/" + bpid;
+ }
+
+ @Override
+ public File getFinalizedDir(String bpid) throws IOException {
+ return new File("/base/current/" + bpid + "/finalized");
+ }
+ }
+
+ private final static TestFsVolumeSpi TEST_VOLUME = new TestFsVolumeSpi();
+
+ private final static String BPID_1 = "BP-783049782-127.0.0.1-1370971773491";
+
+ private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231";
+
+ void testScanInfoObject(long blockId, File blockFile, File metaFile)
+ throws Exception {
+ assertEquals("/base/current/" + BPID_1 + "/finalized",
+ TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath());
+ DirectoryScanner.ScanInfo scanInfo =
+ new DirectoryScanner.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME);
+ assertEquals(blockId, scanInfo.getBlockId());
+ if (blockFile != null) {
+ assertEquals(blockFile.getAbsolutePath(),
+ scanInfo.getBlockFile().getAbsolutePath());
+ } else {
+ assertNull(scanInfo.getBlockFile());
+ }
+ if (metaFile != null) {
+ assertEquals(metaFile.getAbsolutePath(),
+ scanInfo.getMetaFile().getAbsolutePath());
+ } else {
+ assertNull(scanInfo.getMetaFile());
+ }
+ assertEquals(TEST_VOLUME, scanInfo.getVolume());
+ }
+
+ void testScanInfoObject(long blockId) throws Exception {
+ DirectoryScanner.ScanInfo scanInfo =
+ new DirectoryScanner.ScanInfo(blockId);
+ assertEquals(blockId, scanInfo.getBlockId());
+ assertNull(scanInfo.getBlockFile());
+ assertNull(scanInfo.getMetaFile());
+ }
+
+ @Test(timeout=120000)
+ public void TestScanInfo() throws Exception {
+ testScanInfoObject(123,
+ new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
+ "blk_123"),
+ new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
+ "blk_123__1001.meta"));
+ testScanInfoObject(464,
+ new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
+ "blk_123"),
+ null);
+ testScanInfoObject(523,
+ null,
+ new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
+ "blk_123__1009.meta"));
+ testScanInfoObject(789,
+ null,
+ null);
+ testScanInfoObject(456);
+ testScanInfoObject(123,
+ new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
+ "blk_567"),
+ new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
+ "blk_567__1004.meta"));
+ }
}