You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by om...@apache.org on 2011/03/04 04:51:31 UTC
svn commit: r1077203 -
/hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/HarFileSystem.java
Author: omalley
Date: Fri Mar 4 03:51:31 2011
New Revision: 1077203
URL: http://svn.apache.org/viewvc?rev=1077203&view=rev
Log:
commit adde1686a98f922348feca8195e388f3cc6a159d
Author: Mahadev Konar <ma...@cdev6022.inktomisearch.com>
Date: Tue Feb 23 06:57:55 2010 +0000
HADOOP-6467 from http://issues.apache.org/jira/secure/attachment/12436653/HADOOP-6467-y.0.20-branch-v2.patch
+++ b/YAHOO-CHANGES.txt
+ HADOOP-6467. Performance improvement for liststatus on directories in
+ hadoop archives. (mahadev)
+
Modified:
hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/HarFileSystem.java
Modified: hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/HarFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/HarFileSystem.java?rev=1077203&r1=1077202&r2=1077203&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/HarFileSystem.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/HarFileSystem.java Fri Mar 4 03:51:31 2011
@@ -324,25 +324,12 @@ public class HarFileSystem extends Filte
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
long len) throws IOException {
- // need to look up the file in the underlying fs
- // look up the index
-
- // make sure this is a prt of this har filesystem
- Path p = makeQualified(file.getPath());
- Path harPath = getPathInHar(p);
- String line = fileStatusInIndex(harPath);
- if (line == null) {
- throw new FileNotFoundException("File " + file.getPath() + " not found");
- }
- HarStatus harStatus = new HarStatus(line);
- if (harStatus.isDir()) {
- return new BlockLocation[0];
- }
- FileStatus fsFile = fs.getFileStatus(new Path(archivePath,
- harStatus.getPartName()));
- BlockLocation[] rawBlocks = fs.getFileBlockLocations(fsFile,
- harStatus.getStartIndex() + start, len);
- return fakeBlockLocations(rawBlocks, harStatus.getStartIndex());
+ // just fake block locations
+ // its fast and simpler
+ // doing various block location manipulation
+ // with part files adds a lot of overhead because
+ // of the look ups of filestatus in index files
+ return new BlockLocation[]{ new BlockLocation() };
}
/**
@@ -386,6 +373,63 @@ public class HarFileSystem extends Filte
public int endHash;
}
+ /**
+ * Get filestatuses of all the children of a given directory. This just reads
+ * through index file and reads line by line to get all statuses for children
+ * of a directory. Its a brute force way of getting all such filestatuses
+ *
+ * @param parent
+ * the parent path directory
+ * @param statuses
+ * the list to add the children filestatuses to
+ * @param children
+ * the string list of children for this parent
+ * @param archiveIndexStat
+ * the archive index filestatus
+ */
+ private void fileStatusesInIndex(HarStatus parent, List<FileStatus> statuses,
+ List<String> children, FileStatus archiveIndexStat) throws IOException {
+ // read the index file
+ FSDataInputStream aIn = null;
+ try {
+ aIn = fs.open(archiveIndex);
+ LineReader aLin;
+ long read = 0;
+ aLin = new LineReader(aIn, getConf());
+ String parentString = parent.getName();
+ Path harPath = new Path(parentString);
+ int harlen = harPath.depth();
+ Text line = new Text();
+ while (read < archiveIndexStat.getLen()) {
+ int tmp = aLin.readLine(line);
+ read += tmp;
+ String lineFeed = line.toString();
+ String child = lineFeed.substring(0, lineFeed.indexOf(" "));
+ if ((child.startsWith(parentString))) {
+ Path thisPath = new Path(child);
+ if (thisPath.depth() == harlen + 1) {
+ // bingo!
+ HarStatus hstatus = new HarStatus(lineFeed);
+ FileStatus childStatus = new FileStatus(hstatus.isDir() ? 0
+ : hstatus.getLength(), hstatus.isDir(), (int) archiveIndexStat
+ .getReplication(), archiveIndexStat.getBlockSize(),
+ archiveIndexStat.getModificationTime(), archiveIndexStat
+ .getAccessTime(), new FsPermission(archiveIndexStat
+ .getPermission()), archiveIndexStat.getOwner(),
+ archiveIndexStat.getGroup(), makeRelative(this.uri.toString(),
+ new Path(hstatus.name)));
+ statuses.add(childStatus);
+ }
+ line.clear();
+ }
+ }
+ } finally {
+ if (aIn != null) {
+ aIn.close();
+ }
+ }
+ }
+
// make sure that this harPath is relative to the har filesystem
// this only works for relative paths. This returns the line matching
// the file in the index. Returns a null if there is not matching
@@ -649,10 +693,8 @@ public class HarFileSystem extends Filte
archiveStatus.getOwner(), archiveStatus.getGroup(),
makeRelative(this.uri.toString(), new Path(hstatus.name))));
else
- for (String child: hstatus.children) {
- FileStatus tmp = getFileStatus(new Path(tmpPath, child));
- statuses.add(tmp);
- }
+ fileStatusesInIndex(hstatus, statuses, hstatus.children, archiveStatus);
+
return statuses.toArray(new FileStatus[statuses.size()]);
}