Posted to common-commits@hadoop.apache.org by om...@apache.org on 2011/03/04 04:51:31 UTC

svn commit: r1077203 - /hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/HarFileSystem.java

Author: omalley
Date: Fri Mar  4 03:51:31 2011
New Revision: 1077203

URL: http://svn.apache.org/viewvc?rev=1077203&view=rev
Log:
commit adde1686a98f922348feca8195e388f3cc6a159d
Author: Mahadev Konar <ma...@cdev6022.inktomisearch.com>
Date:   Tue Feb 23 06:57:55 2010 +0000

    HADOOP:6467 from http://issues.apache.org/jira/secure/attachment/12436653/HADOOP-6467-y.0.20-branch-v2.patch
    
    +++ b/YAHOO-CHANGES.txt
    +    HADOOP-6467.  Performance improvement for liststatus on directories in
    +    hadoop archives. (mahadev)
    +
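
For context, the patch targets FileSystem#listStatus on directories inside a Hadoop
archive (har). A typical caller looks roughly like the sketch below; the archive
path, class name and directory are illustrative, not part of this patch.

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class ListHarDirectory {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical archive: a .har directory on the default filesystem.
        FileSystem harFs = FileSystem.get(new URI("har:///user/someone/archive.har"), conf);
        // Each listStatus call on a directory inside the archive is what
        // HADOOP-6467 speeds up.
        for (FileStatus status : harFs.listStatus(new Path("/user/someone/archive.har/dir"))) {
          System.out.println(status.getPath());
        }
      }
    }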

Modified:
    hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/HarFileSystem.java

Modified: hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/HarFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/HarFileSystem.java?rev=1077203&r1=1077202&r2=1077203&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/HarFileSystem.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/core/org/apache/hadoop/fs/HarFileSystem.java Fri Mar  4 03:51:31 2011
@@ -324,25 +324,12 @@ public class HarFileSystem extends Filte
   @Override
   public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
       long len) throws IOException {
-    // need to look up the file in the underlying fs
-    // look up the index 
-    
-    // make sure this is a prt of this har filesystem
-    Path p = makeQualified(file.getPath());
-    Path harPath = getPathInHar(p);
-    String line = fileStatusInIndex(harPath);
-    if (line == null)  {
-      throw new FileNotFoundException("File " + file.getPath() + " not found");
-    }
-    HarStatus harStatus = new HarStatus(line);
-    if (harStatus.isDir()) {
-      return new BlockLocation[0];
-    }
-    FileStatus fsFile = fs.getFileStatus(new Path(archivePath,
-        harStatus.getPartName()));
-    BlockLocation[] rawBlocks = fs.getFileBlockLocations(fsFile, 
-        harStatus.getStartIndex() + start, len);
-    return fakeBlockLocations(rawBlocks, harStatus.getStartIndex());
+    // Just return a fake block location.
+    // It is fast and simpler, since doing the
+    // various block location manipulations against
+    // part files adds a lot of overhead because
+    // of the FileStatus lookups in the index files.
+    return new BlockLocation[]{ new BlockLocation() };
   }
   
   /**
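
The hunk above drops the per-file index lookup in getFileBlockLocations and returns a
single default-constructed BlockLocation instead. A consequence worth noting (this rests
on an assumption about BlockLocation's no-arg constructor leaving hosts and names empty,
not something the patch states): callers no longer see host information for files inside
an archive, so data-locality hints for har inputs are effectively gone. A minimal sketch
of what a caller observes:

    import java.io.IOException;

    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;

    public class HarLocalityCheck {
      // Prints how many block locations and host hints a file reports. After this
      // patch, a file inside a har is expected to report one location with zero
      // hosts (again assuming the no-arg BlockLocation carries no host names).
      public static void print(FileSystem harFs, FileStatus status) throws IOException {
        BlockLocation[] locations = harFs.getFileBlockLocations(status, 0, status.getLen());
        int hostHints = 0;
        for (BlockLocation location : locations) {
          hostHints += location.getHosts().length;
        }
        System.out.println("locations=" + locations.length + ", host hints=" + hostHints);
      }
    }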
@@ -386,6 +373,63 @@ public class HarFileSystem extends Filte
     public int endHash;
   }
   
+  /**
+   * Get the FileStatus of all the children of a given directory. This reads
+   * through the archive index file line by line to collect the statuses of
+   * all children of the directory. It is a brute-force way of getting them all.
+   * 
+   * @param parent
+   *          the parent path directory
+   * @param statuses
+   *          the list to add the children filestatuses to
+   * @param children
+   *          the string list of children for this parent
+   * @param archiveIndexStat
+   *          the archive index filestatus
+   */
+  private void fileStatusesInIndex(HarStatus parent, List<FileStatus> statuses,
+      List<String> children, FileStatus archiveIndexStat) throws IOException {
+    // read the index file
+    FSDataInputStream aIn = null;
+    try {
+      aIn = fs.open(archiveIndex);
+      LineReader aLin;
+      long read = 0;
+      aLin = new LineReader(aIn, getConf());
+      String parentString = parent.getName();
+      Path harPath = new Path(parentString);
+      int harlen = harPath.depth();
+      Text line = new Text();
+      while (read < archiveIndexStat.getLen()) {
+        int tmp = aLin.readLine(line);
+        read += tmp;
+        String lineFeed = line.toString();
+        String child = lineFeed.substring(0, lineFeed.indexOf(" "));
+        if ((child.startsWith(parentString))) {
+          Path thisPath = new Path(child);
+          if (thisPath.depth() == harlen + 1) {
+            // this entry is a direct child of the parent
+            HarStatus hstatus = new HarStatus(lineFeed);
+            FileStatus childStatus = new FileStatus(hstatus.isDir() ? 0
+                : hstatus.getLength(), hstatus.isDir(), (int) archiveIndexStat
+                .getReplication(), archiveIndexStat.getBlockSize(),
+                archiveIndexStat.getModificationTime(), archiveIndexStat
+                    .getAccessTime(), new FsPermission(archiveIndexStat
+                    .getPermission()), archiveIndexStat.getOwner(),
+                archiveIndexStat.getGroup(), makeRelative(this.uri.toString(),
+                    new Path(hstatus.name)));
+            statuses.add(childStatus);
+          }
+          line.clear();
+        }
+      }
+    } finally {
+      if (aIn != null) {
+        aIn.close();
+      }
+    }
+  }
+  
   // make sure that this harPath is relative to the har filesystem
   // this only works for relative paths. This returns the line matching
   // the file in the index. Returns a null if there is not matching 
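
The new fileStatusesInIndex above replaces the per-child lookups with one sequential
read of the archive index, keeping only the lines whose path sits exactly one level
below the parent. Stripped of the HarStatus and FileStatus plumbing, the selection
logic amounts to the following standalone sketch (the index lines are assumed to
start with the entry's path followed by a space, which is all the filter relies on):

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.fs.Path;

    public class HarChildFilter {
      // One pass over the index lines, keeping entries that are direct children of
      // parent. Mirrors the startsWith plus depth() == parentDepth + 1 test above.
      public static List<String> directChildren(List<String> indexLines, String parent) {
        int parentDepth = new Path(parent).depth();
        List<String> children = new ArrayList<String>();
        for (String line : indexLines) {
          String path = line.substring(0, line.indexOf(' '));
          if (path.startsWith(parent) && new Path(path).depth() == parentDepth + 1) {
            children.add(line);
          }
        }
        return children;
      }
    }

A single pass keeps the cost proportional to the index size regardless of how many
children the directory has, instead of re-reading the index once per child.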
@@ -649,10 +693,8 @@ public class HarFileSystem extends Filte
             archiveStatus.getOwner(), archiveStatus.getGroup(), 
             makeRelative(this.uri.toString(), new Path(hstatus.name))));
     else 
-      for (String child: hstatus.children) {
-        FileStatus tmp = getFileStatus(new Path(tmpPath, child));
-        statuses.add(tmp);
-      }
+      fileStatusesInIndex(hstatus, statuses, hstatus.children, archiveStatus);
+    
     return statuses.toArray(new FileStatus[statuses.size()]);
   }
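
The final hunk wires the helper in: listing a directory no longer issues one
getFileStatus call, and with it one scan of the index, per child; a single pass over
the index now yields every child's status. As the surrounding code shows, the
per-child metadata (owner, group, times, permission) still comes from the archive
index file's own status. An illustrative caller, with hypothetical names:

    import java.io.IOException;

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class HarListing {
      // Lists a har directory; after this patch every child is expected to report
      // the same owner and modification time, namely those of the index file.
      public static void show(FileSystem harFs, Path dirInsideHar) throws IOException {
        for (FileStatus child : harFs.listStatus(dirInsideHar)) {
          System.out.println(child.getPath() + "\t" + child.getOwner() + "\t"
              + child.getModificationTime());
        }
      }
    }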