You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by sz...@apache.org on 2010/02/10 00:36:13 UTC

svn commit: r908283 - in /hadoop/mapreduce/trunk: CHANGES.txt src/tools/org/apache/hadoop/tools/HadoopArchives.java

Author: szetszwo
Date: Tue Feb  9 23:36:01 2010
New Revision: 908283

URL: http://svn.apache.org/viewvc?rev=908283&view=rev
Log:
MAPREDUCE-1425. Reduce memory usage by archive. Contributed by mahadev

Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=908283&r1=908282&r2=908283&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Tue Feb  9 23:36:01 2010
@@ -166,6 +166,8 @@
     MAPREDUCE-1470. Move delegation tokens from HDFS to Common so that 
     MapReduce can use them too. (omalley)
 
+    MAPREDUCE-1425. Reduce memory usage by archive. (mahadev via szetszwo)
+
   OPTIMIZATIONS
 
     MAPREDUCE-270. Fix the tasktracker to optionally send an out-of-band

Modified: hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java?rev=908283&r1=908282&r2=908283&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java (original)
+++ hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java Tue Feb  9 23:36:01 2010
@@ -127,22 +127,23 @@
   /**
    * this assumes that there are two types of files file/dir
    * @param fs the input filesystem
-   * @param p the top level path 
+   * @param fdir the filestatusdir of the path  
    * @param out the list of paths output of recursive ls
    * @throws IOException
    */
-  private void recursivels(FileSystem fs, Path p, List<FileStatus> out) 
+  private void recursivels(FileSystem fs, FileStatusDir fdir, List<FileStatusDir> out) 
   throws IOException {
-    FileStatus fstatus = fs.getFileStatus(p);
-    if (!fstatus.isDir()) {
-      out.add(fstatus);
+    if (!fdir.getFileStatus().isDir()) {
+      out.add(fdir);
       return;
     }
     else {
-      out.add(fstatus);
-      FileStatus[] listStatus = fs.listStatus(p);
+      out.add(fdir);
+      FileStatus[] listStatus = fs.listStatus(fdir.getFileStatus().getPath());
+      fdir.setChildren(listStatus);
       for (FileStatus stat: listStatus) {
-        recursivels(fs, stat.getPath(), out);
+        FileStatusDir fstatDir = new FileStatusDir(stat, null);
+        recursivels(fs, fstatDir, out);
       }
     }
   }
@@ -343,6 +344,50 @@
       }
     }
   }
+    
+  /**
+   * A static class that keeps
+   * track of status of a path 
+   * and there children if path is a dir
+   */
+  static class FileStatusDir {
+    private FileStatus fstatus;
+    private FileStatus[] children = null;
+    
+    /**
+     * constructor for filestatusdir
+     * @param fstatus the filestatus object that maps to filestatusdir
+     * @param children the children list if fs is a directory
+     */
+    FileStatusDir(FileStatus fstatus, FileStatus[] children) {
+      this.fstatus  = fstatus;
+      this.children = children;
+    }
+    
+    /**
+     * set children of this object
+     * @param listStatus the list of children
+     */
+    public void setChildren(FileStatus[] listStatus) {
+      this.children = listStatus;
+    }
+
+    /**
+     * the filestatus of this object
+     * @return the filestatus of this object
+     */
+    FileStatus getFileStatus() {
+      return this.fstatus;
+    }
+    
+    /**
+     * the children list of this object, null if  
+     * @return the children list
+     */
+    FileStatus[] getChildren() {
+      return this.children;
+    }
+  }
   
   /**archive the given source paths into
    * the dest
@@ -399,15 +444,18 @@
       // and then write them to the input file 
       // one at a time
       for (Path src: srcPaths) {
-        ArrayList<FileStatus> allFiles = new ArrayList<FileStatus>();
-        recursivels(fs, src, allFiles);
-        for (FileStatus stat: allFiles) {
+        ArrayList<FileStatusDir> allFiles = new ArrayList<FileStatusDir>();
+        FileStatus fstatus = fs.getFileStatus(src);
+        FileStatusDir fdir = new FileStatusDir(fstatus, null);
+        recursivels(fs, fdir, allFiles);
+        for (FileStatusDir statDir: allFiles) {
+          FileStatus stat = statDir.getFileStatus();
           String toWrite = "";
           long len = stat.isDir()? 0:stat.getLen();
           if (stat.isDir()) {
             toWrite = "" + relPathToRoot(stat.getPath(), parentPath) + " dir ";
             //get the children 
-            FileStatus[] list = fs.listStatus(stat.getPath());
+            FileStatus[] list = statDir.getChildren();
             StringBuffer sbuff = new StringBuffer();
             sbuff.append(toWrite);
             for (FileStatus stats: list) {