You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by sz...@apache.org on 2010/02/10 00:36:13 UTC
svn commit: r908283 - in /hadoop/mapreduce/trunk: CHANGES.txt
src/tools/org/apache/hadoop/tools/HadoopArchives.java
Author: szetszwo
Date: Tue Feb 9 23:36:01 2010
New Revision: 908283
URL: http://svn.apache.org/viewvc?rev=908283&view=rev
Log:
MAPREDUCE-1425. Reduce memory usage by archive. Contributed by mahadev
Modified:
hadoop/mapreduce/trunk/CHANGES.txt
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java
Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=908283&r1=908282&r2=908283&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Tue Feb 9 23:36:01 2010
@@ -166,6 +166,8 @@
MAPREDUCE-1470. Move delegation tokens from HDFS to Common so that
MapReduce can use them too. (omalley)
+ MAPREDUCE-1425. Reduce memory usage by archive. (mahadev via szetszwo)
+
OPTIMIZATIONS
MAPREDUCE-270. Fix the tasktracker to optionally send an out-of-band
Modified: hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java?rev=908283&r1=908282&r2=908283&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java (original)
+++ hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java Tue Feb 9 23:36:01 2010
@@ -127,22 +127,23 @@
/**
* this assumes that there are two types of files file/dir
* @param fs the input filesystem
- * @param p the top level path
+ * @param fdir the filestatusdir of the path
* @param out the list of paths output of recursive ls
* @throws IOException
*/
- private void recursivels(FileSystem fs, Path p, List<FileStatus> out)
+ private void recursivels(FileSystem fs, FileStatusDir fdir, List<FileStatusDir> out)
throws IOException {
- FileStatus fstatus = fs.getFileStatus(p);
- if (!fstatus.isDir()) {
- out.add(fstatus);
+ if (!fdir.getFileStatus().isDir()) {
+ out.add(fdir);
return;
}
else {
- out.add(fstatus);
- FileStatus[] listStatus = fs.listStatus(p);
+ out.add(fdir);
+ FileStatus[] listStatus = fs.listStatus(fdir.getFileStatus().getPath());
+ fdir.setChildren(listStatus);
for (FileStatus stat: listStatus) {
- recursivels(fs, stat.getPath(), out);
+ FileStatusDir fstatDir = new FileStatusDir(stat, null);
+ recursivels(fs, fstatDir, out);
}
}
}
@@ -343,6 +344,50 @@
}
}
}
+
+ /**
+ * A static class that keeps
+ * track of the status of a path
+ * and its children if the path is a dir
+ */
+ static class FileStatusDir {
+ private FileStatus fstatus;
+ private FileStatus[] children = null;
+
+ /**
+ * constructor for filestatusdir
+ * @param fstatus the filestatus object that this filestatusdir wraps
+ * @param children the children list if fstatus is a directory, may be null
+ */
+ FileStatusDir(FileStatus fstatus, FileStatus[] children) {
+ this.fstatus = fstatus;
+ this.children = children;
+ }
+
+ /**
+ * set children of this object, replacing any previous list
+ * @param listStatus the list of children
+ */
+ public void setChildren(FileStatus[] listStatus) {
+ this.children = listStatus;
+ }
+
+ /**
+ * the filestatus of this object
+ * @return the filestatus passed to the constructor
+ */
+ FileStatus getFileStatus() {
+ return this.fstatus;
+ }
+
+ /**
+ * the children list of this object, null if none has been set
+ * @return the children list, possibly null
+ */
+ FileStatus[] getChildren() {
+ return this.children;
+ }
+ }
/**archive the given source paths into
* the dest
@@ -399,15 +444,18 @@
// and then write them to the input file
// one at a time
for (Path src: srcPaths) {
- ArrayList<FileStatus> allFiles = new ArrayList<FileStatus>();
- recursivels(fs, src, allFiles);
- for (FileStatus stat: allFiles) {
+ ArrayList<FileStatusDir> allFiles = new ArrayList<FileStatusDir>();
+ FileStatus fstatus = fs.getFileStatus(src);
+ FileStatusDir fdir = new FileStatusDir(fstatus, null);
+ recursivels(fs, fdir, allFiles);
+ for (FileStatusDir statDir: allFiles) {
+ FileStatus stat = statDir.getFileStatus();
String toWrite = "";
long len = stat.isDir()? 0:stat.getLen();
if (stat.isDir()) {
toWrite = "" + relPathToRoot(stat.getPath(), parentPath) + " dir ";
//get the children
- FileStatus[] list = fs.listStatus(stat.getPath());
+ FileStatus[] list = statDir.getChildren();
StringBuffer sbuff = new StringBuffer();
sbuff.append(toWrite);
for (FileStatus stats: list) {