You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by bo...@apache.org on 2012/03/06 23:44:27 UTC

svn commit: r1297771 - in /hadoop/common/trunk/hadoop-common-project/hadoop-common: ./ src/main/java/org/apache/hadoop/fs/shell/ src/test/java/org/apache/hadoop/fs/

Author: bobby
Date: Tue Mar  6 22:44:26 2012
New Revision: 1297771

URL: http://svn.apache.org/viewvc?rev=1297771&view=rev
Log:
HADOOP-8140. dfs -getmerge should process its argments better (Daryn Sharp via bobby)

Modified:
    hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
    hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
    hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java
    hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1297771&r1=1297770&r2=1297771&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt Tue Mar  6 22:44:26 2012
@@ -345,6 +345,9 @@ Release 0.23.2 - UNRELEASED 
     HADOOP-8064. Remove unnecessary dependency on w3c.org in document processing
     (Khiwal Lee via bobby)
 
+    HADOOP-8140. dfs -getmerge should process its argments better (Daryn Sharp
+    via bobby)
+
 Release 0.23.1 - 2012-02-17 
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java?rev=1297771&r1=1297770&r2=1297771&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java Tue Mar  6 22:44:26 2012
@@ -25,7 +25,10 @@ import java.util.List;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.shell.PathExceptions.PathIsDirectoryException;
+import org.apache.hadoop.io.IOUtils;
 
 /** Various commands for copy files */
 @InterfaceAudience.Private
@@ -53,21 +56,69 @@ class CopyCommands {  
 
     protected PathData dst = null;
     protected String delimiter = null;
+    protected List<PathData> srcs = null;
 
     @Override
     protected void processOptions(LinkedList<String> args) throws IOException {
-      CommandFormat cf = new CommandFormat(2, 3, "nl");
+      CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "nl");
       cf.parse(args);
 
       delimiter = cf.getOpt("nl") ? "\n" : null;
 
       dst = new PathData(new File(args.removeLast()), getConf());
+      if (dst.exists && dst.stat.isDirectory()) {
+        throw new PathIsDirectoryException(dst.toString());
+      }
+      srcs = new LinkedList<PathData>();
+    }
+
+    @Override
+    protected void processArguments(LinkedList<PathData> items)
+    throws IOException {
+      super.processArguments(items);
+      if (exitCode != 0) { // check for error collecting paths
+        return;
+      }
+      FSDataOutputStream out = dst.fs.create(dst.path);
+      try {
+        FSDataInputStream in = null;
+        for (PathData src : srcs) {
+          try {
+            in = src.fs.open(src.path);
+            IOUtils.copyBytes(in, out, getConf(), false);
+            if (delimiter != null) {
+              out.write(delimiter.getBytes("UTF-8"));
+            }
+          } finally {
+            in.close();
+          }
+        }
+      } finally {
+        out.close();
+      }      
+    }
+ 
+    @Override
+    protected void processNonexistentPath(PathData item) throws IOException {
+      exitCode = 1; // flag that a path is bad
+      super.processNonexistentPath(item);
     }
 
+    // this command is handled a bit differently than others.  the paths
+    // are batched up instead of actually being processed.  this avoids
+    // unnecessarily streaming into the merge file and then encountering
+    // a path error that should abort the merge
+    
     @Override
     protected void processPath(PathData src) throws IOException {
-      FileUtil.copyMerge(src.fs, src.path,
-          dst.fs, dst.path, false, getConf(), delimiter);
+      // for directories, recurse one level to get its files, else skip it
+      if (src.stat.isDirectory()) {
+        if (getDepth() == 0) {
+          recursePath(src);
+        } // skip subdirs
+      } else {
+        srcs.add(src);
+      }
     }
   }
 

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java?rev=1297771&r1=1297770&r2=1297771&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java Tue Mar  6 22:44:26 2012
@@ -23,6 +23,7 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.util.Arrays;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
@@ -35,6 +36,7 @@ import org.apache.hadoop.fs.shell.PathEx
 import org.apache.hadoop.fs.shell.PathExceptions.PathIsDirectoryException;
 import org.apache.hadoop.fs.shell.PathExceptions.PathIsNotDirectoryException;
 import org.apache.hadoop.fs.shell.PathExceptions.PathNotFoundException;
+import org.apache.hadoop.io.Writable;
 
 /**
  * Encapsulates a Path (path), its FileStatus (stat), and its FileSystem (fs).
@@ -43,7 +45,7 @@ import org.apache.hadoop.fs.shell.PathEx
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
 
-public class PathData {
+public class PathData implements Comparable<PathData> {
   protected final URI uri;
   public final FileSystem fs;
   public final Path path;
@@ -210,6 +212,7 @@ public class PathData {
       String child = getStringForChildPath(stats[i].getPath());
       items[i] = new PathData(fs, child, stats[i]);
     }
+    Arrays.sort(items);
     return items;
   }
 
@@ -303,6 +306,7 @@ public class PathData {
         items[i++] = new PathData(fs, globMatch, stat);
       }
     }
+    Arrays.sort(items);
     return items;
   }
 
@@ -446,4 +450,20 @@ public class PathData {
     }
   }
 
+  @Override
+  public int compareTo(PathData o) {
+    return path.compareTo(((PathData)o).path);
+  }
+  
+  @Override
+  public boolean equals(Object o) {
+    return (o != null) &&
+           (o instanceof PathData) &&
+           path.equals(((PathData)o).path);
+  }
+  
+  @Override
+  public int hashCode() {
+    return path.hashCode();
+  }
 }

Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java?rev=1297771&r1=1297770&r2=1297771&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java (original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java Tue Mar  6 22:44:26 2012
@@ -225,6 +225,110 @@ public class TestFsShellCopy {  
     assertEquals(exitCode, gotExit);
   }
   
+  @Test
+  public void testCopyMerge() throws Exception {
+    Path root = new Path(testRootDir, "TestMerge");
+    Path f1 = new Path(root, "f1");
+    Path f2 = new Path(root, "f2");
+    Path f3 = new Path(root, "f3");
+    Path fnf = new Path(root, "fnf");
+    Path d = new Path(root, "dir");
+    Path df1 = new Path(d, "df1");
+    Path df2 = new Path(d, "df2");
+    Path df3 = new Path(d, "df3");
+    
+    createFile(f1, f2, f3, df1, df2, df3);
+    
+    int exit;
+    // one file, kind of silly
+    exit = shell.run(new String[]{
+        "-getmerge",
+        f1.toString(),
+        "out" });
+    assertEquals(0, exit);
+    assertEquals("f1", readFile("out"));
+
+    exit = shell.run(new String[]{
+        "-getmerge",
+        fnf.toString(),
+        "out" });
+    assertEquals(1, exit);
+    assertFalse(lfs.exists(new Path("out")));
+
+    // two files
+    exit = shell.run(new String[]{
+        "-getmerge",
+        f1.toString(), f2.toString(),
+        "out" });
+    assertEquals(0, exit);
+    assertEquals("f1f2", readFile("out"));
+
+    // two files, preserves order
+    exit = shell.run(new String[]{
+        "-getmerge",
+        f2.toString(), f1.toString(),
+        "out" });
+    assertEquals(0, exit);
+    assertEquals("f2f1", readFile("out"));
+
+    // two files
+    exit = shell.run(new String[]{
+        "-getmerge", "-nl",
+        f1.toString(), f2.toString(),
+        "out" });
+    assertEquals(0, exit);
+    assertEquals("f1\nf2\n", readFile("out"));
+
+    // glob three files
+    shell.run(new String[]{
+        "-getmerge", "-nl",
+        new Path(root, "f*").toString(),
+        "out" });
+    assertEquals(0, exit);
+    assertEquals("f1\nf2\nf3\n", readFile("out"));
+
+    // directory with 3 files, should skip subdir
+    shell.run(new String[]{
+        "-getmerge", "-nl",
+        root.toString(),
+        "out" });
+    assertEquals(0, exit);
+    assertEquals("f1\nf2\nf3\n", readFile("out"));
+
+    // subdir
+    shell.run(new String[]{
+        "-getmerge", "-nl",
+        d.toString(), "out"});
+    assertEquals(0, exit);
+    assertEquals("df1\ndf2\ndf3\n", readFile("out"));
+
+    // file, dir, file
+    shell.run(new String[]{
+        "-getmerge", "-nl",
+        f1.toString(), d.toString(), f2.toString(), "out" });
+    assertEquals(0, exit);
+    assertEquals("f1\ndf1\ndf2\ndf3\nf2\n", readFile("out"));
+  }
+  
+  private void createFile(Path ... paths) throws IOException {
+    for (Path path : paths) {
+      FSDataOutputStream out = lfs.create(path);
+      out.write(path.getName().getBytes());
+      out.close();
+    }
+  }
+  
+  private String readFile(String out) throws IOException {
+    Path path = new Path(out);
+    FileStatus stat = lfs.getFileStatus(path);
+    FSDataInputStream in = lfs.open(path);
+    byte[] buffer = new byte[(int)stat.getLen()];
+    in.readFully(buffer);
+    in.close();
+    lfs.delete(path, false);
+    return new String(buffer);
+  }
+  
   // path handles "." rather oddly
   private String pathAsString(Path p) {
     String s = (p == null) ? Path.CUR_DIR : p.toString();