You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sz...@apache.org on 2009/05/26 19:45:43 UTC

svn commit: r778811 - in /hadoop/core/trunk: CHANGES.txt src/test/hdfs-with-mr/org/apache/hadoop/fs/TestCopyFiles.java src/tools/org/apache/hadoop/tools/DistCp.java

Author: szetszwo
Date: Tue May 26 17:45:43 2009
New Revision: 778811

URL: http://svn.apache.org/viewvc?rev=778811&view=rev
Log:
HADOOP-5472. Change DistCp to support globbing of input paths.  Contributed by Dhruba Borthakur and Rodrigo Schmidt

Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/test/hdfs-with-mr/org/apache/hadoop/fs/TestCopyFiles.java
    hadoop/core/trunk/src/tools/org/apache/hadoop/tools/DistCp.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=778811&r1=778810&r2=778811&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Tue May 26 17:45:43 2009
@@ -392,6 +392,9 @@
     HADOOP-5438. Provide a single FileSystem method to create or open-for-append 
     to a file.  (He Yongqiang via dhruba)
 
+    HADOOP-5472. Change DistCp to support globbing of input paths.  (Dhruba
+    Borthakur and Rodrigo Schmidt via szetszwo)
+
   OPTIMIZATIONS
 
     HADOOP-5595. NameNode does not need to run a replicator to choose a

Modified: hadoop/core/trunk/src/test/hdfs-with-mr/org/apache/hadoop/fs/TestCopyFiles.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/hdfs-with-mr/org/apache/hadoop/fs/TestCopyFiles.java?rev=778811&r1=778810&r2=778811&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/hdfs-with-mr/org/apache/hadoop/fs/TestCopyFiles.java (original)
+++ hadoop/core/trunk/src/test/hdfs-with-mr/org/apache/hadoop/fs/TestCopyFiles.java Tue May 26 17:45:43 2009
@@ -818,6 +818,36 @@
     }
   }
 
+  /** Runs DistCp with a wildcard source and checks the destination and log dir. */
+  public void testGlobbing() throws Exception {
+    String namenode = null;
+    MiniDFSCluster cluster = null;
+    try {
+      Configuration conf = new Configuration();
+      cluster = new MiniDFSCluster(conf, 2, true, null);
+      final FileSystem hdfs = cluster.getFileSystem();
+      namenode = FileSystem.getDefaultUri(conf).toString();
+      if (namenode.startsWith("hdfs://")) {
+        MyFile[] files = createFiles(URI.create(namenode), "/srcdat");
+        ToolRunner.run(new DistCp(conf), new String[] {
+                                         "-log",
+                                         namenode+"/logs",
+                                         namenode+"/srcdat/*", // source given as a glob pattern
+                                         namenode+"/destdat"});
+        assertTrue("Source and destination directories do not match.",
+                   checkFiles(hdfs, "/destdat", files));
+        FileSystem fs = FileSystem.get(URI.create(namenode+"/logs"), conf);
+        assertTrue("Log directory does not exist.",
+                   fs.exists(new Path(namenode+"/logs")));
+        deldir(hdfs, "/destdat");
+        deldir(hdfs, "/srcdat");
+        deldir(hdfs, "/logs");
+      }
+    } finally {
+      if (cluster != null) { cluster.shutdown(); } // always stop the mini cluster
+    }
+  }
+  
   static void create(FileSystem fs, Path f) throws IOException {
     FSDataOutputStream out = fs.create(f);
     try {

Modified: hadoop/core/trunk/src/tools/org/apache/hadoop/tools/DistCp.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/tools/org/apache/hadoop/tools/DistCp.java?rev=778811&r1=778810&r2=778811&view=diff
==============================================================================
--- hadoop/core/trunk/src/tools/org/apache/hadoop/tools/DistCp.java (original)
+++ hadoop/core/trunk/src/tools/org/apache/hadoop/tools/DistCp.java Tue May 26 17:45:43 2009
@@ -27,6 +27,7 @@
 import java.util.ArrayList;
 import java.util.EnumSet;
 import java.util.Iterator;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Random;
 import java.util.Stack;
@@ -616,15 +617,24 @@
   private static void checkSrcPath(Configuration conf, List<Path> srcPaths
       ) throws IOException {
     List<IOException> rslt = new ArrayList<IOException>();
+    List<Path> unglobbed = new LinkedList<Path>(); // sources after glob expansion
     for (Path p : srcPaths) {
       FileSystem fs = p.getFileSystem(conf);
-      if (!fs.exists(p)) {
+      FileStatus[] inputs = fs.globStatus(p);
+      // globStatus yields null (not an empty array) for a missing non-glob path
+      if (inputs != null && inputs.length > 0) {
+        for (FileStatus onePath: inputs) {
+          unglobbed.add(onePath.getPath());
+        }
+      } else {
         rslt.add(new IOException("Input source " + p + " does not exist."));
       }
     }
     if (!rslt.isEmpty()) {
       throw new InvalidInputException(rslt);
     }
+    srcPaths.clear(); // replace the caller's list with the expanded paths
+    srcPaths.addAll(unglobbed);
   }
 
   /**