You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by cd...@apache.org on 2010/02/13 11:11:21 UTC

svn commit: r909783 - in /hadoop/mapreduce/trunk: CHANGES.txt src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java src/tools/org/apache/hadoop/tools/DistCp.java

Author: cdouglas
Date: Sat Feb 13 10:11:20 2010
New Revision: 909783

URL: http://svn.apache.org/viewvc?rev=909783&view=rev
Log:
MAPREDUCE-1305. Improve efficiency of distcp -delete. Contributed by Peter Romianowski

Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=909783&r1=909782&r2=909783&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Sat Feb 13 10:11:20 2010
@@ -326,6 +326,9 @@
     MAPREDUCE-1469. Sqoop should disable speculative execution in export.
     (Aaron Kimball via tomwhite)
 
+    MAPREDUCE-1305. Improve efficiency of distcp -delete. (Peter Romianowski
+    via cdouglas)
+
 Release 0.21.0 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java?rev=909783&r1=909782&r2=909783&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java (original)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestCopyFiles.java Sat Feb 13 10:11:20 2010
@@ -951,6 +951,7 @@
   /** test -delete */
   public void testDelete() throws Exception {
     final Configuration conf = new Configuration();
+    conf.setInt("fs.trash.interval", 60);
     MiniDFSCluster cluster = null;
     try {
       cluster = new MiniDFSCluster(conf, 2, true, null);
@@ -1001,6 +1002,12 @@
         dstresults = removePrefix(dstresults, dstrootdir);
         System.out.println("second dstresults=" +  dstresults);
         assertEquals(srcresults, dstresults);
+        // verify that files removed in -delete were moved to the trash
+        // regrettably, this test will break if Trash changes incompatibly
+        assertTrue(fs.exists(new Path(fs.getHomeDirectory(),
+                ".Trash/Current" + dstrootdir + "/foo")));
+        assertTrue(fs.exists(new Path(fs.getHomeDirectory(),
+                ".Trash/Current" + dstrootdir + "/foobar")));
 
         //cleanup
         deldir(fs, dstrootdir);

Modified: hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java?rev=909783&r1=909782&r2=909783&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java (original)
+++ hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java Sat Feb 13 10:11:20 2010
@@ -48,12 +48,14 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FsShell;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.Trash;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.mapred.FileOutputFormat;
@@ -1571,7 +1573,7 @@
     //write dst lsr results
     final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
     final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf,
-        dstlsr, Text.class, dstroot.getClass(),
+        dstlsr, Text.class, NullWritable.class,
         SequenceFile.CompressionType.NONE);
     try {
       //do lsr to get all file statuses in dstroot
@@ -1581,7 +1583,7 @@
         if (status.isDir()) {
           for(FileStatus child : dstfs.listStatus(status.getPath())) {
             String relative = makeRelative(dstroot.getPath(), child.getPath());
-            writer.append(new Text(relative), child);
+            writer.append(new Text(relative), NullWritable.get());
             lsrstack.push(child);
           }
         }
@@ -1593,7 +1595,7 @@
     //sort lsr results
     final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
     SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs,
-        new Text.Comparator(), Text.class, FileStatus.class, jobconf);
+        new Text.Comparator(), Text.class, NullWritable.class, jobconf);
     sorter.sort(dstlsr, sortedlsr);
 
     //compare lsr list and dst list  
@@ -1606,16 +1608,15 @@
 
       //compare sorted lsr list and sorted dst list
       final Text lsrpath = new Text();
-      final FileStatus lsrstatus = new FileStatus();
       final Text dstpath = new Text();
       final Text dstfrom = new Text();
-      final FsShell shell = new FsShell(conf);
-      final String[] shellargs = {"-rmr", null};
+      final Trash trash = new Trash(dstfs, conf);
+      Path lastpath = null;
 
       boolean hasnext = dstin.next(dstpath, dstfrom);
-      for(; lsrin.next(lsrpath, lsrstatus); ) {
+      while (lsrin.next(lsrpath, NullWritable.get())) {
         int dst_cmp_lsr = dstpath.compareTo(lsrpath);
-        for(; hasnext && dst_cmp_lsr < 0; ) {
+        while (hasnext && dst_cmp_lsr < 0) {
           hasnext = dstin.next(dstpath, dstfrom);
           dst_cmp_lsr = dstpath.compareTo(lsrpath);
         }
@@ -1623,23 +1624,15 @@
         if (dst_cmp_lsr == 0) {
           //lsrpath exists in dst, skip it
           hasnext = dstin.next(dstpath, dstfrom);
-        }
-        else {
+        } else {
           //lsrpath does not exist, delete it
-          String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
+          final Path rmpath = new Path(dstroot.getPath(), lsrpath.toString());
           ++deletedPathsCount;
-          if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
-            shellargs[1] = s;
-            int r = 0;
-            try {
-               r = shell.run(shellargs);
-            } catch(Exception e) {
-              throw new IOException("Exception from shell.", e);
-            }
-            if (r != 0) {
-              throw new IOException("\"" + shellargs[0] + " " + shellargs[1]
-                  + "\" returns non-zero value " + r);
+          if ((lastpath == null || !isAncestorPath(lastpath, rmpath))) {
+            if (!(trash.moveToTrash(rmpath) || dstfs.delete(rmpath, true))) {
+              throw new IOException("Failed to delete " + rmpath);
             }
+            lastpath = rmpath;
           }
         }
       }
@@ -1651,7 +1644,9 @@
   }
 
   //is x an ancestor path of y?
-  static private boolean isAncestorPath(String x, String y) {
+  static private boolean isAncestorPath(Path xp, Path yp) {
+    final String x = xp.toString();
+    final String y = yp.toString();
     if (!y.startsWith(x)) {
       return false;
     }