You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by wh...@apache.org on 2015/04/02 01:51:09 UTC

hadoop git commit: HDFS-8036. Use snapshot path as source when using snapshot diff report in DistCp. Contributed by Jing Zhao.

Repository: hadoop
Updated Branches:
  refs/heads/branch-2 6f753da4a -> 9e114ee60


HDFS-8036. Use snapshot path as source when using snapshot diff report in DistCp. Contributed by Jing Zhao.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/9e114ee6
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/9e114ee6
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/9e114ee6

Branch: refs/heads/branch-2
Commit: 9e114ee6078ce5c427275c1c4d05edd131c7051c
Parents: 6f753da
Author: Haohui Mai <wh...@apache.org>
Authored: Wed Apr 1 16:50:59 2015 -0700
Committer: Haohui Mai <wh...@apache.org>
Committed: Wed Apr 1 16:51:09 2015 -0700

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 ++
 .../org/apache/hadoop/tools/DistCpSync.java     | 21 +++++++++-
 .../hadoop/tools/mapred/CopyCommitter.java      |  3 +-
 .../org/apache/hadoop/tools/TestDistCpSync.java | 40 +++++++++++++++++++-
 4 files changed, 63 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/9e114ee6/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 8139b02..8f9fcd9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1041,6 +1041,9 @@ Release 2.7.0 - UNRELEASED
     HDFS-7748. Separate ECN flags from the Status in the DataTransferPipelineAck.
     (Anu Engineer and Haohui Mai via wheat9)
 
+    HDFS-8036. Use snapshot path as source when using snapshot diff report in
+    DistCp. (Jing Zhao via wheat9)
+
     BREAKDOWN OF HDFS-7584 SUBTASKS AND RELATED JIRAS
 
       HDFS-7720. Quota by Storage Type API, tools and ClientNameNode

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9e114ee6/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java
index 26d7eb4..8e71b6f 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
 
 import java.io.IOException;
@@ -86,6 +87,22 @@ class DistCpSync {
     } finally {
       deleteTargetTmpDir(targetFs, tmpDir);
       // TODO: since we have tmp directory, we can support "undo" with failures
+      // set the source path using the snapshot path
+      inputOptions.setSourcePaths(Arrays.asList(getSourceSnapshotPath(sourceDir,
+          inputOptions.getToSnapshot())));
+    }
+  }
+
+  private static String getSnapshotName(String name) {
+    return Path.CUR_DIR.equals(name) ? "" : name;
+  }
+
+  private static Path getSourceSnapshotPath(Path sourceDir, String snapshotName) {
+    if (Path.CUR_DIR.equals(snapshotName)) {
+      return sourceDir;
+    } else {
+      return new Path(sourceDir,
+          HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + snapshotName);
     }
   }
 
@@ -136,8 +153,10 @@ class DistCpSync {
   static DiffInfo[] getDiffs(DistCpOptions inputOptions,
       DistributedFileSystem fs, Path sourceDir, Path targetDir) {
     try {
+      final String from = getSnapshotName(inputOptions.getFromSnapshot());
+      final String to = getSnapshotName(inputOptions.getToSnapshot());
       SnapshotDiffReport sourceDiff = fs.getSnapshotDiffReport(sourceDir,
-          inputOptions.getFromSnapshot(), inputOptions.getToSnapshot());
+          from, to);
       return DiffInfo.getDiffs(sourceDiff, targetDir);
     } catch (IOException e) {
       DistCp.LOG.warn("Failed to compute snapshot diff on " + sourceDir, e);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9e114ee6/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
index 9ec57f4..2b1e510 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
@@ -90,8 +90,7 @@ public class CopyCommitter extends FileOutputCommitter {
     }
 
     try {
-      if (conf.getBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false)
-          && !(conf.getBoolean(DistCpConstants.CONF_LABEL_DIFF, false))) {
+      if (conf.getBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false)) {
         deleteMissing(conf);
       } else if (conf.getBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false)) {
         commitData(conf);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/9e114ee6/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java
index 7d5dad0..75d1de5 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.SequenceFile;
@@ -97,6 +98,8 @@ public class TestDistCpSync {
     dfs.createSnapshot(source, "s2");
     dfs.createSnapshot(target, "s1");
     Assert.assertTrue(DistCpSync.sync(options, conf));
+    // reset source paths in options
+    options.setSourcePaths(Arrays.asList(source));
 
     // changes have been made in target
     final Path subTarget = new Path(target, "sub");
@@ -183,9 +186,21 @@ public class TestDistCpSync {
     changeData(source);
     dfs.createSnapshot(source, "s2");
 
+    // before sync, make some further changes on source. this should not affect
+    // the later distcp since we're copying (s2-s1) to target
+    final Path toDelete = new Path(source, "foo/d1/foo/f1");
+    dfs.delete(toDelete, true);
+    final Path newdir = new Path(source, "foo/d1/foo/newdir");
+    dfs.mkdirs(newdir);
+
     // do the sync
     Assert.assertTrue(DistCpSync.sync(options, conf));
 
+    // make sure the source path has been updated to the snapshot path
+    final Path spath = new Path(source,
+        HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2");
+    Assert.assertEquals(spath, options.getSourcePaths().get(0));
+
     // build copy listing
     final Path listingPath = new Path("/tmp/META/fileList.seq");
     CopyListing listing = new GlobbedCopyListing(conf, new Credentials());
@@ -209,7 +224,7 @@ public class TestDistCpSync {
         .getCounter(CopyMapper.Counter.BYTESCOPIED).getValue());
 
     // verify the source and target now has the same structure
-    verifyCopy(dfs.getFileStatus(source), dfs.getFileStatus(target), false);
+    verifyCopy(dfs.getFileStatus(spath), dfs.getFileStatus(target), false);
   }
 
   private Map<Text, CopyListingFileStatus> getListing(Path listingPath)
@@ -248,6 +263,29 @@ public class TestDistCpSync {
     }
   }
 
+  /**
+   * Similar test with testSync, but the "to" snapshot is specified as "."
+   * @throws Exception
+   */
+  @Test
+  public void testSyncWithCurrent() throws Exception {
+    options.setUseDiff(true, "s1", ".");
+    initData(source);
+    initData(target);
+    dfs.allowSnapshot(source);
+    dfs.allowSnapshot(target);
+    dfs.createSnapshot(source, "s1");
+    dfs.createSnapshot(target, "s1");
+
+    // make changes under source
+    changeData(source);
+
+    // do the sync
+    Assert.assertTrue(DistCpSync.sync(options, conf));
+    // make sure the source path is still unchanged
+    Assert.assertEquals(source, options.getSourcePaths().get(0));
+  }
+
   private void initData2(Path dir) throws Exception {
     final Path test = new Path(dir, "test");
     final Path foo = new Path(dir, "foo");