You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by zh...@apache.org on 2015/04/02 20:09:42 UTC
[04/50] [abbrv] hadoop git commit: HDFS-8036. Use snapshot path as
source when using snapshot diff report in DistCp. Contributed by Jing Zhao.
HDFS-8036. Use snapshot path as source when using snapshot diff report in DistCp. Contributed by Jing Zhao.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/75cb1d42
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/75cb1d42
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/75cb1d42
Branch: refs/heads/HDFS-7285
Commit: 75cb1d42abec54ef5484636e020949ceebe189e9
Parents: 3c7adaa
Author: Haohui Mai <wh...@apache.org>
Authored: Wed Apr 1 16:50:59 2015 -0700
Committer: Haohui Mai <wh...@apache.org>
Committed: Wed Apr 1 16:50:59 2015 -0700
----------------------------------------------------------------------
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++
.../org/apache/hadoop/tools/DistCpSync.java | 21 +++++++++-
.../hadoop/tools/mapred/CopyCommitter.java | 3 +-
.../org/apache/hadoop/tools/TestDistCpSync.java | 40 +++++++++++++++++++-
4 files changed, 63 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/75cb1d42/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index f265ead..1d9e200 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1345,6 +1345,9 @@ Release 2.7.0 - UNRELEASED
HDFS-7748. Separate ECN flags from the Status in the DataTransferPipelineAck.
(Anu Engineer and Haohui Mai via wheat9)
+ HDFS-8036. Use snapshot path as source when using snapshot diff report in
+ DistCp. (Jing Zhao via wheat9)
+
BREAKDOWN OF HDFS-7584 SUBTASKS AND RELATED JIRAS
HDFS-7720. Quota by Storage Type API, tools and ClientNameNode
http://git-wip-us.apache.org/repos/asf/hadoop/blob/75cb1d42/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java
index 26d7eb4..8e71b6f 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
import java.io.IOException;
@@ -86,6 +87,22 @@ class DistCpSync {
} finally {
deleteTargetTmpDir(targetFs, tmpDir);
// TODO: since we have tmp directory, we can support "undo" with failures
+ // set the source path using the snapshot path
+ inputOptions.setSourcePaths(Arrays.asList(getSourceSnapshotPath(sourceDir,
+ inputOptions.getToSnapshot())));
+ }
+ }
+
+ private static String getSnapshotName(String name) {
+ return Path.CUR_DIR.equals(name) ? "" : name;
+ }
+
+ private static Path getSourceSnapshotPath(Path sourceDir, String snapshotName) {
+ if (Path.CUR_DIR.equals(snapshotName)) {
+ return sourceDir;
+ } else {
+ return new Path(sourceDir,
+ HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + snapshotName);
}
}
@@ -136,8 +153,10 @@ class DistCpSync {
static DiffInfo[] getDiffs(DistCpOptions inputOptions,
DistributedFileSystem fs, Path sourceDir, Path targetDir) {
try {
+ final String from = getSnapshotName(inputOptions.getFromSnapshot());
+ final String to = getSnapshotName(inputOptions.getToSnapshot());
SnapshotDiffReport sourceDiff = fs.getSnapshotDiffReport(sourceDir,
- inputOptions.getFromSnapshot(), inputOptions.getToSnapshot());
+ from, to);
return DiffInfo.getDiffs(sourceDiff, targetDir);
} catch (IOException e) {
DistCp.LOG.warn("Failed to compute snapshot diff on " + sourceDir, e);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/75cb1d42/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
index 9ec57f4..2b1e510 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
@@ -90,8 +90,7 @@ public class CopyCommitter extends FileOutputCommitter {
}
try {
- if (conf.getBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false)
- && !(conf.getBoolean(DistCpConstants.CONF_LABEL_DIFF, false))) {
+ if (conf.getBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false)) {
deleteMissing(conf);
} else if (conf.getBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false)) {
commitData(conf);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/75cb1d42/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java
index 7d5dad0..75d1de5 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCpSync.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
@@ -97,6 +98,8 @@ public class TestDistCpSync {
dfs.createSnapshot(source, "s2");
dfs.createSnapshot(target, "s1");
Assert.assertTrue(DistCpSync.sync(options, conf));
+ // reset source paths in options
+ options.setSourcePaths(Arrays.asList(source));
// changes have been made in target
final Path subTarget = new Path(target, "sub");
@@ -183,9 +186,21 @@ public class TestDistCpSync {
changeData(source);
dfs.createSnapshot(source, "s2");
+ // Before sync, make some further changes on source. This should not affect
+ // the later distcp since we're copying (s2-s1) to target.
+ final Path toDelete = new Path(source, "foo/d1/foo/f1");
+ dfs.delete(toDelete, true);
+ final Path newdir = new Path(source, "foo/d1/foo/newdir");
+ dfs.mkdirs(newdir);
+
// do the sync
Assert.assertTrue(DistCpSync.sync(options, conf));
+ // make sure the source path has been updated to the snapshot path
+ final Path spath = new Path(source,
+ HdfsConstants.DOT_SNAPSHOT_DIR + Path.SEPARATOR + "s2");
+ Assert.assertEquals(spath, options.getSourcePaths().get(0));
+
// build copy listing
final Path listingPath = new Path("/tmp/META/fileList.seq");
CopyListing listing = new GlobbedCopyListing(conf, new Credentials());
@@ -209,7 +224,7 @@ public class TestDistCpSync {
.getCounter(CopyMapper.Counter.BYTESCOPIED).getValue());
// verify the source and target now has the same structure
- verifyCopy(dfs.getFileStatus(source), dfs.getFileStatus(target), false);
+ verifyCopy(dfs.getFileStatus(spath), dfs.getFileStatus(target), false);
}
private Map<Text, CopyListingFileStatus> getListing(Path listingPath)
@@ -248,6 +263,29 @@ public class TestDistCpSync {
}
}
+ /**
+ * Similar to testSync, but the "to" snapshot is specified as "."
+ * @throws Exception
+ */
+ @Test
+ public void testSyncWithCurrent() throws Exception {
+ options.setUseDiff(true, "s1", ".");
+ initData(source);
+ initData(target);
+ dfs.allowSnapshot(source);
+ dfs.allowSnapshot(target);
+ dfs.createSnapshot(source, "s1");
+ dfs.createSnapshot(target, "s1");
+
+ // make changes under source
+ changeData(source);
+
+ // do the sync
+ Assert.assertTrue(DistCpSync.sync(options, conf));
+ // make sure the source path is still unchanged
+ Assert.assertEquals(source, options.getSourcePaths().get(0));
+ }
+
private void initData2(Path dir) throws Exception {
final Path test = new Path(dir, "test");
final Path foo = new Path(dir, "foo");