You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2021/05/28 19:52:03 UTC
[hadoop] branch branch-3.3 updated: MAPREDUCE-7287. Distcp will delete exists file , If we use "-delete and -update" options and distcp file. (#2852)
This is an automated email from the ASF dual-hosted git repository.
stevel pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new 7feb41b MAPREDUCE-7287. Distcp will delete exists file , If we use "-delete and -update" options and distcp file. (#2852)
7feb41b is described below
commit 7feb41b73d581c4d8943d0f70fc223dacac0e115
Author: zhengchenyu <zh...@gmail.com>
AuthorDate: Sat May 29 03:21:37 2021 +0800
MAPREDUCE-7287. Distcp will delete exists file , If we use "-delete and -update" options and distcp file. (#2852)
Contributed by zhengchenyu
Change-Id: I61edf9a443c0c6cd5b5dd911901708530cf131ed
---
.../apache/hadoop/tools/mapred/CopyCommitter.java | 6 +--
.../tools/contract/AbstractContractDistCpTest.java | 56 ++++++++++++++++++++++
.../hadoop/tools/mapred/TestCopyCommitter.java | 45 +++++++++++++++++
.../apache/hadoop/tools/util/TestDistCpUtils.java | 9 ++++
4 files changed, 111 insertions(+), 5 deletions(-)
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
index 33ab3ee..2272781 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
@@ -553,10 +553,6 @@ public class CopyCommitter extends FileOutputCommitter {
conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
List<Path> targets = new ArrayList<>(1);
targets.add(targetFinalPath);
- Path resultNonePath = Path.getPathWithoutSchemeAndAuthority(targetFinalPath)
- .toString().startsWith(DistCpConstants.HDFS_RESERVED_RAW_DIRECTORY_NAME)
- ? DistCpConstants.RAW_NONE_PATH
- : DistCpConstants.NONE_PATH;
//
// Set up options to be the same from the CopyListing.buildListing's
// perspective, so to collect similar listings as when doing the copy
@@ -568,7 +564,7 @@ public class CopyCommitter extends FileOutputCommitter {
conf.getBoolean(DistCpConstants.CONF_LABEL_USE_ITERATOR, false);
LOG.info("Scanning destination directory {} with thread count: {}",
targetFinalPath, threads);
- DistCpOptions options = new DistCpOptions.Builder(targets, resultNonePath)
+ DistCpOptions options = new DistCpOptions.Builder(targets, targetFinalPath)
.withOverwrite(overwrite)
.withSyncFolder(syncFolder)
.withNumListstatusThreads(threads)
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
index fab14d1..159338f 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
@@ -709,4 +709,60 @@ public abstract class AbstractContractDistCpTest
Collections.singletonList(srcDir), destDir)
.withDirectWrite(true)));
}
+
+ @Test
+ public void testDistCpWithFile() throws Exception {
+ describe("Distcp only file");
+
+ Path source = new Path(remoteDir, "file");
+ Path dest = new Path(localDir, "file");
+ dest = localFS.makeQualified(dest);
+
+ mkdirs(remoteFS, remoteDir);
+ mkdirs(localFS, localDir);
+
+ int len = 4;
+ int base = 0x40;
+ byte[] block = dataset(len, base, base + len);
+ ContractTestUtils.createFile(remoteFS, source, true, block);
+ verifyPathExists(remoteFS, "", source);
+ verifyPathExists(localFS, "", localDir);
+
+ DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(),
+ dest.toString(), null, conf);
+
+ Assertions
+ .assertThat(RemoteIterators.toList(localFS.listFiles(dest, true)))
+ .describedAs("files").hasSize(1);
+ verifyFileContents(localFS, dest, block);
+ }
+
+ @Test
+ public void testDistCpWithUpdateExistFile() throws Exception {
+ describe("Now update an exist file.");
+
+ Path source = new Path(remoteDir, "file");
+ Path dest = new Path(localDir, "file");
+ dest = localFS.makeQualified(dest);
+
+ mkdirs(remoteFS, remoteDir);
+ mkdirs(localFS, localDir);
+
+ int len = 4;
+ int base = 0x40;
+ byte[] block = dataset(len, base, base + len);
+ byte[] destBlock = dataset(len, base, base + len + 1);
+ ContractTestUtils.createFile(remoteFS, source, true, block);
+ ContractTestUtils.createFile(localFS, dest, true, destBlock);
+
+ verifyPathExists(remoteFS, "", source);
+ verifyPathExists(localFS, "", dest);
+ DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(),
+ dest.toString(), "-delete -update", conf);
+
+ Assertions.assertThat(RemoteIterators.toList(localFS.listFiles(dest, true)))
+ .hasSize(1);
+ verifyFileContents(localFS, dest, block);
+ }
+
}
\ No newline at end of file
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java
index 685f030..62940f6 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java
@@ -265,6 +265,51 @@ public class TestCopyCommitter {
}
}
+ @Test
+ public void testDeleteMissingWithOnlyFile() throws IOException {
+ TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+ JobContext jobContext = new JobContextImpl(taskAttemptContext
+ .getConfiguration(), taskAttemptContext.getTaskAttemptID().getJobID());
+ Configuration conf = jobContext.getConfiguration();
+
+ String sourceBase;
+ String targetBase;
+ FileSystem fs = null;
+ try {
+ OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
+ fs = FileSystem.get(conf);
+ sourceBase = TestDistCpUtils.createTestSetupWithOnlyFile(fs,
+ FsPermission.getDefault());
+ targetBase = TestDistCpUtils.createTestSetupWithOnlyFile(fs,
+ FsPermission.getDefault());
+
+ final DistCpOptions options = new DistCpOptions.Builder(
+ Collections.singletonList(new Path(sourceBase)), new Path(targetBase))
+ .withSyncFolder(true).withDeleteMissing(true).build();
+ options.appendToConf(conf);
+ final DistCpContext context = new DistCpContext(options);
+
+ CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
+ Path listingFile = new Path(sourceBase);
+ listing.buildListing(listingFile, context);
+
+ conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
+ conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);
+
+ committer.commitJob(jobContext);
+ verifyFoldersAreInSync(fs, targetBase, sourceBase);
+ verifyFoldersAreInSync(fs, sourceBase, targetBase);
+
+ //Test for idempotent commit
+ committer.commitJob(jobContext);
+ verifyFoldersAreInSync(fs, targetBase, sourceBase);
+ verifyFoldersAreInSync(fs, sourceBase, targetBase);
+ } finally {
+ TestDistCpUtils.delete(fs, "/tmp1");
+ conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
+ }
+ }
+
// for HDFS-14621, should preserve times after -delete
@Test
public void testPreserveTimeWithDeleteMiss() throws IOException {
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
index f10dbf5..dd1e65d 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
@@ -1360,6 +1360,15 @@ public class TestDistCpUtils {
return base + "/" + location;
}
+ public static String createTestSetupWithOnlyFile(FileSystem fs,
+ FsPermission perm) throws IOException {
+ String location = String.valueOf(rand.nextLong());
+ fs.mkdirs(new Path("/tmp1/" + location));
+ fs.setPermission(new Path("/tmp1/" + location), perm);
+ createFile(fs, new Path("/tmp1/" + location + "/file"));
+ return "/tmp1/" + location + "/file";
+ }
+
public static void delete(FileSystem fs, String path) {
try {
if (fs != null) {
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org