You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2021/05/28 19:52:03 UTC

[hadoop] branch branch-3.3 updated: MAPREDUCE-7287. Distcp will delete exists file , If we use "-delete and -update" options and distcp file. (#2852)

This is an automated email from the ASF dual-hosted git repository.

stevel pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 7feb41b  MAPREDUCE-7287. Distcp will delete exists file , If we use "-delete and -update" options and distcp file. (#2852)
7feb41b is described below

commit 7feb41b73d581c4d8943d0f70fc223dacac0e115
Author: zhengchenyu <zh...@gmail.com>
AuthorDate: Sat May 29 03:21:37 2021 +0800

    MAPREDUCE-7287. Distcp will delete exists file , If we use "-delete and -update" options and distcp file. (#2852)
    
    Contributed by zhengchenyu
    
    Change-Id: I61edf9a443c0c6cd5b5dd911901708530cf131ed
---
 .../apache/hadoop/tools/mapred/CopyCommitter.java  |  6 +--
 .../tools/contract/AbstractContractDistCpTest.java | 56 ++++++++++++++++++++++
 .../hadoop/tools/mapred/TestCopyCommitter.java     | 45 +++++++++++++++++
 .../apache/hadoop/tools/util/TestDistCpUtils.java  |  9 ++++
 4 files changed, 111 insertions(+), 5 deletions(-)

diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
index 33ab3ee..2272781 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
@@ -553,10 +553,6 @@ public class CopyCommitter extends FileOutputCommitter {
         conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
     List<Path> targets = new ArrayList<>(1);
     targets.add(targetFinalPath);
-    Path resultNonePath = Path.getPathWithoutSchemeAndAuthority(targetFinalPath)
-        .toString().startsWith(DistCpConstants.HDFS_RESERVED_RAW_DIRECTORY_NAME)
-        ? DistCpConstants.RAW_NONE_PATH
-        : DistCpConstants.NONE_PATH;
     //
     // Set up options to be the same from the CopyListing.buildListing's
     // perspective, so to collect similar listings as when doing the copy
@@ -568,7 +564,7 @@ public class CopyCommitter extends FileOutputCommitter {
         conf.getBoolean(DistCpConstants.CONF_LABEL_USE_ITERATOR, false);
     LOG.info("Scanning destination directory {} with thread count: {}",
         targetFinalPath, threads);
-    DistCpOptions options = new DistCpOptions.Builder(targets, resultNonePath)
+    DistCpOptions options = new DistCpOptions.Builder(targets, targetFinalPath)
         .withOverwrite(overwrite)
         .withSyncFolder(syncFolder)
         .withNumListstatusThreads(threads)
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
index fab14d1..159338f 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
@@ -709,4 +709,60 @@ public abstract class AbstractContractDistCpTest
                     Collections.singletonList(srcDir), destDir)
                     .withDirectWrite(true)));
   }
+
+  @Test
+  public void testDistCpWithFile() throws Exception {
+    describe("Distcp only file");
+
+    final Path src = new Path(remoteDir, "file");
+    // The destination path is qualified against the local filesystem.
+    final Path dst = localFS.makeQualified(new Path(localDir, "file"));
+
+    mkdirs(remoteFS, remoteDir);
+    mkdirs(localFS, localDir);
+
+    final int size = 4;
+    final int start = 0x40;
+    final byte[] payload = dataset(size, start, start + size);
+    ContractTestUtils.createFile(remoteFS, src, true, payload);
+    verifyPathExists(remoteFS, "", src);
+    verifyPathExists(localFS, "", localDir);
+
+    // Run distcp file-to-file with a null options argument.
+    DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS,
+        src.toString(), dst.toString(), null, conf);
+
+    Assertions.assertThat(RemoteIterators.toList(localFS.listFiles(dst, true)))
+        .describedAs("files").hasSize(1);
+    verifyFileContents(localFS, dst, payload);
+  }
+
+  @Test
+  public void testDistCpWithUpdateExistFile() throws Exception {
+    describe("Now update an exist file.");
+
+    final Path src = new Path(remoteDir, "file");
+    final Path dst = localFS.makeQualified(new Path(localDir, "file"));
+
+    mkdirs(remoteFS, remoteDir);
+    mkdirs(localFS, localDir);
+
+    // Pre-create the destination file so that distcp targets an existing file.
+    final int size = 4;
+    final int start = 0x40;
+    final byte[] srcData = dataset(size, start, start + size);
+    final byte[] dstData = dataset(size, start, start + size + 1);
+    ContractTestUtils.createFile(remoteFS, src, true, srcData);
+    ContractTestUtils.createFile(localFS, dst, true, dstData);
+
+    verifyPathExists(remoteFS, "", src);
+    verifyPathExists(localFS, "", dst);
+    DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS,
+        src.toString(), dst.toString(), "-delete -update", conf);
+
+    Assertions.assertThat(RemoteIterators.toList(localFS.listFiles(dst, true)))
+        .hasSize(1);
+    verifyFileContents(localFS, dst, srcData);
+  }
+
 }
\ No newline at end of file
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java
index 685f030..62940f6 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java
@@ -265,6 +265,51 @@ public class TestCopyCommitter {
     }
   }
 
+  @Test
+  public void testDeleteMissingWithOnlyFile() throws IOException {
+    final TaskAttemptContext attempt = getTaskAttemptContext(config);
+    final JobContext job = new JobContextImpl(
+        attempt.getConfiguration(),
+        attempt.getTaskAttemptID().getJobID());
+    final Configuration conf = job.getConfiguration();
+
+    // Declared before try so the finally block can hand it to delete().
+    FileSystem fs = null;
+    try {
+      final OutputCommitter committer = new CopyCommitter(null, attempt);
+      fs = FileSystem.get(conf);
+      final String srcFile = TestDistCpUtils.createTestSetupWithOnlyFile(
+          fs, FsPermission.getDefault());
+      final String dstFile = TestDistCpUtils.createTestSetupWithOnlyFile(
+          fs, FsPermission.getDefault());
+
+      // Sync-folder plus delete-missing, with a file as source and target.
+      final DistCpOptions options = new DistCpOptions.Builder(
+          Collections.singletonList(new Path(srcFile)), new Path(dstFile))
+          .withSyncFolder(true)
+          .withDeleteMissing(true)
+          .build();
+      options.appendToConf(conf);
+      final DistCpContext context = new DistCpContext(options);
+
+      final CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
+      listing.buildListing(new Path(srcFile), context);
+
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, dstFile);
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, dstFile);
+
+      // Commit twice; the second pass is the test for an idempotent commit.
+      for (int pass = 0; pass < 2; pass++) {
+        committer.commitJob(job);
+        verifyFoldersAreInSync(fs, dstFile, srcFile);
+        verifyFoldersAreInSync(fs, srcFile, dstFile);
+      }
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp1");
+      conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
+    }
+  }
+
   // for HDFS-14621, should preserve times after -delete
   @Test
   public void testPreserveTimeWithDeleteMiss() throws IOException {
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
index f10dbf5..dd1e65d 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
@@ -1360,6 +1360,15 @@ public class TestDistCpUtils {
     return base + "/" + location;
   }
 
+  public static String createTestSetupWithOnlyFile(FileSystem fs,
+      FsPermission perm) throws IOException {
+    final String dir = "/tmp1/" + rand.nextLong();  // parent dir of the file
+    fs.mkdirs(new Path(dir));
+    fs.setPermission(new Path(dir), perm);
+    createFile(fs, new Path(dir + "/file"));
+    return dir + "/file";  // callers receive the file path, not the dir
+  }
+
   public static void delete(FileSystem fs, String path) {
     try {
       if (fs != null) {

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org