You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ay...@apache.org on 2019/07/20 07:58:16 UTC
[hadoop] branch trunk updated: HADOOP-16440. Distcp can not preserve timestamp with -delete option. Contributed by ludun.
This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new e60f5e2 HADOOP-16440. Distcp can not preserve timestamp with -delete option. Contributed by ludun.
e60f5e2 is described below
commit e60f5e2572532e2bce44757997f1086065b8fd80
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Sat Jul 20 13:11:14 2019 +0530
HADOOP-16440. Distcp can not preserve timestamp with -delete option. Contributed by ludun.
---
.../apache/hadoop/tools/mapred/CopyCommitter.java | 14 ++--
.../hadoop/tools/mapred/TestCopyCommitter.java | 80 ++++++++++++++++++++++
2 files changed, 87 insertions(+), 7 deletions(-)
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
index b86f5ea..d7a730d 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
@@ -109,13 +109,6 @@ public class CopyCommitter extends FileOutputCommitter {
cleanupTempFiles(jobContext);
- String attributes = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
- final boolean preserveRawXattrs =
- conf.getBoolean(DistCpConstants.CONF_LABEL_PRESERVE_RAWXATTRS, false);
- if ((attributes != null && !attributes.isEmpty()) || preserveRawXattrs) {
- preserveFileAttributesForDirectories(conf);
- }
-
try {
if (conf.getBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false)) {
deleteMissing(conf);
@@ -125,6 +118,13 @@ public class CopyCommitter extends FileOutputCommitter {
// save missing information to a directory
trackMissing(conf);
}
+ // for HDFS-14621, should preserve status after -delete
+ String attributes = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
+ final boolean preserveRawXattrs = conf.getBoolean(
+ DistCpConstants.CONF_LABEL_PRESERVE_RAWXATTRS, false);
+ if ((attributes != null && !attributes.isEmpty()) || preserveRawXattrs) {
+ preserveFileAttributesForDirectories(conf);
+ }
taskAttemptContext.setStatus("Commit Successful");
}
finally {
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java
index 912205f..2ef89e5 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java
@@ -26,11 +26,15 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.tools.CopyListing;
+import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.tools.DistCpContext;
import org.apache.hadoop.tools.DistCpOptions;
@@ -204,6 +208,61 @@ public class TestCopyCommitter {
}
}
+ // for HDFS-14621, should preserve times after -delete
+ @Test
+ public void testPreserveTimeWithDeleteMiss() throws IOException {
+ TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+ JobContext jobContext = new JobContextImpl(
+ taskAttemptContext.getConfiguration(),
+ taskAttemptContext.getTaskAttemptID().getJobID());
+ Configuration conf = jobContext.getConfiguration();
+
+ FileSystem fs = null;
+ try {
+ OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
+ fs = FileSystem.get(conf);
+ String sourceBase = TestDistCpUtils.createTestSetup(
+ fs, FsPermission.getDefault());
+ String targetBase = TestDistCpUtils.createTestSetup(
+ fs, FsPermission.getDefault());
+ String targetBaseAdd = TestDistCpUtils.createTestSetup(
+ fs, FsPermission.getDefault());
+ fs.rename(new Path(targetBaseAdd), new Path(targetBase));
+
+ final DistCpOptions options = new DistCpOptions.Builder(
+ Collections.singletonList(new Path(sourceBase)), new Path("/out"))
+ .withSyncFolder(true).withDeleteMissing(true)
+ .preserve(FileAttribute.TIMES).build();
+ options.appendToConf(conf);
+ final DistCpContext context = new DistCpContext(options);
+
+ CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
+ Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
+ listing.buildListing(listingFile, context);
+
+ conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
+ conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);
+
+ Path sourceListing = new Path(
+ conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
+ SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
+ SequenceFile.Reader.file(sourceListing));
+ Path targetRoot = new Path(targetBase);
+
+ committer.commitJob(jobContext);
+ checkDirectoryTimes(fs, sourceReader, targetRoot);
+
+ //Test for idempotent commit
+ committer.commitJob(jobContext);
+ checkDirectoryTimes(fs, sourceReader, targetRoot);
+ } finally {
+ TestDistCpUtils.delete(fs, "/tmp1");
+ conf.unset(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
+ conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
+ }
+ }
+
+
@Test
public void testDeleteMissingFlatInterleavedFiles() throws IOException {
TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
@@ -364,6 +423,27 @@ public class TestCopyCommitter {
}
}
+ private void checkDirectoryTimes(
+ FileSystem fs, SequenceFile.Reader sourceReader, Path targetRoot)
+ throws IOException {
+ try {
+ CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
+ Text srcRelPath = new Text();
+
+ // Iterate over every source path that was copied.
+ while (sourceReader.next(srcRelPath, srcFileStatus)) {
+ Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);
+ FileStatus targetStatus = fs.getFileStatus(targetFile);
+ Assert.assertEquals(srcFileStatus.getModificationTime(),
+ targetStatus.getModificationTime());
+ Assert.assertEquals(srcFileStatus.getAccessTime(),
+ targetStatus.getAccessTime());
+ }
+ } finally {
+ IOUtils.closeStream(sourceReader);
+ }
+ }
+
private static class NullInputFormat extends InputFormat {
@Override
public List getSplits(JobContext context)
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org