You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ay...@apache.org on 2021/04/23 17:18:50 UTC
[hadoop] branch trunk updated: HADOOP-17620. DistCp: Use Iterator for listing target directory as well. (#2861). Contributed by Ayush Saxena.
This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/trunk by this push:
new 6800b21 HADOOP-17620. DistCp: Use Iterator for listing target directory as well. (#2861). Contributed by Ayush Saxena.
6800b21 is described below
commit 6800b21e3b07168fd5820133d20858c6ca4bdf59
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Fri Apr 23 22:48:15 2021 +0530
HADOOP-17620. DistCp: Use Iterator for listing target directory as well. (#2861). Contributed by Ayush Saxena.
Signed-off-by: Vinayakumar B <vi...@apache.org>
---
.../java/org/apache/hadoop/tools/SimpleCopyListing.java | 5 +++--
.../java/org/apache/hadoop/tools/mapred/CopyCommitter.java | 3 +++
.../hadoop/tools/contract/AbstractContractDistCpTest.java | 13 ++++++++++---
3 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
index 900ce62..fb7ace5 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
@@ -65,7 +65,8 @@ import static org.apache.hadoop.tools.DistCpConstants
* Note: The SimpleCopyListing doesn't handle wild-cards in the input-paths.
*/
public class SimpleCopyListing extends CopyListing {
- private static final Logger LOG = LoggerFactory.getLogger(SimpleCopyListing.class);
+ public static final Logger LOG =
+ LoggerFactory.getLogger(SimpleCopyListing.class);
public static final int DEFAULT_FILE_STATUS_SIZE = 1000;
public static final boolean DEFAULT_RANDOMIZE_FILE_LISTING = true;
@@ -601,7 +602,7 @@ public class SimpleCopyListing extends CopyListing {
}
private void printStats() {
- LOG.info("Paths (files+dirs) cnt = {}; dirCnt = ", totalPaths, totalDirs);
+ LOG.info("Paths (files+dirs) cnt = {}; dirCnt = {}", totalPaths, totalDirs);
}
private void maybePrintStats() {
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
index e346d0b..33ab3ee 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
@@ -564,12 +564,15 @@ public class CopyCommitter extends FileOutputCommitter {
// thread count is picked up from the job
int threads = conf.getInt(DistCpConstants.CONF_LABEL_LISTSTATUS_THREADS,
DistCpConstants.DEFAULT_LISTSTATUS_THREADS);
+ boolean useIterator =
+ conf.getBoolean(DistCpConstants.CONF_LABEL_USE_ITERATOR, false);
LOG.info("Scanning destination directory {} with thread count: {}",
targetFinalPath, threads);
DistCpOptions options = new DistCpOptions.Builder(targets, resultNonePath)
.withOverwrite(overwrite)
.withSyncFolder(syncFolder)
.withNumListstatusThreads(threads)
+ .withUseIterator(useIterator)
.build();
DistCpContext distCpContext = new DistCpContext(options);
distCpContext.setTargetPathExists(targetPathExists);
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
index 202ead6..fab14d1 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.SimpleCopyListing;
import org.apache.hadoop.tools.mapred.CopyMapper;
import org.apache.hadoop.tools.util.DistCpTestUtils;
import org.apache.hadoop.util.functional.RemoteIterators;
@@ -628,11 +629,17 @@ public abstract class AbstractContractDistCpTest
GenericTestUtils
.createFiles(remoteFS, source, getDepth(), getWidth(), getWidth());
+ GenericTestUtils.LogCapturer log =
+ GenericTestUtils.LogCapturer.captureLogs(SimpleCopyListing.LOG);
+
DistCpTestUtils.assertRunDistCp(DistCpConstants.SUCCESS, source.toString(),
- dest.toString(), "-useiterator", conf);
+ dest.toString(), "-useiterator -update -delete", conf);
+
+ // Check the target listing was also done using iterator.
+ Assertions.assertThat(log.getOutput()).contains(
+ "Building listing using iterator mode for " + dest.toString());
- Assertions
- .assertThat(RemoteIterators.toList(localFS.listFiles(dest, true)))
+ Assertions.assertThat(RemoteIterators.toList(localFS.listFiles(dest, true)))
.describedAs("files").hasSize(getTotalFiles());
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org