You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ar...@apache.org on 2016/01/12 05:32:12 UTC
[13/23] hadoop git commit: HDFS-9630. DistCp minor refactoring and clean up. Contributed by Kai Zheng.
HDFS-9630. DistCp minor refactoring and clean up. Contributed by Kai Zheng.
Change-Id: I363c4ffcac32116ddcdc0a22fac3db92f14a0db0
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/95f32015
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/95f32015
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/95f32015
Branch: refs/heads/HDFS-1312
Commit: 95f32015ad9273420299130a9f10acdbafe63556
Parents: 8c1adea
Author: Zhe Zhang <zh...@apache.org>
Authored: Mon Jan 11 09:46:56 2016 -0800
Committer: Zhe Zhang <zh...@apache.org>
Committed: Mon Jan 11 09:46:56 2016 -0800
----------------------------------------------------------------------
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 ++
.../apache/hadoop/tools/RegexCopyFilter.java | 10 +++++++--
.../apache/hadoop/tools/SimpleCopyListing.java | 14 +++++++------
.../hadoop/tools/mapred/CopyCommitter.java | 10 +++++++--
.../hadoop/tools/mapred/CopyOutputFormat.java | 5 ++++-
.../tools/mapred/RetriableFileCopyCommand.java | 6 ++++--
.../tools/mapred/UniformSizeInputFormat.java | 6 +++++-
.../apache/hadoop/tools/util/DistCpUtils.java | 21 +++++++------------
.../hadoop/tools/util/ProducerConsumer.java | 10 +++------
.../hadoop/tools/util/ThrottledInputStream.java | 7 ++-----
.../hadoop/tools/util/WorkRequestProcessor.java | 3 ---
.../hadoop/tools/mapred/TestCopyMapper.java | 22 ++++++++++++--------
12 files changed, 65 insertions(+), 51 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/95f32015/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 4963b19..b8f8c63 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -211,6 +211,8 @@ Trunk (Unreleased)
HDFS-9582. TestLeaseRecoveryStriped file missing Apache License header
and not well formatted. (umamahesh)
+ HDFS-9630. DistCp minor refactoring and clean up. (Kai Zheng via zhz)
+
OPTIMIZATIONS
BUG FIXES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/95f32015/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/RegexCopyFilter.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/RegexCopyFilter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/RegexCopyFilter.java
index 1c2b324..f6c496e 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/RegexCopyFilter.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/RegexCopyFilter.java
@@ -20,10 +20,16 @@ package org.apache.hadoop.tools;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.*;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/95f32015/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
index 77743eb..d2598a4 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
@@ -39,7 +39,8 @@ import org.apache.hadoop.security.Credentials;
import com.google.common.annotations.VisibleForTesting;
-import java.io.*;
+import java.io.FileNotFoundException;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
@@ -165,9 +166,9 @@ public class SimpleCopyListing extends CopyListing {
}
}
- /** {@inheritDoc} */
@Override
- public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {
+ protected void doBuildListing(Path pathToListingFile,
+ DistCpOptions options) throws IOException {
if(options.shouldUseDiff()) {
doBuildListingWithSnapshotDiff(getWriter(pathToListingFile), options);
}else {
@@ -227,8 +228,9 @@ public class SimpleCopyListing extends CopyListing {
* @throws IOException
*/
@VisibleForTesting
- public void doBuildListingWithSnapshotDiff(SequenceFile.Writer fileListWriter,
- DistCpOptions options) throws IOException {
+ protected void doBuildListingWithSnapshotDiff(
+ SequenceFile.Writer fileListWriter, DistCpOptions options)
+ throws IOException {
ArrayList<DiffInfo> diffList = distCpSync.prepareDiffList();
Path sourceRoot = options.getSourcePaths().get(0);
FileSystem sourceFS = sourceRoot.getFileSystem(getConf());
@@ -287,7 +289,7 @@ public class SimpleCopyListing extends CopyListing {
* @throws IOException
*/
@VisibleForTesting
- public void doBuildListing(SequenceFile.Writer fileListWriter,
+ protected void doBuildListing(SequenceFile.Writer fileListWriter,
DistCpOptions options) throws IOException {
if (options.getNumListstatusThreads() > 0) {
numListstatusThreads = options.getNumListstatusThreads();
http://git-wip-us.apache.org/repos/asf/hadoop/blob/95f32015/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
index 2b1e510..6d2fef5 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
@@ -27,10 +27,16 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.JobStatus;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
-import org.apache.hadoop.tools.*;
+import org.apache.hadoop.tools.CopyListing;
+import org.apache.hadoop.tools.CopyListingFileStatus;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.tools.GlobbedCopyListing;
import org.apache.hadoop.tools.util.DistCpUtils;
import java.io.IOException;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/95f32015/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyOutputFormat.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyOutputFormat.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyOutputFormat.java
index a5bd605..7b41caa 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyOutputFormat.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyOutputFormat.java
@@ -20,7 +20,10 @@ package org.apache.hadoop.tools.mapred;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.tools.DistCpConstants;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/95f32015/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
index 6b5078c..071e500 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
@@ -201,11 +201,13 @@ public class RetriableFileCopyCommand extends RetriableCommand {
targetFS, target)) {
StringBuilder errorMessage = new StringBuilder("Check-sum mismatch between ")
.append(source).append(" and ").append(target).append(".");
- if (sourceFS.getFileStatus(source).getBlockSize() != targetFS.getFileStatus(target).getBlockSize()) {
+ if (sourceFS.getFileStatus(source).getBlockSize() !=
+ targetFS.getFileStatus(target).getBlockSize()) {
errorMessage.append(" Source and target differ in block-size.")
.append(" Use -pb to preserve block-sizes during copy.")
.append(" Alternatively, skip checksum-checks altogether, using -skipCrc.")
- .append(" (NOTE: By skipping checksums, one runs the risk of masking data-corruption during file-transfer.)");
+ .append(" (NOTE: By skipping checksums, one runs the risk of " +
+ "masking data-corruption during file-transfer.)");
}
throw new IOException(errorMessage.toString());
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/95f32015/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java
index 8f31234..3e86d09 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java
@@ -23,12 +23,16 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.tools.util.DistCpUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.conf.Configuration;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/95f32015/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
index 70d947e..d3d7677 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
@@ -18,17 +18,7 @@
package org.apache.hadoop.tools.util;
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.URI;
-import java.net.UnknownHostException;
-import java.text.DecimalFormat;
-import java.util.EnumSet;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Map.Entry;
-
+import com.google.common.collect.Maps;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -49,10 +39,15 @@ import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
import org.apache.hadoop.tools.mapred.UniformSizeInputFormat;
-
-import com.google.common.collect.Maps;
import org.apache.hadoop.util.StringUtils;
+import java.io.IOException;
+import java.text.DecimalFormat;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
/**
* Utility functions used in DistCp.
*/
http://git-wip-us.apache.org/repos/asf/hadoop/blob/95f32015/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ProducerConsumer.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ProducerConsumer.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ProducerConsumer.java
index bf72bb8..16bf254 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ProducerConsumer.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ProducerConsumer.java
@@ -20,15 +20,11 @@ package org.apache.hadoop.tools.util;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.tools.util.WorkReport;
-import org.apache.hadoop.tools.util.WorkRequest;
-import org.apache.hadoop.tools.util.WorkRequestProcessor;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.ArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.atomic.AtomicInteger;
/**
* ProducerConsumer class encapsulates input and output queues and a
@@ -51,8 +47,8 @@ public class ProducerConsumer<T, R> {
* @param numThreads Size of thread-pool to execute Workers.
*/
public ProducerConsumer(int numThreads) {
- this.inputQueue = new LinkedBlockingQueue<WorkRequest<T>>();
- this.outputQueue = new LinkedBlockingQueue<WorkReport<R>>();
+ this.inputQueue = new LinkedBlockingQueue<>();
+ this.outputQueue = new LinkedBlockingQueue<>();
executor = Executors.newFixedThreadPool(numThreads);
workCnt = new AtomicInteger(0);
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/95f32015/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ThrottledInputStream.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ThrottledInputStream.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ThrottledInputStream.java
index 2be8ef0..7035527 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ThrottledInputStream.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ThrottledInputStream.java
@@ -18,13 +18,10 @@
package org.apache.hadoop.tools.util;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.PositionedReadable;
-import com.google.common.base.Preconditions;
+import java.io.IOException;
+import java.io.InputStream;
/**
* The ThrottleInputStream provides bandwidth throttling on a specified
http://git-wip-us.apache.org/repos/asf/hadoop/blob/95f32015/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/WorkRequestProcessor.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/WorkRequestProcessor.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/WorkRequestProcessor.java
index b6d8a09..91f738e 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/WorkRequestProcessor.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/WorkRequestProcessor.java
@@ -18,9 +18,6 @@
package org.apache.hadoop.tools.util;
-import org.apache.hadoop.tools.util.WorkReport;
-import org.apache.hadoop.tools.util.WorkRequest;
-
/**
* Interface for ProducerConsumer worker loop.
*
http://git-wip-us.apache.org/repos/asf/hadoop/blob/95f32015/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java
index ec60fa8..4d0752f 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java
@@ -378,8 +378,9 @@ public class TestCopyMapper {
workPath);
copyMapper.setup(context);
- copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), pathList.get(0))),
- new CopyListingFileStatus(fs.getFileStatus(pathList.get(0))), context);
+ copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH),
+ pathList.get(0))),
+ new CopyListingFileStatus(fs.getFileStatus(pathList.get(0))), context);
Assert.assertTrue("There should have been an exception.", false);
}
@@ -525,7 +526,8 @@ public class TestCopyMapper {
mkdirs(TARGET_PATH);
cluster.getFileSystem().setPermission(new Path(SOURCE_PATH + "/src/file"),
new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
- cluster.getFileSystem().setPermission(new Path(TARGET_PATH), new FsPermission((short)511));
+ cluster.getFileSystem().setPermission(new Path(TARGET_PATH),
+ new FsPermission((short)511));
final FileSystem tmpFS = tmpUser.doAs(new PrivilegedAction<FileSystem>() {
@Override
@@ -785,7 +787,8 @@ public class TestCopyMapper {
}
if (ignoreFailures) {
for (Text value : stubContext.getWriter().values()) {
- Assert.assertTrue(value.toString() + " is not skipped", value.toString().startsWith("FAIL:"));
+ Assert.assertTrue(value.toString() + " is not skipped",
+ value.toString().startsWith("FAIL:"));
}
}
Assert.assertTrue("There should have been an exception.", ignoreFailures);
@@ -813,7 +816,6 @@ public class TestCopyMapper {
@Test(timeout=40000)
public void testCopyFailOnBlockSizeDifference() {
try {
-
deleteState();
createSourceDataWithDifferentBlockSize();
@@ -833,16 +835,18 @@ public class TestCopyMapper {
for (Path path : pathList) {
final FileStatus fileStatus = fs.getFileStatus(path);
- copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
- new CopyListingFileStatus(fileStatus), context);
+ copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH),
+ path)), new CopyListingFileStatus(fileStatus), context);
}
Assert.fail("Copy should have failed because of block-size difference.");
}
catch (Exception exception) {
// Check that the exception suggests the use of -pb/-skipCrc.
- Assert.assertTrue("Failure exception should have suggested the use of -pb.", exception.getCause().getCause().getMessage().contains("pb"));
- Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.", exception.getCause().getCause().getMessage().contains("skipCrc"));
+ Assert.assertTrue("Failure exception should have suggested the use of -pb.",
+ exception.getCause().getCause().getMessage().contains("pb"));
+ Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.",
+ exception.getCause().getCause().getMessage().contains("skipCrc"));
}
}