You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ae...@apache.org on 2016/03/28 19:36:08 UTC
[40/48] hadoop git commit: HDFS-9640. Remove hsftp from DistCp in trunk. Contributed by Wei-Chiu Chuang.
HDFS-9640. Remove hsftp from DistCp in trunk. Contributed by Wei-Chiu Chuang.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/18c7e582
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/18c7e582
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/18c7e582
Branch: refs/heads/HDFS-7240
Commit: 18c7e582839ea0b550463569b18b5827d23f8849
Parents: 55ae143
Author: Akira Ajisaka <aa...@apache.org>
Authored: Mon Mar 28 15:32:38 2016 +0900
Committer: Akira Ajisaka <aa...@apache.org>
Committed: Mon Mar 28 15:32:38 2016 +0900
----------------------------------------------------------------------
.../java/org/apache/hadoop/tools/DistCp.java | 79 --------------------
.../apache/hadoop/tools/DistCpConstants.java | 16 ----
.../apache/hadoop/tools/DistCpOptionSwitch.java | 9 ---
.../org/apache/hadoop/tools/DistCpOptions.java | 22 ------
.../org/apache/hadoop/tools/OptionsParser.java | 5 --
.../apache/hadoop/tools/mapred/CopyMapper.java | 68 -----------------
.../src/site/markdown/DistCp.md.vm | 39 ----------
.../apache/hadoop/tools/TestOptionsParser.java | 17 +----
8 files changed, 1 insertion(+), 254 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/18c7e582/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java
index 9db2b1b..2342fa9 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java
@@ -19,7 +19,6 @@
package org.apache.hadoop.tools;
import java.io.IOException;
-import java.net.URL;
import java.util.Random;
import org.apache.commons.logging.Log;
@@ -254,89 +253,11 @@ public class DistCp extends Configured implements Tool {
job.getConfiguration().set(JobContext.NUM_MAPS,
String.valueOf(inputOptions.getMaxMaps()));
- if (inputOptions.getSslConfigurationFile() != null) {
- setupSSLConfig(job);
- }
-
inputOptions.appendToConf(job.getConfiguration());
return job;
}
/**
- * Setup ssl configuration on the job configuration to enable hsftp access
- * from map job. Also copy the ssl configuration file to Distributed cache
- *
- * @param job - Reference to job's handle
- * @throws java.io.IOException - Exception if unable to locate ssl config file
- */
- private void setupSSLConfig(Job job) throws IOException {
- Configuration configuration = job.getConfiguration();
- URL sslFileUrl = configuration.getResource(inputOptions
- .getSslConfigurationFile());
- if (sslFileUrl == null) {
- throw new IOException(
- "Given ssl configuration file doesn't exist in class path : "
- + inputOptions.getSslConfigurationFile());
- }
- Path sslConfigPath = new Path(sslFileUrl.toString());
-
- addSSLFilesToDistCache(job, sslConfigPath);
- configuration.set(DistCpConstants.CONF_LABEL_SSL_CONF, sslConfigPath.getName());
- configuration.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfigPath.getName());
- }
-
- /**
- * Add SSL files to distributed cache. Trust store, key store and ssl config xml
- *
- * @param job - Job handle
- * @param sslConfigPath - ssl Configuration file specified through options
- * @throws IOException - If any
- */
- private void addSSLFilesToDistCache(Job job,
- Path sslConfigPath) throws IOException {
- Configuration configuration = job.getConfiguration();
- FileSystem localFS = FileSystem.getLocal(configuration);
-
- Configuration sslConf = new Configuration(false);
- sslConf.addResource(sslConfigPath);
-
- Path localStorePath = getLocalStorePath(sslConf,
- DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION);
- job.addCacheFile(localStorePath.makeQualified(localFS.getUri(),
- localFS.getWorkingDirectory()).toUri());
- configuration.set(DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION,
- localStorePath.getName());
-
- localStorePath = getLocalStorePath(sslConf,
- DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION);
- job.addCacheFile(localStorePath.makeQualified(localFS.getUri(),
- localFS.getWorkingDirectory()).toUri());
- configuration.set(DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION,
- localStorePath.getName());
-
- job.addCacheFile(sslConfigPath.makeQualified(localFS.getUri(),
- localFS.getWorkingDirectory()).toUri());
-
- }
-
- /**
- * Get Local Trust store/key store path
- *
- * @param sslConf - Config from SSL Client xml
- * @param storeKey - Key for either trust store or key store
- * @return - Path where the store is present
- * @throws IOException -If any
- */
- private Path getLocalStorePath(Configuration sslConf, String storeKey) throws IOException {
- if (sslConf.get(storeKey) != null) {
- return new Path(sslConf.get(storeKey));
- } else {
- throw new IOException("Store for " + storeKey + " is not set in " +
- inputOptions.getSslConfigurationFile());
- }
- }
-
- /**
* Setup output format appropriately
*
* @param job - Job handle
http://git-wip-us.apache.org/repos/asf/hadoop/blob/18c7e582/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java
index 93d6a62..95d26df 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java
@@ -49,7 +49,6 @@ public class DistCpConstants {
"distcp.preserve.rawxattrs";
public static final String CONF_LABEL_SYNC_FOLDERS = "distcp.sync.folders";
public static final String CONF_LABEL_DELETE_MISSING = "distcp.delete.missing.source";
- public static final String CONF_LABEL_SSL_CONF = "distcp.keystore.resource";
public static final String CONF_LABEL_LISTSTATUS_THREADS = "distcp.liststatus.threads";
public static final String CONF_LABEL_MAX_MAPS = "distcp.max.maps";
public static final String CONF_LABEL_SOURCE_LISTING = "distcp.source.listing";
@@ -76,9 +75,6 @@ public class DistCpConstants {
/* Total number of paths to copy, includes directories. Unfiltered count */
public static final String CONF_LABEL_TOTAL_NUMBER_OF_RECORDS = "mapred.number.of.records";
- /* SSL keystore resource */
- public static final String CONF_LABEL_SSL_KEYSTORE = "dfs.https.client.keystore.resource";
-
/* If input is based -f <<source listing>>, file containing the src paths */
public static final String CONF_LABEL_LISTING_FILE_PATH = "distcp.listing.file.path";
@@ -107,18 +103,6 @@ public class DistCpConstants {
public static final String CONF_LABEL_COPY_LISTING_CLASS = "distcp.copy.listing.class";
/**
- * Conf label for SSL Trust-store location.
- */
- public static final String CONF_LABEL_SSL_TRUST_STORE_LOCATION
- = "ssl.client.truststore.location";
-
- /**
- * Conf label for SSL Key-store location.
- */
- public static final String CONF_LABEL_SSL_KEY_STORE_LOCATION
- = "ssl.client.keystore.location";
-
- /**
* Constants for DistCp return code to shell / consumer of ToolRunner's run
*/
public static final int SUCCESS = 0;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/18c7e582/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java
index 9485584..c104896 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java
@@ -74,15 +74,6 @@ public enum DistCpOptionSwitch {
DELETE_MISSING(DistCpConstants.CONF_LABEL_DELETE_MISSING,
new Option("delete", false, "Delete from target, " +
"files missing in source")),
-
- /**
- * Configuration file to use with hftps:// for securely copying
- * files across clusters. Typically the configuration file contains
- * truststore/keystore information such as location, password and type
- */
- SSL_CONF(DistCpConstants.CONF_LABEL_SSL_CONF,
- new Option("mapredSslConf", true, "Configuration for ssl config file" +
- ", to use with hftps://. Must be in the classpath.")),
/**
* Number of threads for building source file listing (before map-reduce
* phase, max one listStatus per thread at a time).
http://git-wip-us.apache.org/repos/asf/hadoop/blob/18c7e582/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
index cbd4ac0..0b1234b 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
@@ -49,8 +49,6 @@ public class DistCpOptions {
private int maxMaps = DistCpConstants.DEFAULT_MAPS;
private float mapBandwidth = DistCpConstants.DEFAULT_BANDWIDTH_MB;
- private String sslConfigurationFile;
-
private String copyStrategy = DistCpConstants.UNIFORMSIZE;
private EnumSet<FileAttribute> preserveStatus = EnumSet.noneOf(FileAttribute.class);
@@ -134,7 +132,6 @@ public class DistCpOptions {
this.numListstatusThreads = that.numListstatusThreads;
this.maxMaps = that.maxMaps;
this.mapBandwidth = that.mapBandwidth;
- this.sslConfigurationFile = that.getSslConfigurationFile();
this.copyStrategy = that.copyStrategy;
this.preserveStatus = that.preserveStatus;
this.preserveRawXattrs = that.preserveRawXattrs;
@@ -381,24 +378,6 @@ public class DistCpOptions {
}
/**
- * Get path where the ssl configuration file is present to use for hftps://
- *
- * @return Path on local file system
- */
- public String getSslConfigurationFile() {
- return sslConfigurationFile;
- }
-
- /**
- * Set the SSL configuration file path to use with hftps:// (local path)
- *
- * @param sslConfigurationFile - Local ssl config file path
- */
- public void setSslConfigurationFile(String sslConfigurationFile) {
- this.sslConfigurationFile = sslConfigurationFile;
- }
-
- /**
* Returns an iterator with the list of file attributes to preserve
*
* @return iterator of file attributes to preserve
@@ -670,7 +649,6 @@ public class DistCpOptions {
", numListstatusThreads=" + numListstatusThreads +
", maxMaps=" + maxMaps +
", mapBandwidth=" + mapBandwidth +
- ", sslConfigurationFile='" + sslConfigurationFile + '\'' +
", copyStrategy='" + copyStrategy + '\'' +
", preserveStatus=" + preserveStatus +
", preserveRawXattrs=" + preserveRawXattrs +
http://git-wip-us.apache.org/repos/asf/hadoop/blob/18c7e582/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java
index b414513..0f0ef2e 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java
@@ -137,11 +137,6 @@ public class OptionsParser {
parseBandwidth(command, option);
- if (command.hasOption(DistCpOptionSwitch.SSL_CONF.getSwitch())) {
- option.setSslConfigurationFile(command.
- getOptionValue(DistCpOptionSwitch.SSL_CONF.getSwitch()));
- }
-
parseNumListStatusThreads(command, option);
parseMaxMaps(command, option);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/18c7e582/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java
index f75fe76..09bcead 100644
--- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java
+++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java
@@ -19,10 +19,7 @@
package org.apache.hadoop.tools.mapred;
import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
import java.io.IOException;
-import java.io.OutputStream;
-import java.util.Arrays;
import java.util.EnumSet;
import org.apache.commons.logging.Log;
@@ -33,7 +30,6 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCpConstants;
@@ -119,74 +115,10 @@ public class CopyMapper extends Mapper<Text, CopyListingFileStatus, Text, Text>
overWrite = true; // When target is an existing file, overwrite it.
}
- if (conf.get(DistCpConstants.CONF_LABEL_SSL_CONF) != null) {
- initializeSSLConf(context);
- }
startEpoch = System.currentTimeMillis();
}
/**
- * Initialize SSL Config if same is set in conf
- *
- * @throws IOException - If any
- */
- private void initializeSSLConf(Context context) throws IOException {
- LOG.info("Initializing SSL configuration");
-
- String workDir = conf.get(JobContext.JOB_LOCAL_DIR) + "/work";
- Path[] cacheFiles = context.getLocalCacheFiles();
-
- Configuration sslConfig = new Configuration(false);
- String sslConfFileName = conf.get(DistCpConstants.CONF_LABEL_SSL_CONF);
- Path sslClient = findCacheFile(cacheFiles, sslConfFileName);
- if (sslClient == null) {
- LOG.warn("SSL Client config file not found. Was looking for " + sslConfFileName +
- " in " + Arrays.toString(cacheFiles));
- return;
- }
- sslConfig.addResource(sslClient);
-
- String trustStoreFile = conf.get("ssl.client.truststore.location");
- Path trustStorePath = findCacheFile(cacheFiles, trustStoreFile);
- sslConfig.set("ssl.client.truststore.location", trustStorePath.toString());
-
- String keyStoreFile = conf.get("ssl.client.keystore.location");
- Path keyStorePath = findCacheFile(cacheFiles, keyStoreFile);
- sslConfig.set("ssl.client.keystore.location", keyStorePath.toString());
-
- try {
- OutputStream out = new FileOutputStream(workDir + "/" + sslConfFileName);
- try {
- sslConfig.writeXml(out);
- } finally {
- out.close();
- }
- conf.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfFileName);
- } catch (IOException e) {
- LOG.warn("Unable to write out the ssl configuration. " +
- "Will fall back to default ssl-client.xml in class path, if there is one", e);
- }
- }
-
- /**
- * Find entry from distributed cache
- *
- * @param cacheFiles - All localized cache files
- * @param fileName - fileName to search
- * @return Path of the filename if found, else null
- */
- private Path findCacheFile(Path[] cacheFiles, String fileName) {
- if (cacheFiles != null && cacheFiles.length > 0) {
- for (Path file : cacheFiles) {
- if (file.getName().equals(fileName)) {
- return file;
- }
- }
- }
- return null;
- }
-
- /**
* Implementation of the Mapper::map(). Does the copy.
* @param relPath The target path.
* @param sourceFileStatus The source path.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/18c7e582/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm b/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm
index bac5ecc..4dd1583 100644
--- a/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm
+++ b/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm
@@ -32,7 +32,6 @@ DistCp Guide
- [Map sizing](#Map_sizing)
- [Copying Between Versions of HDFS](#Copying_Between_Versions_of_HDFS)
- [MapReduce and other side-effects](#MapReduce_and_other_side-effects)
- - [SSL Configurations for HSFTP sources](#SSL_Configurations_for_HSFTP_sources)
- [Frequently Asked Questions](#Frequently_Asked_Questions)
---
@@ -232,7 +231,6 @@ Flag | Description | Notes
`-strategy {dynamic|uniformsize}` | Choose the copy-strategy to be used in DistCp. | By default, uniformsize is used. (i.e. Maps are balanced on the total size of files copied by each map. Similar to legacy.) If "dynamic" is specified, `DynamicInputFormat` is used instead. (This is described in the Architecture section, under InputFormats.)
`-bandwidth` | Specify bandwidth per map, in MB/second. | Each map will be restricted to consume only the specified bandwidth. This is not always exact. The map throttles back its bandwidth consumption during a copy, such that the **net** bandwidth used tends towards the specified value.
`-atomic {-tmp <tmp_dir>}` | Specify atomic commit, with optional tmp directory. | `-atomic` instructs DistCp to copy the source data to a temporary target location, and then move the temporary target to the final-location atomically. Data will either be available at final target in a complete and consistent form, or not at all. Optionally, `-tmp` may be used to specify the location of the tmp-target. If not specified, a default is chosen. **Note:** tmp_dir must be on the final target cluster.
-`-mapredSslConf <ssl_conf_file>` | Specify SSL Config file, to be used with HSFTP source | When using the hsftp protocol with a source, the security- related properties may be specified in a config-file and passed to DistCp. \<ssl_conf_file\> needs to be in the classpath.
`-async` | Run DistCp asynchronously. Quits as soon as the Hadoop Job is launched. | The Hadoop Job-id is logged, for tracking.
`-diff` | Use snapshot diff report to identify the difference between source and target. |
`-numListstatusThreads` | Number of threads to use for building file listing | At most 40 threads.
@@ -432,43 +430,6 @@ $H3 MapReduce and other side-effects
* If `mapreduce.map.speculative` is set set final and true, the result of the
copy is undefined.
-$H3 SSL Configurations for HSFTP sources
-
- To use an HSFTP source (i.e. using the hsftp protocol), a SSL configuration
- file needs to be specified (via the `-mapredSslConf` option). This must
- specify 3 parameters:
-
- * `ssl.client.truststore.location`: The local-filesystem location of the
- trust-store file, containing the certificate for the NameNode.
- * `ssl.client.truststore.type`: (Optional) The format of the trust-store
- file.
- * `ssl.client.truststore.password`: (Optional) Password for the trust-store
- file.
-
- The following is an example SSL configuration file:
-
- <configuration>
- <property>
- <name>ssl.client.truststore.location</name>
- <value>/work/keystore.jks</value>
- <description>Truststore to be used by clients like distcp. Must be specified.</description>
- </property>
-
- <property>
- <name>ssl.client.truststore.password</name>
- <value>changeme</value>
- <description>Optional. Default value is "".</description>
- </property>
-
- <property>
- <name>ssl.client.truststore.type</name>
- <value>jks</value>
- <description>Optional. Default value is "jks".</description>
- </property>
- </configuration>
-
- The SSL configuration file must be in the class-path of the DistCp program.
-
Frequently Asked Questions
--------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/18c7e582/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java
index 4c136a8..6186f30 100644
--- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java
+++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java
@@ -252,21 +252,6 @@ public class TestOptionsParser {
}
@Test
- public void testParseSSLConf() {
- DistCpOptions options = OptionsParser.parse(new String[] {
- "hdfs://localhost:8020/source/first",
- "hdfs://localhost:8020/target/"});
- Assert.assertNull(options.getSslConfigurationFile());
-
- options = OptionsParser.parse(new String[] {
- "-mapredSslConf",
- "/tmp/ssl-client.xml",
- "hdfs://localhost:8020/source/first",
- "hdfs://localhost:8020/target/"});
- Assert.assertEquals(options.getSslConfigurationFile(), "/tmp/ssl-client.xml");
- }
-
- @Test
public void testParseMaps() {
DistCpOptions options = OptionsParser.parse(new String[] {
"hdfs://localhost:8020/source/first",
@@ -402,7 +387,7 @@ public class TestOptionsParser {
String val = "DistCpOptions{atomicCommit=false, syncFolder=false, "
+ "deleteMissing=false, ignoreFailures=false, overwrite=false, "
+ "skipCRC=false, blocking=true, numListstatusThreads=0, maxMaps=20, "
- + "mapBandwidth=100.0, sslConfigurationFile='null', "
+ + "mapBandwidth=100.0, "
+ "copyStrategy='uniformsize', preserveStatus=[], "
+ "preserveRawXattrs=false, atomicWorkPath=null, logPath=null, "
+ "sourceFileListing=abc, sourcePaths=null, targetPath=xyz, "