You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by jl...@apache.org on 2016/10/05 15:34:20 UTC
hadoop git commit: MAPREDUCE-6741. Add MR support to redact job conf
properties. Contributed by Haibo Chen (cherry picked from commit
f1b74a3d9ff71bc014dbfd29a6996071b81d14c5)
Repository: hadoop
Updated Branches:
refs/heads/branch-2.7 5c99959b2 -> 599146d10
MAPREDUCE-6741. Add MR support to redact job conf properties. Contributed by Haibo Chen
(cherry picked from commit f1b74a3d9ff71bc014dbfd29a6996071b81d14c5)
Conflicts:
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/599146d1
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/599146d1
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/599146d1
Branch: refs/heads/branch-2.7
Commit: 599146d10bb038dadd97d778b0aa2abb42442030
Parents: 5c99959
Author: Jason Lowe <jl...@apache.org>
Authored: Wed Oct 5 15:33:23 2016 +0000
Committer: Jason Lowe <jl...@apache.org>
Committed: Wed Oct 5 15:33:23 2016 +0000
----------------------------------------------------------------------
hadoop-mapreduce-project/CHANGES.txt | 3 +
.../jobhistory/JobHistoryEventHandler.java | 19 +++---
.../mapreduce/v2/app/webapp/dao/ConfInfo.java | 4 +-
.../jobhistory/TestJobHistoryEventHandler.java | 71 ++++++++++++++++++++
.../mapreduce/v2/app/webapp/TestBlocks.java | 10 ++-
.../apache/hadoop/mapreduce/MRJobConfig.java | 4 ++
.../hadoop/mapreduce/util/MRJobConfUtil.java | 45 +++++++++++++
.../src/main/resources/mapred-default.xml | 8 +++
8 files changed, 151 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/599146d1/hadoop-mapreduce-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index ab98596..6409b98 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -8,6 +8,9 @@ Release 2.7.4 - UNRELEASED
IMPROVEMENTS
+ MAPREDUCE-6741. Add MR support to redact job conf properties. (Haibo Chen
+ via jlowe)
+
OPTIMIZATIONS
BUG FIXES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/599146d1/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java
index 5730262..c00da76 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.mapreduce.JobCounter;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.TypeConverter;
+import org.apache.hadoop.mapreduce.util.MRJobConfUtil;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.api.records.JobState;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
@@ -469,16 +470,16 @@ public class JobHistoryEventHandler extends AbstractService
if (conf != null) {
// TODO Ideally this should be written out to the job dir
// (.staging/jobid/files - RecoveryService will need to be patched)
- FSDataOutputStream jobFileOut = null;
- try {
- if (logDirConfPath != null) {
- jobFileOut = stagingDirFS.create(logDirConfPath, true);
- conf.writeXml(jobFileOut);
- jobFileOut.close();
+ if (logDirConfPath != null) {
+ Configuration redactedConf = new Configuration(conf);
+ MRJobConfUtil.redact(redactedConf);
+ try (FSDataOutputStream jobFileOut = stagingDirFS
+ .create(logDirConfPath, true)) {
+ redactedConf.writeXml(jobFileOut);
+ } catch (IOException e) {
+ LOG.info("Failed to write the job configuration file", e);
+ throw e;
}
- } catch (IOException e) {
- LOG.info("Failed to write the job configuration file", e);
- throw e;
}
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/599146d1/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java
index a05c317..287ab99 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java
@@ -26,8 +26,7 @@ import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlRootElement;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileContext;
-import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.util.MRJobConfUtil;
import org.apache.hadoop.mapreduce.v2.app.job.Job;
@XmlRootElement(name = "conf")
@@ -45,6 +44,7 @@ public class ConfInfo {
this.property = new ArrayList<ConfEntryInfo>();
Configuration jobConf = job.loadConfFile();
this.path = job.getConfFile().toString();
+ MRJobConfUtil.redact(jobConf);
for (Map.Entry<String, String> entry : jobConf) {
this.property.add(new ConfEntryInfo(entry.getKey(), entry.getValue(),
jobConf.getPropertySources(entry.getKey())));
http://git-wip-us.apache.org/repos/asf/hadoop/blob/599146d1/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java
index f213b32..4ed1e80 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java
@@ -28,6 +28,7 @@ import static org.mockito.Mockito.when;
import java.io.File;
import java.io.FileOutputStream;
+import java.io.InputStream;
import java.io.IOException;
import java.util.HashMap;
@@ -52,6 +53,7 @@ import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.TypeConverter;
+import org.apache.hadoop.mapreduce.util.MRJobConfUtil;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.job.Job;
@@ -370,6 +372,74 @@ public class TestJobHistoryEventHandler {
}
}
+ @Test
+ public void testPropertyRedactionForJHS() throws Exception {
+ final Configuration conf = new Configuration();
+
+ String sensitivePropertyName = "aws.fake.credentials.name";
+ String sensitivePropertyValue = "aws.fake.credentials.val";
+ conf.set(sensitivePropertyName, sensitivePropertyValue);
+ conf.set(MRJobConfig.MR_JOB_REDACTED_PROPERTIES,
+ sensitivePropertyName);
+ conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY,
+ dfsCluster.getURI().toString());
+ final TestParams params = new TestParams();
+ conf.set(MRJobConfig.MR_AM_STAGING_DIR, params.dfsWorkDir);
+
+ final JHEvenHandlerForTest jheh =
+ new JHEvenHandlerForTest(params.mockAppContext, 0, false);
+
+ try {
+ jheh.init(conf);
+ jheh.start();
+ handleEvent(jheh, new JobHistoryEvent(params.jobId,
+ new AMStartedEvent(params.appAttemptId, 200, params.containerId,
+ "nmhost", 3000, 4000, -1)));
+ handleEvent(jheh, new JobHistoryEvent(params.jobId,
+ new JobUnsuccessfulCompletionEvent(TypeConverter.fromYarn(
+ params.jobId), 0, 0, 0, JobStateInternal.FAILED.toString())));
+
+ // verify the sensitive property in the in-memory job conf is not modified.
+ Assert.assertEquals(sensitivePropertyName + " is modified.",
+ conf.get(sensitivePropertyName), sensitivePropertyValue);
+
+ // load the job_conf.xml in JHS directory and verify property redaction.
+ Path jhsJobConfFile = getJobConfInIntermediateDoneDir(conf, params.jobId);
+ Assert.assertTrue("The job_conf.xml file is not in the JHS directory",
+ FileContext.getFileContext(conf).util().exists(jhsJobConfFile));
+ Configuration jhsJobConf = new Configuration();
+
+ try (InputStream input = FileSystem.get(conf).open(jhsJobConfFile)) {
+ jhsJobConf.addResource(input);
+ Assert.assertEquals(
+ sensitivePropertyName + " is not redacted in HDFS.",
+ MRJobConfUtil.REDACTION_REPLACEMENT_VAL,
+ jhsJobConf.get(sensitivePropertyName));
+ }
+ } finally {
+ jheh.stop();
+ purgeHdfsHistoryIntermediateDoneDirectory(conf);
+ }
+ }
+
+ private static Path getJobConfInIntermediateDoneDir(Configuration conf,
+ JobId jobId) throws IOException {
+ Path userDoneDir = new Path(
+ JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf));
+ Path doneDirPrefix =
+ FileContext.getFileContext(conf).makeQualified(userDoneDir);
+ return new Path(
+ doneDirPrefix, JobHistoryUtils.getIntermediateConfFileName(jobId));
+ }
+
+ private void purgeHdfsHistoryIntermediateDoneDirectory(Configuration conf)
+ throws IOException {
+ FileSystem fs = FileSystem.get(dfsCluster.getConfiguration(0));
+ String intermDoneDirPrefix =
+ JobHistoryUtils.getConfiguredHistoryIntermediateDoneDirPrefix(conf);
+ fs.delete(new Path(intermDoneDirPrefix), true);
+ }
+
@Test (timeout=50000)
public void testDefaultFsIsUsedForHistory() throws Exception {
// Create default configuration pointing to the minicluster
@@ -411,6 +481,7 @@ public class TestJobHistoryEventHandler {
localFileSystem.exists(new Path(t.dfsWorkDir)));
} finally {
jheh.stop();
+ purgeHdfsHistoryIntermediateDoneDirectory(conf);
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/599146d1/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java
index 13f91e0..ee7e0c9 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java
@@ -23,6 +23,8 @@ import java.io.PrintWriter;
import java.util.HashMap;
import java.util.Map;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.mapreduce.util.MRJobConfUtil;
import org.junit.Test;
import org.apache.hadoop.conf.Configuration;
@@ -59,6 +61,9 @@ public class TestBlocks {
Path path = new Path("conf");
Configuration configuration = new Configuration();
configuration.set("Key for test", "Value for test");
+ final String redactedProp = "Key for redaction";
+ configuration.set(MRJobConfig.MR_JOB_REDACTED_PROPERTIES,
+ redactedProp);
when(job.getConfFile()).thenReturn(path);
when(job.loadConfFile()).thenReturn(configuration);
@@ -79,9 +84,10 @@ public class TestBlocks {
configurationBlock.render(html);
pWriter.flush();
assertTrue(data.toString().contains("Key for test"));
-
assertTrue(data.toString().contains("Value for test"));
-
+ assertTrue(data.toString().contains(redactedProp));
+ assertTrue(data.toString().contains(
+ MRJobConfUtil.REDACTION_REPLACEMENT_VAL));
}
/**
http://git-wip-us.apache.org/repos/asf/hadoop/blob/599146d1/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
index 206a65c..c028aaf 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
@@ -896,4 +896,8 @@ public interface MRJobConfig {
public static final int DEFAULT_MR_ENCRYPTED_INTERMEDIATE_DATA_BUFFER_KB =
128;
+ /**
+ * A comma-separated list of properties whose value will be redacted.
+ */
+ String MR_JOB_REDACTED_PROPERTIES = "mapreduce.job.redacted-properties";
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/599146d1/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/MRJobConfUtil.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/MRJobConfUtil.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/MRJobConfUtil.java
new file mode 100644
index 0000000..11d49a4
--- /dev/null
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/MRJobConfUtil.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapreduce.util;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+
+/**
+ * A class that contains utility methods for MR Job configuration.
+ */
+public final class MRJobConfUtil {
+ public static final String REDACTION_REPLACEMENT_VAL = "*********(redacted)";
+
+ /**
+ * Redact job configuration properties.
+ * @param conf the job configuration to redact
+ */
+ public static void redact(final Configuration conf) {
+ for (String prop : conf.getTrimmedStringCollection(
+ MRJobConfig.MR_JOB_REDACTED_PROPERTIES)) {
+ conf.set(prop, REDACTION_REPLACEMENT_VAL);
+ }
+ }
+
+ /**
+ * There is no reason to instantiate this utility class.
+ */
+ private MRJobConfUtil() {
+ }
+}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/599146d1/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
index 4c29c28..e9cb3f9 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
@@ -2167,4 +2167,12 @@
app master.
</description>
</property>
+
+<property>
+ <description>
+ A comma-separated list of job configuration properties whose values will be redacted.
+ </description>
+ <name>mapreduce.job.redacted-properties</name>
+ <value></value>
+</property>
</configuration>
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org