Posted to common-commits@hadoop.apache.org by ki...@apache.org on 2020/08/13 23:55:19 UTC

[hadoop] branch branch-2.10 updated: MAPREDUCE-7069. Add ability to specify user environment variables individually. Contributed by Jim Brennan

This is an automated email from the ASF dual-hosted git repository.

kihwal pushed a commit to branch branch-2.10
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-2.10 by this push:
     new 283def8  MAPREDUCE-7069. Add ability to specify user environment variables individually. Contributed by Jim Brennan
283def8 is described below

commit 283def8bc5f16b139c0b35ad924085ee3baf9631
Author: Kihwal Lee <ki...@apache.org>
AuthorDate: Thu Aug 13 18:54:31 2020 -0500

    MAPREDUCE-7069. Add ability to specify user environment variables individually. Contributed by Jim Brennan
    
    (cherry picked from commit 4571351cccf6d4977469d3d623cf045b06a5f5f0)
---
 .../apache/hadoop/mapred/MapReduceChildJVM.java    |  73 +++++------
 .../mapreduce/v2/app/job/impl/TaskAttemptImpl.java |   8 +-
 .../v2/app/job/impl/TestMapReduceChildJVM.java     |  24 +++-
 .../apache/hadoop/mapreduce/v2/util/MRApps.java    |  10 ++
 .../java/org/apache/hadoop/mapred/JobConf.java     |  18 +++
 .../src/main/resources/mapred-default.xml          |  61 +++++++--
 .../src/site/markdown/MapReduceTutorial.md         |   6 +
 .../java/org/apache/hadoop/mapred/YARNRunner.java  |  11 +-
 .../org/apache/hadoop/mapred/TestYARNRunner.java   |  26 +++-
 .../java/org/apache/hadoop/yarn/util/Apps.java     | 115 ++++++++++++++---
 .../java/org/apache/hadoop/yarn/util/TestApps.java | 136 +++++++++++++++++++++
 11 files changed, 407 insertions(+), 81 deletions(-)
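
For readers skimming the patch: each task environment variable can now be set
as its own configuration property (mapreduce.map.env.VARNAME=value and the
reduce/AM equivalents) instead of only as an entry in one comma-separated
list, so a value may itself contain commas, and per-variable settings win over
the list. A minimal, hypothetical driver sketch of the two syntaxes (the class
name and values are made up; the property names and precedence come from this
patch):

    import org.apache.hadoop.conf.Configuration;

    public class TaskEnvSyntaxSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        // Existing syntax: one comma-separated list of VAR=value pairs.
        conf.set("mapreduce.map.env", "FOO_VAR=bar,MODE=fast");
        // New syntax: one property per variable, so the value may contain
        // commas; it also overrides a same-named entry in the list above.
        conf.set("mapreduce.map.env.LIST_VAR", "a,b,c");
        conf.set("mapreduce.map.env.MODE", "debug"); // wins over MODE=fast
        System.out.println(conf.get("mapreduce.map.env.LIST_VAR")); // a,b,c
      }
    }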

diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java
index 817b3a5..1a427a0 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java
@@ -19,7 +19,6 @@
 package org.apache.hadoop.mapred;
 
 import java.net.InetSocketAddress;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Vector;
@@ -27,7 +26,6 @@ import java.util.Vector;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.TaskLog.LogName;
 import org.apache.hadoop.mapreduce.MRJobConfig;
-import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.v2.util.MRApps;
 import org.apache.hadoop.yarn.api.ApplicationConstants;
 import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
@@ -41,50 +39,53 @@ public class MapReduceChildJVM {
         filter.toString();
   }
 
-  private static String getChildEnv(JobConf jobConf, boolean isMap) {
+  private static String getChildEnvProp(JobConf jobConf, boolean isMap) {
     if (isMap) {
-      return jobConf.get(JobConf.MAPRED_MAP_TASK_ENV,
-          jobConf.get(JobConf.MAPRED_TASK_ENV));
+      return JobConf.MAPRED_MAP_TASK_ENV;
     }
-    return jobConf.get(JobConf.MAPRED_REDUCE_TASK_ENV,
-        jobConf.get(JobConf.MAPRED_TASK_ENV));
+    return JobConf.MAPRED_REDUCE_TASK_ENV;
+  }
+
+  private static String getChildEnvDefaultValue(JobConf jobConf) {
+    // There is no default value for these - use the fallback value instead.
+    return jobConf.get(JobConf.MAPRED_TASK_ENV);
   }
 
   public static void setVMEnv(Map<String, String> environment,
       Task task) {
 
     JobConf conf = task.conf;
-    // Add the env variables passed by the user
-    String mapredChildEnv = getChildEnv(conf, task.isMapTask());
-    MRApps.setEnvFromInputString(environment, mapredChildEnv, conf);
-
-    // Set logging level in the environment.
-    // This is so that, if the child forks another "bin/hadoop" (common in
-    // streaming) it will have the correct loglevel.
-    environment.put(
-        "HADOOP_ROOT_LOGGER", 
-        MRApps.getChildLogLevel(conf, task.isMapTask()) + ",console");
-
-    // TODO: The following is useful for instance in streaming tasks. Should be
-    // set in ApplicationMaster's env by the RM.
-    String hadoopClientOpts = System.getenv("HADOOP_CLIENT_OPTS");
-    if (hadoopClientOpts == null) {
-      hadoopClientOpts = "";
-    } else {
-      hadoopClientOpts = hadoopClientOpts + " ";
+    boolean isMap = task.isMapTask();
+
+    // Remove these before adding the user variables to prevent
+    // MRApps.setEnvFromInputProperty() from appending to them.
+    String hadoopRootLoggerKey = "HADOOP_ROOT_LOGGER";
+    String hadoopClientOptsKey = "HADOOP_CLIENT_OPTS";
+    environment.remove(hadoopRootLoggerKey);
+    environment.remove(hadoopClientOptsKey);
+
+    // Add the environment variables passed by the user
+    MRApps.setEnvFromInputProperty(environment, getChildEnvProp(conf, isMap),
+        getChildEnvDefaultValue(conf), conf);
+
+    // Set HADOOP_ROOT_LOGGER and HADOOP_CLIENT_OPTS if the user did not set them.
+    if (!environment.containsKey(hadoopRootLoggerKey)) {
+      // Set the value for logging level in the environment.
+      // This is so that, if the child forks another "bin/hadoop" (common in
+      // streaming) it will have the correct loglevel.
+      environment.put(hadoopRootLoggerKey,
+          MRApps.getChildLogLevel(conf, task.isMapTask()) + ",console");
     }
-    environment.put("HADOOP_CLIENT_OPTS", hadoopClientOpts);
-    
-    // setEnvFromInputString above will add env variable values from
-    // mapredChildEnv to existing variables. We want to overwrite
-    // HADOOP_ROOT_LOGGER and HADOOP_CLIENT_OPTS if the user set it explicitly.
-    Map<String, String> tmpEnv = new HashMap<String, String>();
-    MRApps.setEnvFromInputString(tmpEnv, mapredChildEnv, conf);
-    String[] keys = { "HADOOP_ROOT_LOGGER", "HADOOP_CLIENT_OPTS" };
-    for (String key : keys) {
-      if (tmpEnv.containsKey(key)) {
-        environment.put(key, tmpEnv.get(key));
+    if (!environment.containsKey(hadoopClientOptsKey)) {
+      // TODO: The following is useful for instance in streaming tasks.
+      // Should be set in ApplicationMaster's env by the RM.
+      String hadoopClientOptsValue = System.getenv(hadoopClientOptsKey);
+      if (hadoopClientOptsValue == null) {
+        hadoopClientOptsValue = "";
+      } else {
+        hadoopClientOptsValue = hadoopClientOptsValue + " ";
       }
+      environment.put(hadoopClientOptsKey, hadoopClientOptsValue);
     }
 
     // Add stdout/stderr env
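
The setVMEnv() rework above boils down to "the user's value wins, otherwise
apply the default" for HADOOP_ROOT_LOGGER and HADOOP_CLIENT_OPTS: both keys
are removed up front so the user's per-variable settings are not appended to,
and defaults are filled in only for keys the user left unset. A stripped-down
sketch of that pattern using plain java.util collections (not the actual
Hadoop code):

    import java.util.HashMap;
    import java.util.Map;

    public class DefaultIfUnsetSketch {
      public static void main(String[] args) {
        Map<String, String> env = new HashMap<>();
        // Pretend the user set only HADOOP_CLIENT_OPTS via mapreduce.map.env.*
        env.put("HADOOP_CLIENT_OPTS", "-Dsome.client.flag=true");
        // Fill in defaults only where the user supplied nothing.
        env.putIfAbsent("HADOOP_ROOT_LOGGER", "INFO,console");
        env.putIfAbsent("HADOOP_CLIENT_OPTS", "");
        System.out.println(env); // the user's HADOOP_CLIENT_OPTS is preserved
      }
    }
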
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
index 3f37d4d..3020b14 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
@@ -963,10 +963,10 @@ public abstract class TaskAttemptImpl implements
         MRApps.crossPlatformifyMREnv(conf, Environment.PWD), conf);
 
     // Add the env variables passed by the admin
-    MRApps.setEnvFromInputString(environment,
-        conf.get(MRJobConfig.MAPRED_ADMIN_USER_ENV,
-            MRJobConfig.DEFAULT_MAPRED_ADMIN_USER_ENV),
-        conf);
+    MRApps.setEnvFromInputProperty(environment,
+        MRJobConfig.MAPRED_ADMIN_USER_ENV,
+        MRJobConfig.DEFAULT_MAPRED_ADMIN_USER_ENV, conf);
+
     return environment;
   }
 
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestMapReduceChildJVM.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestMapReduceChildJVM.java
index b1e9cf0..07ed7cb 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestMapReduceChildJVM.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestMapReduceChildJVM.java
@@ -217,11 +217,11 @@ public class TestMapReduceChildJVM {
     app.verifyCompleted();
     
     Assert.assertTrue("HADOOP_ROOT_LOGGER not set for job",
-      app.cmdEnvironment.containsKey("HADOOP_ROOT_LOGGER"));
+        app.cmdEnvironment.containsKey("HADOOP_ROOT_LOGGER"));
     Assert.assertEquals("WARN,console",
-      app.cmdEnvironment.get("HADOOP_ROOT_LOGGER"));
+        app.cmdEnvironment.get("HADOOP_ROOT_LOGGER"));
     Assert.assertTrue("HADOOP_CLIENT_OPTS not set for job",
-      app.cmdEnvironment.containsKey("HADOOP_CLIENT_OPTS"));
+        app.cmdEnvironment.containsKey("HADOOP_CLIENT_OPTS"));
     Assert.assertEquals("test", app.cmdEnvironment.get("HADOOP_CLIENT_OPTS"));
 
     // Try one more.
@@ -233,8 +233,22 @@ public class TestMapReduceChildJVM {
     app.verifyCompleted();
     
     Assert.assertTrue("HADOOP_ROOT_LOGGER not set for job",
-      app.cmdEnvironment.containsKey("HADOOP_ROOT_LOGGER"));
+        app.cmdEnvironment.containsKey("HADOOP_ROOT_LOGGER"));
     Assert.assertEquals("trace",
-      app.cmdEnvironment.get("HADOOP_ROOT_LOGGER"));
+        app.cmdEnvironment.get("HADOOP_ROOT_LOGGER"));
+
+    // Try one using the mapreduce.task.env.var=value syntax
+    app = new MyMRApp(1, 0, true, this.getClass().getName(), true);
+    conf = new Configuration();
+    conf.set(JobConf.MAPRED_MAP_TASK_ENV + ".HADOOP_ROOT_LOGGER",
+        "DEBUG,console");
+    job = app.submit(conf);
+    app.waitForState(job, JobState.SUCCEEDED);
+    app.verifyCompleted();
+
+    Assert.assertTrue("HADOOP_ROOT_LOGGER not set for job",
+        app.cmdEnvironment.containsKey("HADOOP_ROOT_LOGGER"));
+    Assert.assertEquals("DEBUG,console",
+        app.cmdEnvironment.get("HADOOP_ROOT_LOGGER"));
   }
 }
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java
index 5711464..f0929f6 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java
@@ -707,6 +707,16 @@ public class MRApps extends Apps {
     Apps.setEnvFromInputString(env, envString, classPathSeparator);
   }
 
+  public static void setEnvFromInputProperty(Map<String, String> env,
+      String propName, String defaultPropValue, Configuration conf) {
+    String classPathSeparator =
+        conf.getBoolean(MRConfig.MAPREDUCE_APP_SUBMISSION_CROSS_PLATFORM,
+            MRConfig.DEFAULT_MAPREDUCE_APP_SUBMISSION_CROSS_PLATFORM)
+            ? ApplicationConstants.CLASS_PATH_SEPARATOR : File.pathSeparator;
+    Apps.setEnvFromInputProperty(env, propName, defaultPropValue, conf,
+        classPathSeparator);
+  }
+
   @Public
   @Unstable
   public static void addToEnvironment(Map<String, String> environment,
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java
index 08581e4..a16813e 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java
@@ -312,6 +312,15 @@ public class JobConf extends Configuration {
    *   <li> B=$X:c This is inherit tasktracker's X env variable on Linux. </li>
    *   <li> B=%X%;c This is inherit tasktracker's X env variable on Windows. </li>
    * </ul>
+   *
+   * You can also add environment variables individually by appending
+   * <code>.VARNAME</code> to this configuration key, where VARNAME is
+   * the name of the environment variable.
+   *
+   * Example:
+   * <ul>
+   *   <li>mapreduce.map.env.VARNAME=value</li>
+   * </ul>
    */
   public static final String MAPRED_MAP_TASK_ENV = JobContext.MAP_ENV;
   
@@ -328,6 +337,15 @@ public class JobConf extends Configuration {
    *   <li> B=$X:c This is inherit tasktracker's X env variable on Linux. </li>
    *   <li> B=%X%;c This is inherit tasktracker's X env variable on Windows. </li>
    * </ul>
+   *
+   * You can also add environment variables individually by appending
+   * <code>.VARNAME</code> to this configuration key, where VARNAME is
+   * the name of the environment variable.
+   *
+   * Example:
+   * <ul>
+   *   <li>mapreduce.reduce.env.VARNAME=value</li>
+   * </ul>
    */
   public static final String MAPRED_REDUCE_TASK_ENV = JobContext.REDUCE_ENV;
 
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
index 8b133ed..bc85716 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
@@ -356,11 +356,22 @@
 <property>
   <name>mapred.child.env</name>
   <value></value>
-  <description>User added environment variables for the task processes.
+  <description>User added environment variables for the task processes,
+  specified as a comma separated list.
   Example :
   1) A=foo  This will set the env variable A to foo
   2) B=$B:c This is inherit nodemanager's B env variable on Unix.
   3) B=%B%;c This is inherit nodemanager's B env variable on Windows.
+
+  To specify a comma separated list of environment variables specifically for
+  map or reduce tasks, use the mapreduce.map.env or mapreduce.reduce.env
+  properties.
+
+  To define environment variables individually for map or reduce tasks,
+  you can specify multiple properties of the form mapreduce.map.env.VARNAME
+  or mapreduce.reduce.env.VARNAME, where VARNAME is the name of the
+  environment variable. This is the only way to add a variable when its value
+  contains commas.
   </description>
 </property>
 
@@ -368,7 +379,15 @@
 <property>
   <name>mapreduce.map.env</name>
   <value></value>
-  <description>User added environment variables for the map task processes.
+  <description>User added environment variables for the map task processes,
+  specified as a comma separated list.
+  Example:
+    VAR1=value1,VAR2=value2
+
+  To define environment variables individually, you can specify
+  multiple properties of the form mapreduce.map.env.VARNAME,
+  where VARNAME is the name of the environment variable. This is the only
+  way to add a variable when its value contains commas.
   </description>
 </property>
 -->
@@ -377,7 +396,16 @@
 <property>
   <name>mapreduce.reduce.env</name>
   <value></value>
-  <description>User added environment variables for the reduce task processes.
+  <description>User added environment variables for the reduce task processes,
+  specified as a comma separated list.
+  Example:
+    VAR1=value1,VAR2=value2
+
+  To define environment variables individually, you can specify
+  multiple properties of the form mapreduce.reduce.env.VARNAME,
+  where VARNAME is the name of the environment variable. This is the only
+  way to add a variable when its value contains commas.
   </description>
 </property>
 -->
@@ -391,9 +419,14 @@
   You must preserve the original value if you want your map and
   reduce tasks to have access to native libraries (compression, etc).
   When this value is empty, the command to set execution
-  envrionment will be OS dependent:
+  environment will be OS dependent:
   For linux, use LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native.
   For windows, use PATH = %PATH%;%HADOOP_COMMON_HOME%\\bin.
+
+  To define environment variables individually, you can specify
+  multiple properties of the form mapreduce.admin.user.env.VARNAME,
+  where VARNAME is the name of the environment variable. This is the only
+  way to add a variable when its value contains commas.
   </description>
 </property>
 
@@ -1363,20 +1396,32 @@
   <name>yarn.app.mapreduce.am.env</name>
   <value></value>
   <description>User added environment variables for the MR App Master
-  processes. Example :
+  processes, specified as a comma separated list.
+  Example :
   1) A=foo  This will set the env variable A to foo
   2) B=$B:c This is inherit tasktracker's B env variable.
+
+  To define environment variables individually, you can specify
+  multiple properties of the form yarn.app.mapreduce.am.env.VARNAME,
+  where VARNAME is the name of the environment variable. This is the only
+  way to add a variable when its value contains commas.
   </description>
 </property>
 
 <property>
   <name>yarn.app.mapreduce.am.admin.user.env</name>
   <value></value>
-  <description> Environment variables for the MR App Master
-  processes for admin purposes. These values are set first and can be
-  overridden by the user env (yarn.app.mapreduce.am.env) Example :
+  <description>Environment variables for the MR App Master
+  processes for admin purposes, specified as a comma separated list.
+  These values are set first and can be overridden by the user env
+  (yarn.app.mapreduce.am.env). Example :
   1) A=foo  This will set the env variable A to foo
   2) B=$B:c This is inherit app master's B env variable.
+
+  To define environment variables individually, you can specify
+  multiple properties of the form yarn.app.mapreduce.am.admin.user.env.VARNAME,
+  where VARNAME is the name of the environment variable. This is the only
+  way to add a variable when its value contains commas.
   </description>
 </property>
 
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapReduceTutorial.md b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapReduceTutorial.md
index 57eb2ad..4d039bc 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapReduceTutorial.md
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapReduceTutorial.md
@@ -206,6 +206,12 @@ For example,
 
 Here, the files dir1/dict.txt and dir2/dict.txt can be accessed by tasks using the symbolic names dict1 and dict2 respectively. The archive mytar.tgz will be placed and unarchived into a directory by the name "tgzdir".
 
+Applications can set environment variables for the mapper, reducer, and application master tasks by specifying them on the command line using the options -Dmapreduce.map.env, -Dmapreduce.reduce.env, and -Dyarn.app.mapreduce.am.env, respectively.
+
+For example, the following sets the environment variables FOO_VAR=bar and LIST_VAR=a,b,c for the mappers and reducers:
+
+    bin/hadoop jar hadoop-mapreduce-examples-<ver>.jar wordcount -Dmapreduce.map.env.FOO_VAR=bar -Dmapreduce.map.env.LIST_VAR=a,b,c -Dmapreduce.reduce.env.FOO_VAR=bar -Dmapreduce.reduce.env.LIST_VAR=a,b,c input output
+
 ### Walk-through
 
 The `WordCount` application is quite straight-forward.
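
One note on the -Dmapreduce.map.env... options in the tutorial addition above:
-D properties only reach the job's Configuration if the driver passes its
arguments through GenericOptionsParser (directly or via ToolRunner), as the
bundled wordcount example does. A hypothetical minimal driver sketch showing
that wiring (the class name is made up):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;

    public class EnvAwareDriver extends Configured implements Tool {
      @Override
      public int run(String[] args) throws Exception {
        // -Dmapreduce.map.env.FOO_VAR=bar from the command line has already
        // been applied to this Configuration by ToolRunner; build and submit
        // the job from it as usual.
        Configuration conf = getConf();
        System.out.println(conf.get("mapreduce.map.env.FOO_VAR"));
        return 0;
      }

      public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new EnvAwareDriver(), args));
      }
    }
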
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java
index 12a3079..686d6fb 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java
@@ -533,12 +533,13 @@ public class YARNRunner implements ClientProtocol {
         MRApps.crossPlatformifyMREnv(conf, Environment.PWD), conf);
 
     // Setup the environment variables for Admin first
-    MRApps.setEnvFromInputString(environment,
-        conf.get(MRJobConfig.MR_AM_ADMIN_USER_ENV,
-            MRJobConfig.DEFAULT_MR_AM_ADMIN_USER_ENV), conf);
+    MRApps.setEnvFromInputProperty(environment,
+        MRJobConfig.MR_AM_ADMIN_USER_ENV,
+        MRJobConfig.DEFAULT_MR_AM_ADMIN_USER_ENV,
+        conf);
     // Setup the environment variables (LD_LIBRARY_PATH, etc)
-    MRApps.setEnvFromInputString(environment,
-        conf.get(MRJobConfig.MR_AM_ENV), conf);
+    MRApps.setEnvFromInputProperty(environment, MRJobConfig.MR_AM_ENV, null,
+        conf);
 
     // Parse distributed cache
     MRApps.setupDistributedCache(jobConf, localResources);
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java
index ecb396e..98842bd 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java
@@ -811,15 +811,22 @@ public class TestYARNRunner {
 
   @Test
   public void testAMStandardEnvWithDefaultLibPath() throws Exception {
-    testAMStandardEnv(false);
+    testAMStandardEnv(false, false);
   }
 
   @Test
   public void testAMStandardEnvWithCustomLibPath() throws Exception {
-    testAMStandardEnv(true);
+    testAMStandardEnv(true, false);
   }
 
-  private void testAMStandardEnv(boolean customLibPath) throws Exception {
+  @Test
+  public void testAMStandardEnvWithCustomLibPathWithSeparateEnvProps()
+      throws Exception {
+    testAMStandardEnv(true, true);
+  }
+
+  private void testAMStandardEnv(boolean customLibPath,
+      boolean useSeparateEnvProps) throws Exception {
     // the Windows behavior is different and this test currently doesn't really
     // apply
     // MAPREDUCE-6588 should revisit this test
@@ -834,9 +841,16 @@ public class TestYARNRunner {
     String pathKey = Environment.LD_LIBRARY_PATH.name();
 
     if (customLibPath) {
-      jobConf.set(MRJobConfig.MR_AM_ADMIN_USER_ENV, pathKey + "=" +
-          ADMIN_LIB_PATH);
-      jobConf.set(MRJobConfig.MR_AM_ENV, pathKey + "=" + USER_LIB_PATH);
+      if (useSeparateEnvProps) {
+        // Specify these as individual variables instead of k=v lists
+        jobConf.set(MRJobConfig.MR_AM_ADMIN_USER_ENV + "." + pathKey,
+            ADMIN_LIB_PATH);
+        jobConf.set(MRJobConfig.MR_AM_ENV + "." + pathKey, USER_LIB_PATH);
+      } else {
+        jobConf.set(MRJobConfig.MR_AM_ADMIN_USER_ENV, pathKey + "=" +
+            ADMIN_LIB_PATH);
+        jobConf.set(MRJobConfig.MR_AM_ENV, pathKey + "=" + USER_LIB_PATH);
+      }
     }
     jobConf.set(MRJobConfig.MAPRED_ADMIN_USER_SHELL, USER_SHELL);
 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
index aa629ac..4c62134 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Apps.java
@@ -24,6 +24,7 @@ import static org.apache.hadoop.yarn.util.StringHelper.sjoin;
 
 import java.io.File;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.regex.Matcher;
@@ -32,6 +33,7 @@ import java.util.regex.Pattern;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.util.StringInterner;
 import org.apache.hadoop.yarn.api.ApplicationConstants;
@@ -79,34 +81,113 @@ public class Apps {
     throw new YarnRuntimeException(join("Error parsing ", name, ": ", s));
   }
 
+  private static void setEnvFromString(Map<String, String> env,
+      String envVar, String varString, String classPathSeparator) {
+    Matcher m = VAR_SUBBER.matcher(varString);
+    StringBuffer sb = new StringBuffer();
+    while (m.find()) {
+      String var = m.group(1);
+      // do variable substitution of $var from passed in environment or from
+      // system environment and default to empty string if undefined in both.
+      String replace = env.get(var);
+      if (replace == null) {
+        replace = System.getenv(var);
+      }
+      if (replace == null) {
+        replace = "";
+      }
+      m.appendReplacement(sb, Matcher.quoteReplacement(replace));
+    }
+    m.appendTail(sb);
+    addToEnvironment(env, envVar, sb.toString(), classPathSeparator);
+  }
+
   public static void setEnvFromInputString(Map<String, String> env,
       String envString,  String classPathSeparator) {
     if (envString != null && envString.length() > 0) {
       Matcher varValMatcher = VARVAL_SPLITTER.matcher(envString);
       while (varValMatcher.find()) {
         String envVar = varValMatcher.group(1);
-        Matcher m = VAR_SUBBER.matcher(varValMatcher.group(2));
-        StringBuffer sb = new StringBuffer();
-        while (m.find()) {
-          String var = m.group(1);
-          // replace $env with the child's env constructed by tt's
-          String replace = env.get(var);
-          // if this key is not configured by the tt for the child .. get it
-          // from the tt's env
-          if (replace == null)
-            replace = System.getenv(var);
-          // the env key is note present anywhere .. simply set it
-          if (replace == null)
-            replace = "";
-          m.appendReplacement(sb, Matcher.quoteReplacement(replace));
-        }
-        m.appendTail(sb);
-        addToEnvironment(env, envVar, sb.toString(), classPathSeparator);
+        String varString = varValMatcher.group(2);
+        setEnvFromString(env, envVar, varString, classPathSeparator);
+      }
+    }
+  }
+
+  /**
+   * Set environment from string without doing any variable substitution.
+   * Used internally to avoid double expansion.
+   * @param env environment to set
+   * @param envString comma-separated k=v pairs.
+   * @param classPathSeparator Separator to use when appending to an existing
+   *                           environment variable.
+   */
+  private static void setEnvFromInputStringNoExpand(Map<String, String> env,
+      String envString,  String classPathSeparator) {
+    if (envString != null && envString.length() > 0) {
+      Matcher varValMatcher = VARVAL_SPLITTER.matcher(envString);
+      while (varValMatcher.find()) {
+        String envVar = varValMatcher.group(1);
+        String varString = varValMatcher.group(2);
+        addToEnvironment(env, envVar, varString, classPathSeparator);
       }
     }
   }
 
   /**
+   * Set environment variables from map of input properties.
+   * @param env environment to update
+   * @param inputMap environment variable property keys and values
+   * @param classPathSeparator separator to use when appending to an existing
+   *                           environment variable
+   */
+  private static void setEnvFromInputStringMap(Map<String, String> env,
+      Map<String, String> inputMap, String classPathSeparator) {
+    for(Map.Entry<String, String> inputVar: inputMap.entrySet()) {
+      String envVar = inputVar.getKey();
+      String varString = inputVar.getValue();
+      setEnvFromString(env, envVar, varString, classPathSeparator);
+    }
+  }
+
+  /**
+   * Set environment variables from the given environment input property.
+   * For example, given the property mapreduce.map.env, this method
+   * will extract environment variables from:
+   *    the comma-separated string value of mapreduce.map.env, and
+   *    the values of any properties of the form mapreduce.map.env.VAR_NAME
+   * Variables specified via the latter syntax take precedence over those
+   * specified using the former syntax.
+   * @param env the environment to update
+   * @param propName the name of the property
+   * @param defaultPropValue the default value for propName
+   * @param conf configuration containing properties
+   * @param classPathSeparator Separator used when appending to an existing var
+   */
+  public static void setEnvFromInputProperty(Map<String, String> env,
+      String propName, String defaultPropValue, Configuration conf,
+      String classPathSeparator) {
+
+    String envString = conf.get(propName, defaultPropValue);
+
+    // Get k,v pairs from string into a tmp env. Note that we don't want
+    // to expand the env var values, because we will do that below -
+    // don't want to do it twice.
+    Map<String, String> tmpEnv = new HashMap<String, String>();
+    Apps.setEnvFromInputStringNoExpand(tmpEnv, envString, classPathSeparator);
+
+    // Get map of props with prefix propName.
+    // (e.g., mapreduce.map.env.ENV_VAR_NAME=value)
+    Map<String, String> inputMap = conf.getPropsWithPrefix(propName + ".");
+
+    // Entries from map should override entries from input string.
+    tmpEnv.putAll(inputMap);
+
+    // Add them to the environment
+    setEnvFromInputStringMap(env, tmpEnv, classPathSeparator);
+  }
+
+  /**
    *
    * @param envString String containing env variable definitions
    * @param classPathSeparator String that separates the definitions
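
Before the full tests below, a quick feel for the new
Apps.setEnvFromInputProperty(): it merges the comma-separated list value of
the property with any propName.VARNAME sub-properties, and the per-variable
form wins on conflicts. A minimal sketch, assuming hadoop-common and
hadoop-yarn-common on the classpath (the class name and values are made up):

    import java.io.File;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.util.Apps;

    public class SetEnvFromInputPropertySketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        conf.set("mapreduce.map.env", "A=from_list,B=from_list");
        conf.set("mapreduce.map.env.B", "from_property"); // overrides the list
        conf.set("mapreduce.map.env.C", "has,commas,in,it");
        Map<String, String> env = new HashMap<>();
        Apps.setEnvFromInputProperty(env, "mapreduce.map.env",
            null /* no fallback list */, conf, File.pathSeparator);
        // env now holds A=from_list, B=from_property, C=has,commas,in,it
        System.out.println(env);
      }
    }
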
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestApps.java
index 66d2d23..0d65d65 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestApps.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestApps.java
@@ -17,6 +17,7 @@
 */
 package org.apache.hadoop.yarn.util;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.Shell;
 import org.junit.Test;
 
@@ -25,6 +26,8 @@ import java.util.HashMap;
 import java.util.Map;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
 
 public class TestApps {
   @Test
@@ -58,4 +61,137 @@ public class TestApps {
     assertEquals("=", environment.get("e1"));
     assertEquals("a1=a2", environment.get("e2"));
   }
+
+  @Test
+  public void testSetEnvFromInputProperty() {
+    Configuration conf = new Configuration(false);
+    Map<String, String> env = new HashMap<>();
+    String propName = "mapreduce.map.env";
+    String defaultPropName = "mapreduce.child.env";
+    // Setup environment input properties
+    conf.set(propName, "env1=env1_val,env2=env2_val,env3=env3_val");
+    conf.set(propName + ".env4", "env4_val");
+    conf.set(propName + ".env2", "new_env2_val");
+    // Setup some default values - we shouldn't see these values
+    conf.set(defaultPropName, "env1=def1_val,env2=def2_val,env3=def3_val");
+    String defaultPropValue = conf.get(defaultPropName);
+    // These should never be referenced.
+    conf.set(defaultPropName + ".env4", "def4_val");
+    conf.set(defaultPropName + ".env2", "new_def2_val");
+    Apps.setEnvFromInputProperty(env, propName, defaultPropValue, conf,
+        File.pathSeparator);
+    // Check values from string
+    assertEquals("env1_val", env.get("env1"));
+    assertEquals("env3_val", env.get("env3"));
+    // Check individual value
+    assertEquals("env4_val", env.get("env4"));
+    // Check individual value that eclipses one in string
+    assertEquals("new_env2_val", env.get("env2"));
+  }
+
+  @Test
+  public void testSetEnvFromInputPropertyDefault() {
+    Configuration conf = new Configuration(false);
+    Map<String, String> env = new HashMap<>();
+    String propName = "mapreduce.map.env";
+    String defaultPropName = "mapreduce.child.env";
+    // Setup environment input properties
+    conf.set(propName, "env1=env1_val,env2=env2_val,env3=env3_val");
+    conf.set(propName + ".env4", "env4_val");
+    conf.set(propName + ".env2", "new_env2_val");
+    // Setup some default values
+    conf.set(defaultPropName, "env1=def1_val,env2=def2_val,env3=def3_val");
+    String defaultPropValue = conf.get(defaultPropName);
+    // These should never be referenced.
+    conf.set(defaultPropName + ".env4", "def4_val");
+    conf.set(defaultPropName + ".env2", "new_def2_val");
+
+    // Test using default value for the string.
+    // Individually specified env properties do not have defaults,
+    // so this should just get things from the defaultPropName string.
+    String bogusProp = propName + "bogus";
+    Apps.setEnvFromInputProperty(env, bogusProp, defaultPropValue, conf,
+        File.pathSeparator);
+    // Check values from string
+    assertEquals("def1_val", env.get("env1"));
+    assertEquals("def2_val", env.get("env2"));
+    assertEquals("def3_val", env.get("env3"));
+    // Check individual value is not set.
+    assertNull(env.get("env4"));
+  }
+
+  @Test
+  public void testSetEnvFromInputPropertyOverrideDefault() {
+    Configuration conf = new Configuration(false);
+    Map<String, String> env = new HashMap<>();
+
+    // Try using default value, but specify some individual values using
+    // the main propName, but no main value, so it should get values from
+    // the default string, and then the individual values.
+    String propName = "mapreduce.reduce.env";
+    conf.set(propName + ".env2", "new2_val");
+    conf.set(propName + ".env4", "new4_val");
+    // Setup some default values - we shouldn't see these values
+    String defaultPropName = "mapreduce.child.env";
+    conf.set(defaultPropName, "env1=def1_val,env2=def2_val,env3=def3_val");
+    String defaultPropValue = conf.get(defaultPropName);
+    // These should never be referenced.
+    conf.set(defaultPropName + ".env4", "def4_val");
+    conf.set(defaultPropName + ".env2", "new_def2_val");
+    Apps.setEnvFromInputProperty(env, propName, defaultPropValue, conf,
+        File.pathSeparator);
+
+    // Check values from string
+    assertEquals("def1_val", env.get("env1"));
+    assertEquals("def3_val", env.get("env3"));
+    // Check individual value
+    assertEquals("new4_val", env.get("env4"));
+    // Check individual value that eclipses one in string
+    assertEquals("new2_val", env.get("env2"));
+  }
+
+  @Test
+  public void testSetEnvFromInputPropertyCommas() {
+    Configuration conf = new Configuration(false);
+    Map<String, String> env = new HashMap<>();
+    String propName = "mapreduce.reduce.env";
+    conf.set(propName, "env1=env1_val,env2=env2_val,env3=env3_val");
+    conf.set(propName + ".env2", "new2_val1,new2_val2,new2_val3");
+    conf.set(propName + ".env4", "new4_valwith=equals");
+    // Setup some default values - we shouldn't see these values
+    String defaultPropName = "mapreduce.child.env";
+    conf.set(defaultPropName, "env1=def1_val,env2=def2_val,env3=def3_val");
+    String defaultPropValue = conf.get(defaultPropName);
+
+    Apps.setEnvFromInputProperty(env, propName, defaultPropValue, conf,
+        File.pathSeparator);
+    // Check values from string
+    assertEquals("env1_val", env.get("env1"));
+    assertEquals("env3_val", env.get("env3"));
+    // Check individual value
+    assertEquals("new4_valwith=equals", env.get("env4"));
+    // Check individual value that eclipses one in string
+    assertEquals("new2_val1,new2_val2,new2_val3", env.get("env2"));
+  }
+
+  @Test
+  public void testSetEnvFromInputPropertyNull() {
+    Configuration conf = new Configuration(false);
+    Map<String, String> env = new HashMap<>();
+    String propName = "mapreduce.map.env";
+    String defaultPropName = "mapreduce.child.env";
+    // Setup environment input properties
+    conf.set(propName, "env1=env1_val,env2=env2_val,env3=env3_val");
+    conf.set(propName + ".env4", "env4_val");
+    conf.set(propName + ".env2", "new_env2_val");
+    // Setup some default values - we shouldn't see these values
+    conf.set(defaultPropName, "env1=def1_val,env2=def2_val,env3=def3_val");
+    String defaultPropValue = conf.get(defaultPropName);
+    // These should never be referenced.
+    conf.set(defaultPropName + ".env4", "def4_val");
+    conf.set(defaultPropName + ".env2", "new_def2_val");
+    // Try with null inputs
+    Apps.setEnvFromInputProperty(env, "bogus1", null, conf, File.pathSeparator);
+    assertTrue(env.isEmpty());
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org