You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by va...@apache.org on 2017/03/10 21:34:12 UTC
spark git commit: [SPARK-17979][SPARK-14453] Remove deprecated
SPARK_YARN_USER_ENV and SPARK_JAVA_OPTS
Repository: spark
Updated Branches:
refs/heads/master dd9049e04 -> 8f0490e22
[SPARK-17979][SPARK-14453] Remove deprecated SPARK_YARN_USER_ENV and SPARK_JAVA_OPTS
This fix removes support for the deprecated `SPARK_YARN_USER_ENV` environment variable, as described in SPARK-17979.
In total, this fix removes support for the following deprecated environment variables:
```
SPARK_YARN_USER_ENV
SPARK_JAVA_OPTS
SPARK_CLASSPATH
SPARK_WORKER_INSTANCES
```
Related JIRA:
[SPARK-14453]: https://issues.apache.org/jira/browse/SPARK-14453
[SPARK-12344]: https://issues.apache.org/jira/browse/SPARK-12344
[SPARK-15781]: https://issues.apache.org/jira/browse/SPARK-15781
Existing tests should pass.
Author: Yong Tang <yo...@outlook.com>
Closes #17212 from yongtang/SPARK-17979.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8f0490e2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8f0490e2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8f0490e2
Branch: refs/heads/master
Commit: 8f0490e22b4c7f1fdf381c70c5894d46b7f7e6fb
Parents: dd9049e
Author: Yong Tang <yo...@outlook.com>
Authored: Fri Mar 10 13:33:58 2017 -0800
Committer: Marcelo Vanzin <va...@cloudera.com>
Committed: Fri Mar 10 13:34:01 2017 -0800
----------------------------------------------------------------------
conf/spark-env.sh.template | 3 -
.../main/scala/org/apache/spark/SparkConf.scala | 65 --------------------
.../spark/deploy/FaultToleranceTest.scala | 3 +-
.../spark/launcher/WorkerCommandBuilder.scala | 1 -
docs/rdd-programming-guide.md | 2 +-
.../spark/launcher/AbstractCommandBuilder.java | 1 -
.../launcher/SparkClassCommandBuilder.java | 2 -
.../launcher/SparkSubmitCommandBuilder.java | 1 -
.../MesosCoarseGrainedSchedulerBackend.scala | 5 --
.../MesosFineGrainedSchedulerBackend.scala | 4 --
.../org/apache/spark/deploy/yarn/Client.scala | 39 +-----------
.../spark/deploy/yarn/ExecutorRunnable.scala | 8 ---
12 files changed, 3 insertions(+), 131 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/8f0490e2/conf/spark-env.sh.template
----------------------------------------------------------------------
diff --git a/conf/spark-env.sh.template b/conf/spark-env.sh.template
index 5c1e876..94bd2c4 100755
--- a/conf/spark-env.sh.template
+++ b/conf/spark-env.sh.template
@@ -25,12 +25,10 @@
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
-# - SPARK_CLASSPATH, default classpath entries to append
# Options read by executors and drivers running inside the cluster
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
-# - SPARK_CLASSPATH, default classpath entries to append
# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos
@@ -48,7 +46,6 @@
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
-# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
# - SPARK_WORKER_DIR, to set the working directory of worker processes
# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
http://git-wip-us.apache.org/repos/asf/spark/blob/8f0490e2/core/src/main/scala/org/apache/spark/SparkConf.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala
index fe912e6..2a2ce05 100644
--- a/core/src/main/scala/org/apache/spark/SparkConf.scala
+++ b/core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -518,71 +518,6 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
}
}
- // Check for legacy configs
- sys.env.get("SPARK_JAVA_OPTS").foreach { value =>
- val warning =
- s"""
- |SPARK_JAVA_OPTS was detected (set to '$value').
- |This is deprecated in Spark 1.0+.
- |
- |Please instead use:
- | - ./spark-submit with conf/spark-defaults.conf to set defaults for an application
- | - ./spark-submit with --driver-java-options to set -X options for a driver
- | - spark.executor.extraJavaOptions to set -X options for executors
- | - SPARK_DAEMON_JAVA_OPTS to set java options for standalone daemons (master or worker)
- """.stripMargin
- logWarning(warning)
-
- for (key <- Seq(executorOptsKey, driverOptsKey)) {
- if (getOption(key).isDefined) {
- throw new SparkException(s"Found both $key and SPARK_JAVA_OPTS. Use only the former.")
- } else {
- logWarning(s"Setting '$key' to '$value' as a work-around.")
- set(key, value)
- }
- }
- }
-
- sys.env.get("SPARK_CLASSPATH").foreach { value =>
- val warning =
- s"""
- |SPARK_CLASSPATH was detected (set to '$value').
- |This is deprecated in Spark 1.0+.
- |
- |Please instead use:
- | - ./spark-submit with --driver-class-path to augment the driver classpath
- | - spark.executor.extraClassPath to augment the executor classpath
- """.stripMargin
- logWarning(warning)
-
- for (key <- Seq(executorClasspathKey, driverClassPathKey)) {
- if (getOption(key).isDefined) {
- throw new SparkException(s"Found both $key and SPARK_CLASSPATH. Use only the former.")
- } else {
- logWarning(s"Setting '$key' to '$value' as a work-around.")
- set(key, value)
- }
- }
- }
-
- if (!contains(sparkExecutorInstances)) {
- sys.env.get("SPARK_WORKER_INSTANCES").foreach { value =>
- val warning =
- s"""
- |SPARK_WORKER_INSTANCES was detected (set to '$value').
- |This is deprecated in Spark 1.0+.
- |
- |Please instead use:
- | - ./spark-submit with --num-executors to specify the number of executors
- | - Or set SPARK_EXECUTOR_INSTANCES
- | - spark.executor.instances to configure the number of instances in the spark config.
- """.stripMargin
- logWarning(warning)
-
- set("spark.executor.instances", value)
- }
- }
-
if (contains("spark.master") && get("spark.master").startsWith("yarn-")) {
val warning = s"spark.master ${get("spark.master")} is deprecated in Spark 2.0+, please " +
"instead use \"yarn\" with specified deploy mode."
http://git-wip-us.apache.org/repos/asf/spark/blob/8f0490e2/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala b/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala
index 320af5c..c6307da 100644
--- a/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala
@@ -43,8 +43,7 @@ import org.apache.spark.util.{ThreadUtils, Utils}
* Execute using
* ./bin/spark-class org.apache.spark.deploy.FaultToleranceTest
*
- * Make sure that the environment includes the following properties in SPARK_DAEMON_JAVA_OPTS
- * *and* SPARK_JAVA_OPTS:
+ * Make sure that the environment includes the following properties in SPARK_DAEMON_JAVA_OPTS:
* - spark.deploy.recoveryMode=ZOOKEEPER
* - spark.deploy.zookeeper.url=172.17.42.1:2181
* Note that 172.17.42.1 is the default docker ip for the host and 2181 is the default ZK port.
http://git-wip-us.apache.org/repos/asf/spark/blob/8f0490e2/core/src/main/scala/org/apache/spark/launcher/WorkerCommandBuilder.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/launcher/WorkerCommandBuilder.scala b/core/src/main/scala/org/apache/spark/launcher/WorkerCommandBuilder.scala
index 3fd812e..4216b26 100644
--- a/core/src/main/scala/org/apache/spark/launcher/WorkerCommandBuilder.scala
+++ b/core/src/main/scala/org/apache/spark/launcher/WorkerCommandBuilder.scala
@@ -39,7 +39,6 @@ private[spark] class WorkerCommandBuilder(sparkHome: String, memoryMb: Int, comm
val cmd = buildJavaCommand(command.classPathEntries.mkString(File.pathSeparator))
cmd.add(s"-Xmx${memoryMb}M")
command.javaOpts.foreach(cmd.add)
- addOptionString(cmd, getenv("SPARK_JAVA_OPTS"))
cmd
}
http://git-wip-us.apache.org/repos/asf/spark/blob/8f0490e2/docs/rdd-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/rdd-programming-guide.md b/docs/rdd-programming-guide.md
index cad9ff4..e2bf2d7 100644
--- a/docs/rdd-programming-guide.md
+++ b/docs/rdd-programming-guide.md
@@ -457,7 +457,7 @@ If required, a Hadoop configuration can be passed in as a Python dict. Here is a
Elasticsearch ESInputFormat:
{% highlight python %}
-$ SPARK_CLASSPATH=/path/to/elasticsearch-hadoop.jar ./bin/pyspark
+$ ./bin/pyspark --jars /path/to/elasticsearch-hadoop.jar
>>> conf = {"es.resource" : "index/type"} # assume Elasticsearch is running on localhost defaults
>>> rdd = sc.newAPIHadoopRDD("org.elasticsearch.hadoop.mr.EsInputFormat",
"org.apache.hadoop.io.NullWritable",
http://git-wip-us.apache.org/repos/asf/spark/blob/8f0490e2/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
----------------------------------------------------------------------
diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
index bc8d603..6c0c3eb 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java
@@ -135,7 +135,6 @@ abstract class AbstractCommandBuilder {
String sparkHome = getSparkHome();
Set<String> cp = new LinkedHashSet<>();
- addToClassPath(cp, getenv("SPARK_CLASSPATH"));
addToClassPath(cp, appClassPath);
addToClassPath(cp, getConfDir());
http://git-wip-us.apache.org/repos/asf/spark/blob/8f0490e2/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java
----------------------------------------------------------------------
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java
index 8178684..7cf5b73 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkClassCommandBuilder.java
@@ -66,7 +66,6 @@ class SparkClassCommandBuilder extends AbstractCommandBuilder {
memKey = "SPARK_DAEMON_MEMORY";
break;
case "org.apache.spark.executor.CoarseGrainedExecutorBackend":
- javaOptsKeys.add("SPARK_JAVA_OPTS");
javaOptsKeys.add("SPARK_EXECUTOR_OPTS");
memKey = "SPARK_EXECUTOR_MEMORY";
break;
@@ -84,7 +83,6 @@ class SparkClassCommandBuilder extends AbstractCommandBuilder {
memKey = "SPARK_DAEMON_MEMORY";
break;
default:
- javaOptsKeys.add("SPARK_JAVA_OPTS");
memKey = "SPARK_DRIVER_MEMORY";
break;
}
http://git-wip-us.apache.org/repos/asf/spark/blob/8f0490e2/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
----------------------------------------------------------------------
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
index 5e64fa7..5f2da03 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
@@ -240,7 +240,6 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder {
addOptionString(cmd, System.getenv("SPARK_DAEMON_JAVA_OPTS"));
}
addOptionString(cmd, System.getenv("SPARK_SUBMIT_OPTS"));
- addOptionString(cmd, System.getenv("SPARK_JAVA_OPTS"));
// We don't want the client to specify Xmx. These have to be set by their corresponding
// memory flag --driver-memory or configuration entry spark.driver.memory
http://git-wip-us.apache.org/repos/asf/spark/blob/8f0490e2/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
----------------------------------------------------------------------
diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
index 85c2e9c..c049a32 100644
--- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
+++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala
@@ -175,11 +175,6 @@ private[spark] class MesosCoarseGrainedSchedulerBackend(
def createCommand(offer: Offer, numCores: Int, taskId: String): CommandInfo = {
val environment = Environment.newBuilder()
- val extraClassPath = conf.getOption("spark.executor.extraClassPath")
- extraClassPath.foreach { cp =>
- environment.addVariables(
- Environment.Variable.newBuilder().setName("SPARK_CLASSPATH").setValue(cp).build())
- }
val extraJavaOpts = conf.get("spark.executor.extraJavaOptions", "")
// Set the environment variable through a command prefix
http://git-wip-us.apache.org/repos/asf/spark/blob/8f0490e2/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala
----------------------------------------------------------------------
diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala
index 2152713..f198f88 100644
--- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala
+++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackend.scala
@@ -106,10 +106,6 @@ private[spark] class MesosFineGrainedSchedulerBackend(
throw new SparkException("Executor Spark home `spark.mesos.executor.home` is not set!")
}
val environment = Environment.newBuilder()
- sc.conf.getOption("spark.executor.extraClassPath").foreach { cp =>
- environment.addVariables(
- Environment.Variable.newBuilder().setName("SPARK_CLASSPATH").setValue(cp).build())
- }
val extraJavaOpts = sc.conf.getOption("spark.executor.extraJavaOptions").getOrElse("")
val prefixEnv = sc.conf.getOption("spark.executor.extraLibraryPath").map { p =>
http://git-wip-us.apache.org/repos/asf/spark/blob/8f0490e2/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
----------------------------------------------------------------------
diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index e86bd54..ccb0f8f 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -748,14 +748,6 @@ private[spark] class Client(
.map { case (k, v) => (k.substring(amEnvPrefix.length), v) }
.foreach { case (k, v) => YarnSparkHadoopUtil.addPathToEnvironment(env, k, v) }
- // Keep this for backwards compatibility but users should move to the config
- sys.env.get("SPARK_YARN_USER_ENV").foreach { userEnvs =>
- // Allow users to specify some environment variables.
- YarnSparkHadoopUtil.setEnvFromInputString(env, userEnvs)
- // Pass SPARK_YARN_USER_ENV itself to the AM so it can use it to set up executor environments.
- env("SPARK_YARN_USER_ENV") = userEnvs
- }
-
// If pyFiles contains any .py files, we need to add LOCALIZED_PYTHON_DIR to the PYTHONPATH
// of the container processes too. Add all non-.py files directly to PYTHONPATH.
//
@@ -782,35 +774,7 @@ private[spark] class Client(
sparkConf.setExecutorEnv("PYTHONPATH", pythonPathStr)
}
- // In cluster mode, if the deprecated SPARK_JAVA_OPTS is set, we need to propagate it to
- // executors. But we can't just set spark.executor.extraJavaOptions, because the driver's
- // SparkContext will not let that set spark* system properties, which is expected behavior for
- // Yarn clients. So propagate it through the environment.
- //
- // Note that to warn the user about the deprecation in cluster mode, some code from
- // SparkConf#validateSettings() is duplicated here (to avoid triggering the condition
- // described above).
if (isClusterMode) {
- sys.env.get("SPARK_JAVA_OPTS").foreach { value =>
- val warning =
- s"""
- |SPARK_JAVA_OPTS was detected (set to '$value').
- |This is deprecated in Spark 1.0+.
- |
- |Please instead use:
- | - ./spark-submit with conf/spark-defaults.conf to set defaults for an application
- | - ./spark-submit with --driver-java-options to set -X options for a driver
- | - spark.executor.extraJavaOptions to set -X options for executors
- """.stripMargin
- logWarning(warning)
- for (proc <- Seq("driver", "executor")) {
- val key = s"spark.$proc.extraJavaOptions"
- if (sparkConf.contains(key)) {
- throw new SparkException(s"Found both $key and SPARK_JAVA_OPTS. Use only the former.")
- }
- }
- env("SPARK_JAVA_OPTS") = value
- }
// propagate PYSPARK_DRIVER_PYTHON and PYSPARK_PYTHON to driver in cluster mode
Seq("PYSPARK_DRIVER_PYTHON", "PYSPARK_PYTHON").foreach { envname =>
if (!env.contains(envname)) {
@@ -883,8 +847,7 @@ private[spark] class Client(
// Include driver-specific java options if we are launching a driver
if (isClusterMode) {
- val driverOpts = sparkConf.get(DRIVER_JAVA_OPTIONS).orElse(sys.env.get("SPARK_JAVA_OPTS"))
- driverOpts.foreach { opts =>
+ sparkConf.get(DRIVER_JAVA_OPTIONS).foreach { opts =>
javaOpts ++= Utils.splitCommandString(opts).map(YarnSparkHadoopUtil.escapeForShell)
}
val libraryPaths = Seq(sparkConf.get(DRIVER_LIBRARY_PATH),
http://git-wip-us.apache.org/repos/asf/spark/blob/8f0490e2/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
----------------------------------------------------------------------
diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
index ee85c04..3f4d236 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
@@ -143,9 +143,6 @@ private[yarn] class ExecutorRunnable(
sparkConf.get(EXECUTOR_JAVA_OPTIONS).foreach { opts =>
javaOpts ++= Utils.splitCommandString(opts).map(YarnSparkHadoopUtil.escapeForShell)
}
- sys.env.get("SPARK_JAVA_OPTS").foreach { opts =>
- javaOpts ++= Utils.splitCommandString(opts).map(YarnSparkHadoopUtil.escapeForShell)
- }
sparkConf.get(EXECUTOR_LIBRARY_PATH).foreach { p =>
prefixEnv = Some(Client.getClusterPath(sparkConf, Utils.libraryPathEnvPrefix(Seq(p))))
}
@@ -229,11 +226,6 @@ private[yarn] class ExecutorRunnable(
YarnSparkHadoopUtil.addPathToEnvironment(env, key, value)
}
- // Keep this for backwards compatibility but users should move to the config
- sys.env.get("SPARK_YARN_USER_ENV").foreach { userEnvs =>
- YarnSparkHadoopUtil.setEnvFromInputString(env, userEnvs)
- }
-
// lookup appropriate http scheme for container log urls
val yarnHttpPolicy = conf.get(
YarnConfiguration.YARN_HTTP_POLICY_KEY,
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org