You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2017/11/23 03:47:45 UTC
spark git commit: [SPARK-22495] Fix setup of SPARK_HOME variable on
Windows
Repository: spark
Updated Branches:
refs/heads/master 1edb3175d -> b4edafa99
[SPARK-22495] Fix setup of SPARK_HOME variable on Windows
## What changes were proposed in this pull request?
Fixing the way `SPARK_HOME` is resolved on Windows. While the previous version was working with the built release download, the set of directories changed slightly for the PySpark `pip` or `conda` install. This has been reflected in Linux files in `bin` but not for Windows `cmd` files.
The first fix improves the way the `jars` directory is found, as this was stopping the Windows version of the `pip/conda` install from working; JARs were not found on Session/Context setup.
The second fix adds a `find-spark-home.cmd` script, which uses the `find_spark_home.py` script, like the Linux version does, to resolve `SPARK_HOME`. It is based on the `find-spark-home` bash script, though some operations are done in a different order due to the `cmd` script language limitations. If the environment variable is already set, the Python script `find_spark_home.py` will not be run. The process can fail if Python is not installed, but this path will mostly be used when PySpark is installed via `pip/conda`, and thus there is some Python in the system.
## How was this patch tested?
Tested on local installation.
Author: Jakub Nowacki <j....@gmail.com>
Closes #19370 from jsnowacki/fix_spark_cmds.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4edafa9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4edafa9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4edafa9
Branch: refs/heads/master
Commit: b4edafa99bd3858c166adeefdafd93dcd4bc9734
Parents: 1edb317
Author: Jakub Nowacki <j....@gmail.com>
Authored: Thu Nov 23 12:47:38 2017 +0900
Committer: hyukjinkwon <gu...@gmail.com>
Committed: Thu Nov 23 12:47:38 2017 +0900
----------------------------------------------------------------------
appveyor.yml | 1 +
bin/find-spark-home.cmd | 60 ++++++++++++++++++++++++++++++++++++++++++++
bin/pyspark2.cmd | 2 +-
bin/run-example.cmd | 4 ++-
bin/spark-class2.cmd | 2 +-
bin/spark-shell2.cmd | 4 ++-
bin/sparkR2.cmd | 2 +-
7 files changed, 70 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/b4edafa9/appveyor.yml
----------------------------------------------------------------------
diff --git a/appveyor.yml b/appveyor.yml
index dc2d81f..4874092 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -33,6 +33,7 @@ only_commits:
- core/src/main/scala/org/apache/spark/api/r/
- mllib/src/main/scala/org/apache/spark/ml/r/
- core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+ - bin/*.cmd
cache:
- C:\Users\appveyor\.m2
http://git-wip-us.apache.org/repos/asf/spark/blob/b4edafa9/bin/find-spark-home.cmd
----------------------------------------------------------------------
diff --git a/bin/find-spark-home.cmd b/bin/find-spark-home.cmd
new file mode 100644
index 0000000..c75e7ee
--- /dev/null
+++ b/bin/find-spark-home.cmd
@@ -0,0 +1,60 @@
+@echo off
+
+rem
+rem Licensed to the Apache Software Foundation (ASF) under one or more
+rem contributor license agreements. See the NOTICE file distributed with
+rem this work for additional information regarding copyright ownership.
+rem The ASF licenses this file to You under the Apache License, Version 2.0
+rem (the "License"); you may not use this file except in compliance with
+rem the License. You may obtain a copy of the License at
+rem
+rem http://www.apache.org/licenses/LICENSE-2.0
+rem
+rem Unless required by applicable law or agreed to in writing, software
+rem distributed under the License is distributed on an "AS IS" BASIS,
+rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+rem See the License for the specific language governing permissions and
+rem limitations under the License.
+rem
+
+rem Path to Python script finding SPARK_HOME
+set FIND_SPARK_HOME_PYTHON_SCRIPT=%~dp0find_spark_home.py
+
+rem Default to standard python interpreter unless told otherwise
+set PYTHON_RUNNER=python
+rem If PYSPARK_DRIVER_PYTHON is set, it overwrites the python version
+if not "x%PYSPARK_DRIVER_PYTHON%"=="x" (
+ set PYTHON_RUNNER=%PYSPARK_DRIVER_PYTHON%
+)
+rem If PYSPARK_PYTHON is set, it overwrites the python version
+if not "x%PYSPARK_PYTHON%"=="x" (
+ set PYTHON_RUNNER=%PYSPARK_PYTHON%
+)
+
+rem If there is python installed, trying to use the root dir as SPARK_HOME
+where %PYTHON_RUNNER% > nul 2>&1
+if %ERRORLEVEL% neq 0 (
+ if not exist %PYTHON_RUNNER% (
+ if "x%SPARK_HOME%"=="x" (
+ echo Missing Python executable '%PYTHON_RUNNER%', defaulting to '%~dp0..' for SPARK_HOME ^
+environment variable. Please install Python or specify the correct Python executable in ^
+PYSPARK_DRIVER_PYTHON or PYSPARK_PYTHON environment variable to detect SPARK_HOME safely.
+ set SPARK_HOME=%~dp0..
+ )
+ )
+)
+
+rem Only attempt to find SPARK_HOME if it is not set.
+if "x%SPARK_HOME%"=="x" (
+ if not exist "%FIND_SPARK_HOME_PYTHON_SCRIPT%" (
+ rem If we are not in the same directory as find_spark_home.py we are not pip installed so we don't
+ rem need to search the different Python directories for a Spark installation.
+ rem Note only that, if the user has pip installed PySpark but is directly calling pyspark-shell or
+ rem spark-submit in another directory we want to use that version of PySpark rather than the
+ rem pip installed version of PySpark.
+ set SPARK_HOME=%~dp0..
+ ) else (
+ rem We are pip installed, use the Python script to resolve a reasonable SPARK_HOME
+ for /f "delims=" %%i in ('%PYTHON_RUNNER% %FIND_SPARK_HOME_PYTHON_SCRIPT%') do set SPARK_HOME=%%i
+ )
+)
http://git-wip-us.apache.org/repos/asf/spark/blob/b4edafa9/bin/pyspark2.cmd
----------------------------------------------------------------------
diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd
index 46d4d5c..663670f 100644
--- a/bin/pyspark2.cmd
+++ b/bin/pyspark2.cmd
@@ -18,7 +18,7 @@ rem limitations under the License.
rem
rem Figure out where the Spark framework is installed
-set SPARK_HOME=%~dp0..
+call "%~dp0find-spark-home.cmd"
call "%SPARK_HOME%\bin\load-spark-env.cmd"
set _SPARK_CMD_USAGE=Usage: bin\pyspark.cmd [options]
http://git-wip-us.apache.org/repos/asf/spark/blob/b4edafa9/bin/run-example.cmd
----------------------------------------------------------------------
diff --git a/bin/run-example.cmd b/bin/run-example.cmd
index efa5f81..cc6b234 100644
--- a/bin/run-example.cmd
+++ b/bin/run-example.cmd
@@ -17,7 +17,9 @@ rem See the License for the specific language governing permissions and
rem limitations under the License.
rem
-set SPARK_HOME=%~dp0..
+rem Figure out where the Spark framework is installed
+call "%~dp0find-spark-home.cmd"
+
set _SPARK_CMD_USAGE=Usage: ./bin/run-example [options] example-class [example args]
rem The outermost quotes are used to prevent Windows command line parse error
http://git-wip-us.apache.org/repos/asf/spark/blob/b4edafa9/bin/spark-class2.cmd
----------------------------------------------------------------------
diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd
index a93fd2f..5da7d7a 100644
--- a/bin/spark-class2.cmd
+++ b/bin/spark-class2.cmd
@@ -18,7 +18,7 @@ rem limitations under the License.
rem
rem Figure out where the Spark framework is installed
-set SPARK_HOME=%~dp0..
+call "%~dp0find-spark-home.cmd"
call "%SPARK_HOME%\bin\load-spark-env.cmd"
http://git-wip-us.apache.org/repos/asf/spark/blob/b4edafa9/bin/spark-shell2.cmd
----------------------------------------------------------------------
diff --git a/bin/spark-shell2.cmd b/bin/spark-shell2.cmd
index 7b5d396..aaf7190 100644
--- a/bin/spark-shell2.cmd
+++ b/bin/spark-shell2.cmd
@@ -17,7 +17,9 @@ rem See the License for the specific language governing permissions and
rem limitations under the License.
rem
-set SPARK_HOME=%~dp0..
+rem Figure out where the Spark framework is installed
+call "%~dp0find-spark-home.cmd"
+
set _SPARK_CMD_USAGE=Usage: .\bin\spark-shell.cmd [options]
rem SPARK-4161: scala does not assume use of the java classpath,
http://git-wip-us.apache.org/repos/asf/spark/blob/b4edafa9/bin/sparkR2.cmd
----------------------------------------------------------------------
diff --git a/bin/sparkR2.cmd b/bin/sparkR2.cmd
index 459b780..b48bea3 100644
--- a/bin/sparkR2.cmd
+++ b/bin/sparkR2.cmd
@@ -18,7 +18,7 @@ rem limitations under the License.
rem
rem Figure out where the Spark framework is installed
-set SPARK_HOME=%~dp0..
+call "%~dp0find-spark-home.cmd"
call "%SPARK_HOME%\bin\load-spark-env.cmd"
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org