Posted to commits@spark.apache.org by gu...@apache.org on 2018/02/08 00:29:38 UTC
spark git commit: [SPARK-23300][TESTS][BRANCH-2.3] Prints out if Pandas and PyArrow are installed or not in PySpark SQL tests
Repository: spark
Updated Branches:
refs/heads/branch-2.3 05239afc9 -> 2ba07d5b1
[SPARK-23300][TESTS][BRANCH-2.3] Prints out if Pandas and PyArrow are installed or not in PySpark SQL tests
This PR backports https://github.com/apache/spark/pull/20473 to branch-2.3.
Author: hyukjinkwon <gu...@gmail.com>
Closes #20533 from HyukjinKwon/backport-20473.
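In short, for each Python executable under test, the patch shells out to run "import <pkg>; print(<pkg>.__version__)" in that interpreter and compares the result against a minimum version with distutils' LooseVersion, logging whether the PyArrow/Pandas related tests will run or be skipped. Below is a minimal standalone sketch of that probe pattern; the helper name `probe_package` is hypothetical and not part of the patch, and plain `subprocess` stands in for Spark's `subprocess_check_output` wrapper:

    import os
    import subprocess
    import sys
    from distutils.version import LooseVersion

    def probe_package(python_exec, package, minimum_version):
        # Ask the target interpreter for the package's version; an import
        # failure makes the child exit non-zero (CalledProcessError).
        try:
            version = subprocess.check_output(
                [python_exec, "-c",
                 "import %s; print(%s.__version__)" % (package, package)],
                universal_newlines=True,
                stderr=open(os.devnull, 'w')).strip()
        except (subprocess.CalledProcessError, OSError):
            return None
        # LooseVersion compares dotted version strings component-wise,
        # so '0.10.0' correctly sorts above '0.8.0'.
        return version if LooseVersion(version) >= LooseVersion(minimum_version) else None

    if __name__ == "__main__":
        # The minimum versions below mirror the ones hard-coded in the patch.
        for pkg, min_ver in (("pyarrow", "0.8.0"), ("pandas", "0.19.2")):
            found = probe_package(sys.executable, pkg, min_ver)
            print("%s: %s" % (pkg, found or "missing or too old"))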
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2ba07d5b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2ba07d5b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2ba07d5b
Branch: refs/heads/branch-2.3
Commit: 2ba07d5b101c44382e0db6d660da756c2f5ce627
Parents: 05239af
Author: hyukjinkwon <gu...@gmail.com>
Authored: Thu Feb 8 09:29:31 2018 +0900
Committer: hyukjinkwon <gu...@gmail.com>
Committed: Thu Feb 8 09:29:31 2018 +0900
----------------------------------------------------------------------
python/run-tests.py | 56 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 55 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/2ba07d5b/python/run-tests.py
----------------------------------------------------------------------
diff --git a/python/run-tests.py b/python/run-tests.py
index 1341086..3539c76 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -31,6 +31,7 @@ if sys.version < '3':
    import Queue
else:
    import queue as Queue
+from distutils.version import LooseVersion
# Append `SPARK_HOME/dev` to the Python path so that we can import the sparktestsupport module
@@ -39,7 +40,7 @@ sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../de
from sparktestsupport import SPARK_HOME # noqa (suppress pep8 warnings)
from sparktestsupport.shellutils import which, subprocess_check_output # noqa
-from sparktestsupport.modules import all_modules # noqa
+from sparktestsupport.modules import all_modules, pyspark_sql # noqa
python_modules = dict((m.name, m) for m in all_modules if m.python_test_goals if m.name != 'root')
@@ -151,6 +152,55 @@ def parse_opts():
    return opts
+def _check_dependencies(python_exec, modules_to_test):
+    # If we should test 'pyspark-sql', it checks if PyArrow and Pandas are installed and
+    # explicitly prints out. See SPARK-23300.
+    if pyspark_sql in modules_to_test:
+        # TODO(HyukjinKwon): Relocate and deduplicate these version specifications.
+        minimum_pyarrow_version = '0.8.0'
+        minimum_pandas_version = '0.19.2'
+
+        try:
+            pyarrow_version = subprocess_check_output(
+                [python_exec, "-c", "import pyarrow; print(pyarrow.__version__)"],
+                universal_newlines=True,
+                stderr=open(os.devnull, 'w')).strip()
+            if LooseVersion(pyarrow_version) >= LooseVersion(minimum_pyarrow_version):
+                LOGGER.info("Will test PyArrow related features against Python executable "
+                            "'%s' in '%s' module." % (python_exec, pyspark_sql.name))
+            else:
+                LOGGER.warning(
+                    "Will skip PyArrow related features against Python executable "
+                    "'%s' in '%s' module. PyArrow >= %s is required; however, PyArrow "
+                    "%s was found." % (
+                        python_exec, pyspark_sql.name, minimum_pyarrow_version, pyarrow_version))
+        except:
+            LOGGER.warning(
+                "Will skip PyArrow related features against Python executable "
+                "'%s' in '%s' module. PyArrow >= %s is required; however, PyArrow "
+                "was not found." % (python_exec, pyspark_sql.name, minimum_pyarrow_version))
+
+        try:
+            pandas_version = subprocess_check_output(
+                [python_exec, "-c", "import pandas; print(pandas.__version__)"],
+                universal_newlines=True,
+                stderr=open(os.devnull, 'w')).strip()
+            if LooseVersion(pandas_version) >= LooseVersion(minimum_pandas_version):
+                LOGGER.info("Will test Pandas related features against Python executable "
+                            "'%s' in '%s' module." % (python_exec, pyspark_sql.name))
+            else:
+                LOGGER.warning(
+                    "Will skip Pandas related features against Python executable "
+                    "'%s' in '%s' module. Pandas >= %s is required; however, Pandas "
+                    "%s was found." % (
+                        python_exec, pyspark_sql.name, minimum_pandas_version, pandas_version))
+        except:
+            LOGGER.warning(
+                "Will skip Pandas related features against Python executable "
+                "'%s' in '%s' module. Pandas >= %s is required; however, Pandas "
+                "was not found." % (python_exec, pyspark_sql.name, minimum_pandas_version))
+
+
def main():
    opts = parse_opts()
    if (opts.verbose):
@@ -175,6 +225,10 @@ def main():
    task_queue = Queue.PriorityQueue()
    for python_exec in python_execs:
+        # Check if the python executable has proper dependencies installed to run tests
+        # for given modules properly.
+        _check_dependencies(python_exec, modules_to_test)
+
        python_implementation = subprocess_check_output(
            [python_exec, "-c", "import platform; print(platform.python_implementation())"],
            universal_newlines=True).strip()
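For reference, one way to exercise the new check locally is via the test script's existing module/executable options (assuming a python3 with both packages installed; exact log lines vary with the installed versions):

    $ python python/run-tests.py --python-executables=python3 --modules=pyspark-sql
    ...
    Will test PyArrow related features against Python executable 'python3' in 'pyspark-sql' module.
    Will test Pandas related features against Python executable 'python3' in 'pyspark-sql' module.

If either package is missing, or older than the required 0.8.0 (PyArrow) / 0.19.2 (Pandas), the corresponding "Will skip ..." warning is printed instead.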