You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2020/11/09 05:28:04 UTC

[spark] branch branch-3.0 updated: [SPARK-33371][PYTHON][3.0] Update setup.py and tests for Python 3.9

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 808dd8f  [SPARK-33371][PYTHON][3.0] Update setup.py and tests for Python 3.9
808dd8f is described below

commit 808dd8fe40564e8f4f25c9faf1897a9efbb2cbe1
Author: HyukjinKwon <gu...@apache.org>
AuthorDate: Sun Nov 8 21:23:46 2020 -0800

    [SPARK-33371][PYTHON][3.0] Update setup.py and tests for Python 3.9
    
    ### What changes were proposed in this pull request?
    
    This PR is a backport of https://github.com/apache/spark/pull/30277
    
    This PR proposes to fix PySpark to officially support Python 3.9. The main codes already work. We should just note that we support Python 3.9.
    
    Also, this PR makes some minor fixes to the test code.
    - `Thread.isAlive` is removed in Python 3.9, and `Thread.is_alive` exists in Python 3.6+, see https://docs.python.org/3/whatsnew/3.9.html#removed
    - Fixed `TaskContextTestsWithWorkerReuse.test_barrier_with_python_worker_reuse` and `TaskContextTests.test_barrier` to be less flaky. These become more flaky in Python 3.9 for some reason.
    
    NOTE that PyArrow does not support Python 3.9 yet.
    
    ### Why are the changes needed?
    
    To officially support Python 3.9.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, it officially supports Python 3.9.
    
    ### How was this patch tested?
    
    Manually ran the tests:
    
    ```
    $  ./run-tests --python-executable=python
    Running PySpark tests. Output is in /.../spark/python/unit-tests.log
    Will test against the following Python executables: ['python']
    Will test the following Python modules: ['pyspark-core', 'pyspark-ml', 'pyspark-mllib', 'pyspark-resource', 'pyspark-sql', 'pyspark-streaming']
    python python_implementation is CPython
    python version is: Python 3.9.0
    Starting test(python): pyspark.ml.tests.test_base
    Starting test(python): pyspark.ml.tests.test_evaluation
    Starting test(python): pyspark.ml.tests.test_algorithms
    Starting test(python): pyspark.ml.tests.test_feature
    Finished test(python): pyspark.ml.tests.test_base (12s)
    Starting test(python): pyspark.ml.tests.test_image
    Finished test(python): pyspark.ml.tests.test_evaluation (15s)
    Starting test(python): pyspark.ml.tests.test_linalg
    Finished test(python): pyspark.ml.tests.test_feature (25s)
    Starting test(python): pyspark.ml.tests.test_param
    Finished test(python): pyspark.ml.tests.test_image (17s)
    Starting test(python): pyspark.ml.tests.test_persistence
    Finished test(python): pyspark.ml.tests.test_param (17s)
    Starting test(python): pyspark.ml.tests.test_pipeline
    Finished test(python): pyspark.ml.tests.test_linalg (30s)
    Starting test(python): pyspark.ml.tests.test_stat
    Finished test(python): pyspark.ml.tests.test_pipeline (6s)
    Starting test(python): pyspark.ml.tests.test_training_summary
    Finished test(python): pyspark.ml.tests.test_stat (12s)
    Starting test(python): pyspark.ml.tests.test_tuning
    Finished test(python): pyspark.ml.tests.test_algorithms (68s)
    Starting test(python): pyspark.ml.tests.test_wrapper
    Finished test(python): pyspark.ml.tests.test_persistence (51s)
    Starting test(python): pyspark.mllib.tests.test_algorithms
    Finished test(python): pyspark.ml.tests.test_training_summary (33s)
    Starting test(python): pyspark.mllib.tests.test_feature
    Finished test(python): pyspark.ml.tests.test_wrapper (19s)
    Starting test(python): pyspark.mllib.tests.test_linalg
    Finished test(python): pyspark.mllib.tests.test_feature (26s)
    Starting test(python): pyspark.mllib.tests.test_stat
    Finished test(python): pyspark.mllib.tests.test_stat (22s)
    Starting test(python): pyspark.mllib.tests.test_streaming_algorithms
    Finished test(python): pyspark.mllib.tests.test_algorithms (53s)
    Starting test(python): pyspark.mllib.tests.test_util
    Finished test(python): pyspark.mllib.tests.test_linalg (54s)
    Starting test(python): pyspark.sql.tests.test_arrow
    Finished test(python): pyspark.sql.tests.test_arrow (0s) ... 61 tests were skipped
    Starting test(python): pyspark.sql.tests.test_catalog
    Finished test(python): pyspark.mllib.tests.test_util (11s)
    Starting test(python): pyspark.sql.tests.test_column
    Finished test(python): pyspark.sql.tests.test_catalog (16s)
    Starting test(python): pyspark.sql.tests.test_conf
    Finished test(python): pyspark.sql.tests.test_column (17s)
    Starting test(python): pyspark.sql.tests.test_context
    Finished test(python): pyspark.sql.tests.test_context (6s) ... 3 tests were skipped
    Starting test(python): pyspark.sql.tests.test_dataframe
    Finished test(python): pyspark.sql.tests.test_conf (11s)
    Starting test(python): pyspark.sql.tests.test_datasources
    Finished test(python): pyspark.sql.tests.test_datasources (19s)
    Starting test(python): pyspark.sql.tests.test_functions
    Finished test(python): pyspark.sql.tests.test_dataframe (35s) ... 3 tests were skipped
    Starting test(python): pyspark.sql.tests.test_group
    Finished test(python): pyspark.sql.tests.test_functions (32s)
    Starting test(python): pyspark.sql.tests.test_pandas_cogrouped_map
    Finished test(python): pyspark.sql.tests.test_pandas_cogrouped_map (1s) ... 15 tests were skipped
    Starting test(python): pyspark.sql.tests.test_pandas_grouped_map
    Finished test(python): pyspark.sql.tests.test_group (19s)
    Starting test(python): pyspark.sql.tests.test_pandas_map
    Finished test(python): pyspark.sql.tests.test_pandas_grouped_map (0s) ... 21 tests were skipped
    Starting test(python): pyspark.sql.tests.test_pandas_udf
    Finished test(python): pyspark.sql.tests.test_pandas_map (0s) ... 6 tests were skipped
    Starting test(python): pyspark.sql.tests.test_pandas_udf_grouped_agg
    Finished test(python): pyspark.sql.tests.test_pandas_udf (0s) ... 6 tests were skipped
    Starting test(python): pyspark.sql.tests.test_pandas_udf_scalar
    Finished test(python): pyspark.sql.tests.test_pandas_udf_grouped_agg (0s) ... 13 tests were skipped
    Starting test(python): pyspark.sql.tests.test_pandas_udf_typehints
    Finished test(python): pyspark.sql.tests.test_pandas_udf_scalar (0s) ... 50 tests were skipped
    Starting test(python): pyspark.sql.tests.test_pandas_udf_window
    Finished test(python): pyspark.sql.tests.test_pandas_udf_typehints (0s) ... 10 tests were skipped
    Starting test(python): pyspark.sql.tests.test_readwriter
    Finished test(python): pyspark.sql.tests.test_pandas_udf_window (0s) ... 14 tests were skipped
    Starting test(python): pyspark.sql.tests.test_serde
    Finished test(python): pyspark.sql.tests.test_serde (19s)
    Starting test(python): pyspark.sql.tests.test_session
    Finished test(python): pyspark.mllib.tests.test_streaming_algorithms (120s)
    Starting test(python): pyspark.sql.tests.test_streaming
    Finished test(python): pyspark.sql.tests.test_readwriter (25s)
    Starting test(python): pyspark.sql.tests.test_types
    Finished test(python): pyspark.ml.tests.test_tuning (208s)
    Starting test(python): pyspark.sql.tests.test_udf
    Finished test(python): pyspark.sql.tests.test_session (31s)
    Starting test(python): pyspark.sql.tests.test_utils
    Finished test(python): pyspark.sql.tests.test_streaming (35s)
    Starting test(python): pyspark.streaming.tests.test_context
    Finished test(python): pyspark.sql.tests.test_types (34s)
    Starting test(python): pyspark.streaming.tests.test_dstream
    Finished test(python): pyspark.sql.tests.test_utils (14s)
    Starting test(python): pyspark.streaming.tests.test_kinesis
    Finished test(python): pyspark.streaming.tests.test_kinesis (0s) ... 2 tests were skipped
    Starting test(python): pyspark.streaming.tests.test_listener
    Finished test(python): pyspark.streaming.tests.test_listener (11s)
    Starting test(python): pyspark.tests.test_appsubmit
    Finished test(python): pyspark.sql.tests.test_udf (39s)
    Starting test(python): pyspark.tests.test_broadcast
    Finished test(python): pyspark.streaming.tests.test_context (23s)
    Starting test(python): pyspark.tests.test_conf
    Finished test(python): pyspark.tests.test_conf (15s)
    Starting test(python): pyspark.tests.test_context
    Finished test(python): pyspark.tests.test_broadcast (33s)
    Starting test(python): pyspark.tests.test_daemon
    Finished test(python): pyspark.tests.test_daemon (5s)
    Starting test(python): pyspark.tests.test_install_spark
    Finished test(python): pyspark.tests.test_context (44s)
    Starting test(python): pyspark.tests.test_join
    Finished test(python): pyspark.tests.test_appsubmit (68s)
    Starting test(python): pyspark.tests.test_profiler
    Finished test(python): pyspark.tests.test_join (7s)
    Starting test(python): pyspark.tests.test_rdd
    Finished test(python): pyspark.tests.test_profiler (9s)
    Starting test(python): pyspark.tests.test_rddbarrier
    Finished test(python): pyspark.tests.test_rddbarrier (7s)
    Starting test(python): pyspark.tests.test_readwrite
    Finished test(python): pyspark.streaming.tests.test_dstream (107s)
    Starting test(python): pyspark.tests.test_serializers
    Finished test(python): pyspark.tests.test_serializers (8s)
    Starting test(python): pyspark.tests.test_shuffle
    Finished test(python): pyspark.tests.test_readwrite (14s)
    Starting test(python): pyspark.tests.test_taskcontext
    Finished test(python): pyspark.tests.test_install_spark (65s)
    Starting test(python): pyspark.tests.test_util
    Finished test(python): pyspark.tests.test_shuffle (8s)
    Starting test(python): pyspark.tests.test_worker
    Finished test(python): pyspark.tests.test_util (5s)
    Starting test(python): pyspark.accumulators
    Finished test(python): pyspark.accumulators (5s)
    Starting test(python): pyspark.broadcast
    Finished test(python): pyspark.broadcast (6s)
    Starting test(python): pyspark.conf
    Finished test(python): pyspark.tests.test_worker (14s)
    Starting test(python): pyspark.context
    Finished test(python): pyspark.conf (4s)
    Starting test(python): pyspark.ml.classification
    Finished test(python): pyspark.tests.test_rdd (60s)
    Starting test(python): pyspark.ml.clustering
    Finished test(python): pyspark.context (21s)
    Starting test(python): pyspark.ml.evaluation
    Finished test(python): pyspark.tests.test_taskcontext (69s)
    Starting test(python): pyspark.ml.feature
    Finished test(python): pyspark.ml.evaluation (26s)
    Starting test(python): pyspark.ml.fpm
    Finished test(python): pyspark.ml.clustering (45s)
    Starting test(python): pyspark.ml.functions
    Finished test(python): pyspark.ml.fpm (24s)
    Starting test(python): pyspark.ml.image
    Finished test(python): pyspark.ml.functions (17s)
    Starting test(python): pyspark.ml.linalg.__init__
    Finished test(python): pyspark.ml.linalg.__init__ (0s)
    Starting test(python): pyspark.ml.recommendation
    Finished test(python): pyspark.ml.classification (74s)
    Starting test(python): pyspark.ml.regression
    Finished test(python): pyspark.ml.image (8s)
    Starting test(python): pyspark.ml.stat
    Finished test(python): pyspark.ml.stat (29s)
    Starting test(python): pyspark.ml.tuning
    Finished test(python): pyspark.ml.regression (53s)
    Starting test(python): pyspark.mllib.classification
    Finished test(python): pyspark.ml.tuning (35s)
    Starting test(python): pyspark.mllib.clustering
    Finished test(python): pyspark.ml.feature (103s)
    Starting test(python): pyspark.mllib.evaluation
    Finished test(python): pyspark.mllib.classification (33s)
    Starting test(python): pyspark.mllib.feature
    Finished test(python): pyspark.mllib.evaluation (21s)
    Starting test(python): pyspark.mllib.fpm
    Finished test(python): pyspark.ml.recommendation (103s)
    Starting test(python): pyspark.mllib.linalg.__init__
    Finished test(python): pyspark.mllib.linalg.__init__ (1s)
    Starting test(python): pyspark.mllib.linalg.distributed
    Finished test(python): pyspark.mllib.feature (26s)
    Starting test(python): pyspark.mllib.random
    Finished test(python): pyspark.mllib.fpm (23s)
    Starting test(python): pyspark.mllib.recommendation
    Finished test(python): pyspark.mllib.clustering (50s)
    Starting test(python): pyspark.mllib.regression
    Finished test(python): pyspark.mllib.random (13s)
    Starting test(python): pyspark.mllib.stat.KernelDensity
    Finished test(python): pyspark.mllib.stat.KernelDensity (1s)
    Starting test(python): pyspark.mllib.stat._statistics
    Finished test(python): pyspark.mllib.linalg.distributed (42s)
    Starting test(python): pyspark.mllib.tree
    Finished test(python): pyspark.mllib.stat._statistics (19s)
    Starting test(python): pyspark.mllib.util
    Finished test(python): pyspark.mllib.regression (33s)
    Starting test(python): pyspark.profiler
    Finished test(python): pyspark.mllib.recommendation (36s)
    Starting test(python): pyspark.rdd
    Finished test(python): pyspark.profiler (9s)
    Starting test(python): pyspark.resource.tests.test_resources
    Finished test(python): pyspark.mllib.tree (19s)
    Starting test(python): pyspark.serializers
    Finished test(python): pyspark.mllib.util (21s)
    Starting test(python): pyspark.shuffle
    Finished test(python): pyspark.resource.tests.test_resources (9s)
    Starting test(python): pyspark.sql.avro.functions
    Finished test(python): pyspark.shuffle (1s)
    Starting test(python): pyspark.sql.catalog
    Finished test(python): pyspark.rdd (22s)
    Starting test(python): pyspark.sql.column
    Finished test(python): pyspark.serializers (12s)
    Starting test(python): pyspark.sql.conf
    Finished test(python): pyspark.sql.conf (6s)
    Starting test(python): pyspark.sql.context
    Finished test(python): pyspark.sql.catalog (14s)
    Starting test(python): pyspark.sql.dataframe
    Finished test(python): pyspark.sql.avro.functions (15s)
    Starting test(python): pyspark.sql.functions
    Finished test(python): pyspark.sql.column (24s)
    Starting test(python): pyspark.sql.group
    Finished test(python): pyspark.sql.context (20s)
    Starting test(python): pyspark.sql.pandas.conversion
    Finished test(python): pyspark.sql.pandas.conversion (13s)
    Starting test(python): pyspark.sql.pandas.group_ops
    Finished test(python): pyspark.sql.group (36s)
    Starting test(python): pyspark.sql.pandas.map_ops
    Finished test(python): pyspark.sql.pandas.group_ops (21s)
    Starting test(python): pyspark.sql.pandas.serializers
    Finished test(python): pyspark.sql.pandas.serializers (0s)
    Starting test(python): pyspark.sql.pandas.typehints
    Finished test(python): pyspark.sql.pandas.typehints (0s)
    Starting test(python): pyspark.sql.pandas.types
    Finished test(python): pyspark.sql.pandas.types (0s)
    Starting test(python): pyspark.sql.pandas.utils
    Finished test(python): pyspark.sql.pandas.utils (0s)
    Starting test(python): pyspark.sql.readwriter
    Finished test(python): pyspark.sql.dataframe (56s)
    Starting test(python): pyspark.sql.session
    Finished test(python): pyspark.sql.functions (57s)
    Starting test(python): pyspark.sql.streaming
    Finished test(python): pyspark.sql.pandas.map_ops (12s)
    Starting test(python): pyspark.sql.types
    Finished test(python): pyspark.sql.types (10s)
    Starting test(python): pyspark.sql.udf
    Finished test(python): pyspark.sql.streaming (16s)
    Starting test(python): pyspark.sql.window
    Finished test(python): pyspark.sql.session (19s)
    Starting test(python): pyspark.streaming.util
    Finished test(python): pyspark.streaming.util (0s)
    Starting test(python): pyspark.util
    Finished test(python): pyspark.util (0s)
    Finished test(python): pyspark.sql.readwriter (24s)
    Finished test(python): pyspark.sql.udf (13s)
    Finished test(python): pyspark.sql.window (14s)
    Tests passed in 780 seconds
    
    ```
    
    Closes #30288 from HyukjinKwon/SPARK-33371-3.0.
    
    Authored-by: HyukjinKwon <gu...@apache.org>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 python/pyspark/tests/test_taskcontext.py | 8 ++++----
 python/pyspark/tests/test_worker.py      | 2 +-
 python/setup.py                          | 1 +
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/python/pyspark/tests/test_taskcontext.py b/python/pyspark/tests/test_taskcontext.py
index 57c880a..b2fa991 100644
--- a/python/pyspark/tests/test_taskcontext.py
+++ b/python/pyspark/tests/test_taskcontext.py
@@ -127,12 +127,12 @@ class TaskContextTests(PySparkTestCase):
 
         def context_barrier(x):
             tc = BarrierTaskContext.get()
-            time.sleep(random.randint(1, 10))
+            time.sleep(random.randint(1, 5) * 2)
             tc.barrier()
             return time.time()
 
         times = rdd.barrier().mapPartitions(f).map(context_barrier).collect()
-        self.assertTrue(max(times) - min(times) < 1)
+        self.assertTrue(max(times) - min(times) < 2)
 
     def test_all_gather(self):
         """
@@ -235,7 +235,7 @@ class TaskContextTestsWithWorkerReuse(unittest.TestCase):
 
         def context_barrier(x):
             tc = BarrierTaskContext.get()
-            time.sleep(random.randint(1, 10))
+            time.sleep(random.randint(1, 5) * 2)
             tc.barrier()
             return (time.time(), os.getpid())
 
@@ -243,7 +243,7 @@ class TaskContextTestsWithWorkerReuse(unittest.TestCase):
         times = list(map(lambda x: x[0], result))
         pids = list(map(lambda x: x[1], result))
         # check both barrier and worker reuse effect
-        self.assertTrue(max(times) - min(times) < 1)
+        self.assertTrue(max(times) - min(times) < 2)
         for pid in pids:
             self.assertTrue(pid in worker_pids)
 
diff --git a/python/pyspark/tests/test_worker.py b/python/pyspark/tests/test_worker.py
index 9d7deb2..bfcbc43 100644
--- a/python/pyspark/tests/test_worker.py
+++ b/python/pyspark/tests/test_worker.py
@@ -137,7 +137,7 @@ class WorkerTests(ReusedPySparkTestCase):
         t.daemon = True
         t.start()
         t.join(5)
-        self.assertTrue(not t.isAlive())
+        self.assertTrue(not t.is_alive())
         self.assertEqual(100000, rdd.count())
 
     def test_with_different_versions_of_python(self):
diff --git a/python/setup.py b/python/setup.py
index a1cde31..78b5a69 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -226,6 +226,7 @@ try:
             'Programming Language :: Python :: 3.6',
             'Programming Language :: Python :: 3.7',
             'Programming Language :: Python :: 3.8',
+            'Programming Language :: Python :: 3.9',
             'Programming Language :: Python :: Implementation :: CPython',
             'Programming Language :: Python :: Implementation :: PyPy']
     )


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org