You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2023/02/28 00:16:46 UTC

[spark] branch master updated: [SPARK-42510][CONNECT][PYTHON][TEST] Enable more `DataFrame.mapInPandas` parity tests

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 611a0f6adf1 [SPARK-42510][CONNECT][PYTHON][TEST] Enable more `DataFrame.mapInPandas` parity tests
611a0f6adf1 is described below

commit 611a0f6adf17cd894557c4fa2687023f946737ac
Author: Takuya UESHIN <ue...@databricks.com>
AuthorDate: Tue Feb 28 09:16:31 2023 +0900

    [SPARK-42510][CONNECT][PYTHON][TEST] Enable more `DataFrame.mapInPandas` parity tests
    
    ### What changes were proposed in this pull request?
    
    Enables more `DataFrame.mapInPandas` parity tests.
    
    ### Why are the changes needed?
    
    Now that we have `SparkSession.conf`, we can enable some more parity tests for `DataFrame.mapInPandas`.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Enabled related tests.
    
    Closes #40201 from ueshin/issues/SPARK-42510/tests.
    
    Authored-by: Takuya UESHIN <ue...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 .../sql/tests/connect/test_parity_pandas_map.py    | 17 ++-------
 python/pyspark/sql/tests/pandas/test_pandas_map.py | 41 ++++++++++------------
 2 files changed, 21 insertions(+), 37 deletions(-)

diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_map.py b/python/pyspark/sql/tests/connect/test_parity_pandas_map.py
index b8402c564f1..539fd98266b 100644
--- a/python/pyspark/sql/tests/connect/test_parity_pandas_map.py
+++ b/python/pyspark/sql/tests/connect/test_parity_pandas_map.py
@@ -14,31 +14,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import unittest
-
 from pyspark.sql.tests.pandas.test_pandas_map import MapInPandasTestsMixin
 from pyspark.testing.connectutils import ReusedConnectTestCase
 
 
 class MapInPandasParityTests(MapInPandasTestsMixin, ReusedConnectTestCase):
-    @unittest.skip(
-        "Spark Connect does not support sc._jvm.org.apache.log4j but the test depends on it."
-    )
     def test_empty_dataframes_with_less_columns(self):
-        super().test_empty_dataframes_with_less_columns()
+        self.check_empty_dataframes_with_less_columns()
 
-    @unittest.skip(
-        "Spark Connect does not support sc._jvm.org.apache.log4j but the test depends on it."
-    )
     def test_other_than_dataframe(self):
-        super().test_other_than_dataframe()
-
-    @unittest.skip("Spark Connect does not support spark.conf but the test depends on it.")
-    def test_map_in_pandas_with_column_vector(self):
-        super().test_map_in_pandas_with_column_vector()
+        self.check_other_than_dataframe()
 
 
 if __name__ == "__main__":
+    import unittest
     from pyspark.sql.tests.connect.test_parity_pandas_map import *  # noqa: F401
 
     try:
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_map.py b/python/pyspark/sql/tests/pandas/test_pandas_map.py
index e39b97613cf..2f6f3f0df57 100644
--- a/python/pyspark/sql/tests/pandas/test_pandas_map.py
+++ b/python/pyspark/sql/tests/pandas/test_pandas_map.py
@@ -78,20 +78,19 @@ class MapInPandasTestsMixin:
         self.assertEqual(set((r.a for r in actual)), set(range(100)))
 
     def test_other_than_dataframe(self):
+        with QuietTest(self.sc):
+            self.check_other_than_dataframe()
+
+    def check_other_than_dataframe(self):
         def bad_iter(_):
             return iter([1])
 
-        with QuietTest(self.sc):
-            with self.assertRaisesRegex(
-                PythonException,
-                "Return type of the user-defined function should be Pandas.DataFrame, "
-                "but is <class 'int'>",
-            ):
-                (
-                    self.spark.range(10, numPartitions=3)
-                    .mapInPandas(bad_iter, "a int, b string")
-                    .count()
-                )
+        with self.assertRaisesRegex(
+            PythonException,
+            "Return type of the user-defined function should be Pandas.DataFrame, "
+            "but is <class 'int'>",
+        ):
+            self.spark.range(10, numPartitions=3).mapInPandas(bad_iter, "a int, b string").count()
 
     def test_empty_iterator(self):
         def empty_iter(_):
@@ -122,24 +121,20 @@ class MapInPandasTestsMixin:
         self.assertEqual(mapped.count(), 10)
 
     def test_empty_dataframes_with_less_columns(self):
+        with QuietTest(self.sc):
+            self.check_empty_dataframes_with_less_columns()
+
+    def check_empty_dataframes_with_less_columns(self):
         def empty_dataframes_with_less_columns(iterator):
             for pdf in iterator:
                 yield pdf
             # after yielding all elements of the iterator, also yield a dataframe with less columns
             yield pd.DataFrame([(1,)], columns=["id"])
 
-        with QuietTest(self.sc):
-            with self.assertRaisesRegex(
-                PythonException,
-                "KeyError: 'value'",
-            ):
-                (
-                    self.spark.range(10, numPartitions=3)
-                    .withColumn("value", lit(0))
-                    .toDF("id", "value")
-                    .mapInPandas(empty_dataframes_with_less_columns, "id int, value int")
-                    .collect()
-                )
+        with self.assertRaisesRegex(PythonException, "KeyError: 'value'"):
+            self.spark.range(10, numPartitions=3).withColumn("value", lit(0)).toDF(
+                "id", "value"
+            ).mapInPandas(empty_dataframes_with_less_columns, "id int, value int").collect()
 
     def test_chain_map_partitions_in_pandas(self):
         def func(iterator):


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org