You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2023/02/28 00:16:46 UTC
[spark] branch master updated: [SPARK-42510][CONNECT][PYTHON][TEST] Enable more `DataFrame.mapInPandas` parity tests
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 611a0f6adf1 [SPARK-42510][CONNECT][PYTHON][TEST] Enable more `DataFrame.mapInPandas` parity tests
611a0f6adf1 is described below
commit 611a0f6adf17cd894557c4fa2687023f946737ac
Author: Takuya UESHIN <ue...@databricks.com>
AuthorDate: Tue Feb 28 09:16:31 2023 +0900
[SPARK-42510][CONNECT][PYTHON][TEST] Enable more `DataFrame.mapInPandas` parity tests
### What changes were proposed in this pull request?
Enables more `DataFrame.mapInPandas` parity tests.
### Why are the changes needed?
Now that we have `SparkSession.conf`, we can enable some more parity tests for `DataFrame.mapInPandas`.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Enabled related tests.
Closes #40201 from ueshin/issues/SPARK-42510/tests.
Authored-by: Takuya UESHIN <ue...@databricks.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
.../sql/tests/connect/test_parity_pandas_map.py | 17 ++-------
python/pyspark/sql/tests/pandas/test_pandas_map.py | 41 ++++++++++------------
2 files changed, 21 insertions(+), 37 deletions(-)
diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_map.py b/python/pyspark/sql/tests/connect/test_parity_pandas_map.py
index b8402c564f1..539fd98266b 100644
--- a/python/pyspark/sql/tests/connect/test_parity_pandas_map.py
+++ b/python/pyspark/sql/tests/connect/test_parity_pandas_map.py
@@ -14,31 +14,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-import unittest
-
from pyspark.sql.tests.pandas.test_pandas_map import MapInPandasTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase
class MapInPandasParityTests(MapInPandasTestsMixin, ReusedConnectTestCase):
- @unittest.skip(
- "Spark Connect does not support sc._jvm.org.apache.log4j but the test depends on it."
- )
def test_empty_dataframes_with_less_columns(self):
- super().test_empty_dataframes_with_less_columns()
+ self.check_empty_dataframes_with_less_columns()
- @unittest.skip(
- "Spark Connect does not support sc._jvm.org.apache.log4j but the test depends on it."
- )
def test_other_than_dataframe(self):
- super().test_other_than_dataframe()
-
- @unittest.skip("Spark Connect does not support spark.conf but the test depends on it.")
- def test_map_in_pandas_with_column_vector(self):
- super().test_map_in_pandas_with_column_vector()
+ self.check_other_than_dataframe()
if __name__ == "__main__":
+ import unittest
from pyspark.sql.tests.connect.test_parity_pandas_map import * # noqa: F401
try:
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_map.py b/python/pyspark/sql/tests/pandas/test_pandas_map.py
index e39b97613cf..2f6f3f0df57 100644
--- a/python/pyspark/sql/tests/pandas/test_pandas_map.py
+++ b/python/pyspark/sql/tests/pandas/test_pandas_map.py
@@ -78,20 +78,19 @@ class MapInPandasTestsMixin:
self.assertEqual(set((r.a for r in actual)), set(range(100)))
def test_other_than_dataframe(self):
+ with QuietTest(self.sc):
+ self.check_other_than_dataframe()
+
+ def check_other_than_dataframe(self):
def bad_iter(_):
return iter([1])
- with QuietTest(self.sc):
- with self.assertRaisesRegex(
- PythonException,
- "Return type of the user-defined function should be Pandas.DataFrame, "
- "but is <class 'int'>",
- ):
- (
- self.spark.range(10, numPartitions=3)
- .mapInPandas(bad_iter, "a int, b string")
- .count()
- )
+ with self.assertRaisesRegex(
+ PythonException,
+ "Return type of the user-defined function should be Pandas.DataFrame, "
+ "but is <class 'int'>",
+ ):
+ self.spark.range(10, numPartitions=3).mapInPandas(bad_iter, "a int, b string").count()
def test_empty_iterator(self):
def empty_iter(_):
@@ -122,24 +121,20 @@ class MapInPandasTestsMixin:
self.assertEqual(mapped.count(), 10)
def test_empty_dataframes_with_less_columns(self):
+ with QuietTest(self.sc):
+ self.check_empty_dataframes_with_less_columns()
+
+ def check_empty_dataframes_with_less_columns(self):
def empty_dataframes_with_less_columns(iterator):
for pdf in iterator:
yield pdf
# after yielding all elements of the iterator, also yield a dataframe with less columns
yield pd.DataFrame([(1,)], columns=["id"])
- with QuietTest(self.sc):
- with self.assertRaisesRegex(
- PythonException,
- "KeyError: 'value'",
- ):
- (
- self.spark.range(10, numPartitions=3)
- .withColumn("value", lit(0))
- .toDF("id", "value")
- .mapInPandas(empty_dataframes_with_less_columns, "id int, value int")
- .collect()
- )
+ with self.assertRaisesRegex(PythonException, "KeyError: 'value'"):
+ self.spark.range(10, numPartitions=3).withColumn("value", lit(0)).toDF(
+ "id", "value"
+ ).mapInPandas(empty_dataframes_with_less_columns, "id int, value int").collect()
def test_chain_map_partitions_in_pandas(self):
def func(iterator):
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org