You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2023/03/20 00:26:21 UTC
[spark] branch branch-3.4 updated: [SPARK-41818][SPARK-41843][CONNECT][PYTHON][TESTS] Enable more parity tests
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
new a0993ba76f2 [SPARK-41818][SPARK-41843][CONNECT][PYTHON][TESTS] Enable more parity tests
a0993ba76f2 is described below
commit a0993ba76f2c62558faafd57cf4b8281f5dbe451
Author: Takuya UESHIN <ue...@databricks.com>
AuthorDate: Mon Mar 20 09:25:52 2023 +0900
[SPARK-41818][SPARK-41843][CONNECT][PYTHON][TESTS] Enable more parity tests
### What changes were proposed in this pull request?
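Enables more Spark Connect parity tests: removes the doctest exclusions for `Catalog.recoverPartitions` and `call_udf`, drops `unittest.skip` from several pandas UDF and UDF parity tests, and moves the assertions of two UDF tests into `check_*` helpers so that the Connect parity tests can call them without the `QuietTest(self.sc)` wrapper.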
### Why are the changes needed?
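The skips and doctest exclusions removed here are no longer needed, so the corresponding tests can now run under Spark Connect and improve parity coverage.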
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Enabled related parity tests.
Closes #40470 from ueshin/issues/SPARK-41818/parity.
Authored-by: Takuya UESHIN <ue...@databricks.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
(cherry picked from commit e911c5efea7d12800fdbe3cb8effbc0d5ce763f0)
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
python/pyspark/sql/connect/catalog.py | 3 ---
python/pyspark/sql/connect/functions.py | 3 ---
.../sql/tests/connect/test_parity_pandas_udf.py | 8 --------
.../pyspark/sql/tests/connect/test_parity_udf.py | 18 ++--------------
python/pyspark/sql/tests/test_udf.py | 24 ++++++++++++++--------
5 files changed, 17 insertions(+), 39 deletions(-)
diff --git a/python/pyspark/sql/connect/catalog.py b/python/pyspark/sql/connect/catalog.py
index 261f87b4cc6..788c48e037b 100644
--- a/python/pyspark/sql/connect/catalog.py
+++ b/python/pyspark/sql/connect/catalog.py
@@ -331,9 +331,6 @@ def _test() -> None:
PySparkSession.builder.appName("sql.connect.catalog tests").remote("local[4]").getOrCreate()
)
- # TODO(SPARK-41818): java.lang.ClassNotFoundException) .DefaultSource
- del pyspark.sql.connect.catalog.Catalog.recoverPartitions.__doc__
-
(failure_count, test_count) = doctest.testmod(
pyspark.sql.connect.catalog,
globs=globs,
diff --git a/python/pyspark/sql/connect/functions.py b/python/pyspark/sql/connect/functions.py
index e8bb06a3903..f01e333c9d7 100644
--- a/python/pyspark/sql/connect/functions.py
+++ b/python/pyspark/sql/connect/functions.py
@@ -2482,9 +2482,6 @@ def _test() -> None:
# Spark Connect does not support Spark Context but the test depends on that.
del pyspark.sql.connect.functions.monotonically_increasing_id.__doc__
- # TODO(SPARK-41843): Implement SparkSession.udf
- del pyspark.sql.connect.functions.call_udf.__doc__
-
globs["spark"] = (
PySparkSession.builder.appName("sql.connect.functions tests")
.remote("local[4]")
diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py b/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py
index 4b1ce0a9587..841ade40f5e 100644
--- a/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py
+++ b/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py
@@ -32,14 +32,6 @@ class PandasUDFParityTests(PandasUDFTestsMixin, ReusedConnectTestCase):
def test_pandas_udf_timestamp_ntz(self):
super().test_pandas_udf_timestamp_ntz()
- @unittest.skip("Spark Connect does not support spark.conf but the test depends on it.")
- def test_pandas_udf_detect_unsafe_type_conversion(self):
- super().test_pandas_udf_detect_unsafe_type_conversion()
-
- @unittest.skip("Spark Connect does not support spark.conf but the test depends on it.")
- def test_pandas_udf_arrow_overflow(self):
- super().test_pandas_udf_arrow_overflow()
-
# TODO(SPARK-42247): standardize `returnType` attribute of UDF
@unittest.skip("Fails in Spark Connect, should enable.")
def test_pandas_udf_decorator(self):
diff --git a/python/pyspark/sql/tests/connect/test_parity_udf.py b/python/pyspark/sql/tests/connect/test_parity_udf.py
index 50f0d36be5d..10d93b71ebf 100644
--- a/python/pyspark/sql/tests/connect/test_parity_udf.py
+++ b/python/pyspark/sql/tests/connect/test_parity_udf.py
@@ -44,10 +44,6 @@ class UDFParityTests(BaseUDFTestsMixin, ReusedConnectTestCase):
def test_same_accumulator_in_udfs(self):
super().test_same_accumulator_in_udfs()
- @unittest.skip("Spark Connect does not support spark.conf but the test depends on it.")
- def test_udf_with_column_vector(self):
- super().test_udf_with_column_vector()
-
@unittest.skip("Spark Connect does not support spark.conf but the test depends on it.")
def test_udf_timestamp_ntz(self):
super().test_udf_timestamp_ntz()
@@ -56,14 +52,6 @@ class UDFParityTests(BaseUDFTestsMixin, ReusedConnectTestCase):
def test_broadcast_in_udf(self):
super().test_broadcast_in_udf()
- @unittest.skip("Spark Connect does not support sql_conf but the test depends on it.")
- def test_file_dsv2_with_udf_filter(self):
- super().test_file_dsv2_with_udf_filter()
-
- @unittest.skip("Spark Connect does not support sql_conf but the test depends on it.")
- def test_udf_in_join_condition(self):
- super().test_udf_in_join_condition()
-
@unittest.skip("Spark Connect does not support cache() but the test depends on it.")
def test_udf_cache(self):
super().test_udf_cache()
@@ -76,13 +64,11 @@ class UDFParityTests(BaseUDFTestsMixin, ReusedConnectTestCase):
def test_nondeterministic_udf3(self):
super().test_nondeterministic_udf3()
- @unittest.skip("Requires JVM access.")
def test_nondeterministic_udf_in_aggregate(self):
- super().test_nondeterministic_udf_in_aggregate()
+ self.check_nondeterministic_udf_in_aggregate()
- @unittest.skip("Requires JVM access.")
def test_udf_registration_return_type_not_none(self):
- super().test_udf_registration_return_type_not_none()
+ self.check_udf_registration_return_type_not_none()
@unittest.skip("Spark Connect doesn't support RDD but the test depends on it.")
def test_worker_original_stdin_closed(self):
diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py
index b766b0c0178..250386d8c6e 100644
--- a/python/pyspark/sql/tests/test_udf.py
+++ b/python/pyspark/sql/tests/test_udf.py
@@ -106,10 +106,13 @@ class BaseUDFTestsMixin(object):
def test_udf_registration_return_type_not_none(self):
with QuietTest(self.sc):
- with self.assertRaisesRegex(TypeError, "Invalid return type"):
- self.spark.catalog.registerFunction(
- "f", UserDefinedFunction(lambda x, y: len(x) + y, StringType()), StringType()
- )
+ self.check_udf_registration_return_type_not_none()
+
+ def check_udf_registration_return_type_not_none(self):
+ with self.assertRaisesRegex(TypeError, "Invalid return type"):
+ self.spark.catalog.registerFunction(
+ "f", UserDefinedFunction(lambda x, y: len(x) + y, StringType()), StringType()
+ )
def test_nondeterministic_udf(self):
# Test that nondeterministic UDFs are evaluated only once in chained UDF evaluations
@@ -154,17 +157,20 @@ class BaseUDFTestsMixin(object):
self.assertFalse(deterministic)
def test_nondeterministic_udf_in_aggregate(self):
+ with QuietTest(self.sc):
+ self.check_nondeterministic_udf_in_aggregate()
+
+ def check_nondeterministic_udf_in_aggregate(self):
from pyspark.sql.functions import sum
import random
udf_random_col = udf(lambda: int(100 * random.random()), "int").asNondeterministic()
df = self.spark.range(10)
- with QuietTest(self.sc):
- with self.assertRaisesRegex(AnalysisException, "nondeterministic"):
- df.groupby("id").agg(sum(udf_random_col())).collect()
- with self.assertRaisesRegex(AnalysisException, "nondeterministic"):
- df.agg(sum(udf_random_col())).collect()
+ with self.assertRaisesRegex(AnalysisException, "nondeterministic"):
+ df.groupby("id").agg(sum(udf_random_col())).collect()
+ with self.assertRaisesRegex(AnalysisException, "nondeterministic"):
+ df.agg(sum(udf_random_col())).collect()
def test_chained_udf(self):
self.spark.catalog.registerFunction("double", lambda x: x + x, IntegerType())
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org