You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2022/01/23 02:33:41 UTC
[spark] branch master updated: [SPARK-37886][PYTHON][TESTS] Use ComparisonTestBase as base class in OpsTestBase
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1ff40d6 [SPARK-37886][PYTHON][TESTS] Use ComparisonTestBase as base class in OpsTestBase
1ff40d6 is described below
commit 1ff40d61cee754d3ba60ee45f839dba76a9955d3
Author: Yikun Jiang <yi...@gmail.com>
AuthorDate: Sun Jan 23 11:32:41 2022 +0900
[SPARK-37886][PYTHON][TESTS] Use ComparisonTestBase as base class in OpsTestBase
### What changes were proposed in this pull request?
- Rename TestCasesUtils to OpsTestBase
- Make `OpsTestBase` inherit from `ComparisonTestBase` (a `PandasOnSparkTestCase` with `pdf` and `psdf`)
- Make `*OpsTest` inherit from `OpsTestBase`
### Why are the changes needed?
All data type ops related test cases use `PandasOnSparkTestCase, TestCasesUtils` as base classes; it is better to have `TestCasesUtils` inherit from `PandasOnSparkTestCase` instead of relying on multiple inheritance.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
UT
Closes #35203 from Yikun/opstest_refactor.
Authored-by: Yikun Jiang <yi...@gmail.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py | 5 ++---
python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py | 7 +++----
.../pyspark/pandas/tests/data_type_ops/test_categorical_ops.py | 5 ++---
python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py | 5 ++---
python/pyspark/pandas/tests/data_type_ops/test_date_ops.py | 5 ++---
python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py | 5 ++---
python/pyspark/pandas/tests/data_type_ops/test_null_ops.py | 5 ++---
python/pyspark/pandas/tests/data_type_ops/test_num_ops.py | 9 ++++-----
python/pyspark/pandas/tests/data_type_ops/test_string_ops.py | 7 +++----
.../pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py | 5 ++---
python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py | 5 ++---
python/pyspark/pandas/tests/data_type_ops/testing_utils.py | 10 ++++------
12 files changed, 30 insertions(+), 43 deletions(-)
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py
index 5dc7f80..35fcb37 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py
@@ -19,11 +19,10 @@ import pandas as pd
from pandas.api.types import CategoricalDtype
from pyspark import pandas as ps
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
-class BinaryOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class BinaryOpsTest(OpsTestBase):
@property
def pser(self):
return pd.Series([b"1", b"2", b"3"])
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py
index b83b610..02bb048 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py
@@ -25,15 +25,14 @@ from pandas.api.types import CategoricalDtype
from pyspark import pandas as ps
from pyspark.pandas import option_context
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
from pyspark.pandas.typedef.typehints import (
extension_float_dtypes_available,
extension_object_dtypes_available,
)
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
-class BooleanOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class BooleanOpsTest(OpsTestBase):
@property
def bool_pdf(self):
return pd.DataFrame({"this": [True, False, True], "that": [False, True, True]})
@@ -381,7 +380,7 @@ class BooleanOpsTest(PandasOnSparkTestCase, TestCasesUtils):
@unittest.skipIf(
not extension_object_dtypes_available, "pandas extension object dtypes are not available"
)
-class BooleanExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class BooleanExtensionOpsTest(OpsTestBase):
@property
def boolean_pdf(self):
return pd.DataFrame(
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
index e07af72..b84c35b 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
@@ -23,11 +23,10 @@ from pandas.api.types import CategoricalDtype
from pyspark import pandas as ps
from pyspark.pandas.config import option_context
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
-class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class CategoricalOpsTest(OpsTestBase):
@property
def pdf(self):
return pd.DataFrame(
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py
index 91a92ba..cc9a0bf 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py
@@ -21,11 +21,10 @@ import datetime
import pandas as pd
from pyspark import pandas as ps
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
-class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class ComplexOpsTest(OpsTestBase):
@property
def pser(self):
return pd.Series([[1, 2, 3]])
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
index 8c196d2..f0585c3 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
@@ -21,11 +21,10 @@ import pandas as pd
from pandas.api.types import CategoricalDtype
from pyspark import pandas as ps
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
-class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class DateOpsTest(OpsTestBase):
@property
def pser(self):
return pd.Series(
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
index 5eba485..f29f9d3 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
@@ -21,11 +21,10 @@ import pandas as pd
from pandas.api.types import CategoricalDtype
from pyspark import pandas as ps
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
-class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class DatetimeOpsTest(OpsTestBase):
@property
def pser(self):
return pd.Series(pd.date_range("1994-1-31 10:30:15", periods=3, freq="D"))
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py
index c2b6be2..009d4d0 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py
@@ -19,11 +19,10 @@ import pandas as pd
from pandas.api.types import CategoricalDtype
import pyspark.pandas as ps
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
-class NullOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class NullOpsTest(OpsTestBase):
@property
def pser(self):
return pd.Series([None, None, None])
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
index 785eb25..0c2c94e 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
@@ -25,17 +25,16 @@ from pandas.api.types import CategoricalDtype
from pyspark import pandas as ps
from pyspark.pandas.config import option_context
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
from pyspark.pandas.typedef.typehints import (
extension_dtypes_available,
extension_float_dtypes_available,
extension_object_dtypes_available,
)
from pyspark.sql.types import DecimalType, IntegralType
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
-class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class NumOpsTest(OpsTestBase):
"""Unit tests for arithmetic operations of numeric data types.
A few test cases are disabled because pandas-on-Spark returns float64 whereas pandas
@@ -450,7 +449,7 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
@unittest.skipIf(not extension_dtypes_available, "pandas extension dtypes are not available")
-class IntegralExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class IntegralExtensionOpsTest(OpsTestBase):
@property
def intergral_extension_psers(self):
return [pd.Series([1, 2, 3, None], dtype=dtype) for dtype in self.integral_extension_dtypes]
@@ -590,7 +589,7 @@ class IntegralExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils):
@unittest.skipIf(
not extension_float_dtypes_available, "pandas extension float dtypes are not available"
)
-class FractionalExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class FractionalExtensionOpsTest(OpsTestBase):
@property
def fractional_extension_psers(self):
return [
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py
index f7c45cc..572ea76 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py
@@ -23,15 +23,14 @@ from pandas.api.types import CategoricalDtype
from pyspark import pandas as ps
from pyspark.pandas.config import option_context
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
from pyspark.pandas.typedef.typehints import extension_object_dtypes_available
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
if extension_object_dtypes_available:
from pandas import StringDtype
-class StringOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class StringOpsTest(OpsTestBase):
@property
def bool_pdf(self):
return pd.DataFrame({"this": ["x", "y", "z"], "that": ["z", "y", "x"]})
@@ -237,7 +236,7 @@ class StringOpsTest(PandasOnSparkTestCase, TestCasesUtils):
@unittest.skipIf(
not extension_object_dtypes_available, "pandas extension object dtypes are not available"
)
-class StringExtensionOpsTest(StringOpsTest, PandasOnSparkTestCase, TestCasesUtils):
+class StringExtensionOpsTest(StringOpsTest):
@property
def pser(self):
return pd.Series(["x", "y", "z", None], dtype="string")
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py
index 40882b8..16788c0 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py
@@ -21,11 +21,10 @@ import pandas as pd
from pandas.api.types import CategoricalDtype
import pyspark.pandas as ps
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
-class TimedeltaOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class TimedeltaOpsTest(OpsTestBase):
@property
def pser(self):
return pd.Series([timedelta(1), timedelta(microseconds=2), timedelta(weeks=3)])
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py
index 70175c4..a71691c 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py
@@ -19,11 +19,10 @@ import pandas as pd
import pyspark.pandas as ps
from pyspark.ml.linalg import SparseVector
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
-class UDTOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class UDTOpsTest(OpsTestBase):
@property
def pser(self):
sparse_values = {0: 0.1, 1: 1.1}
diff --git a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py
index 9f57ad4..222b945 100644
--- a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py
+++ b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py
@@ -31,6 +31,8 @@ from pyspark.pandas.typedef.typehints import (
extension_object_dtypes_available,
)
+from pyspark.testing.pandasutils import ComparisonTestBase
+
if extension_dtypes_available:
from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype
@@ -41,8 +43,8 @@ if extension_object_dtypes_available:
from pandas import BooleanDtype, StringDtype
-class TestCasesUtils:
- """A utility holding common test cases for arithmetic operations of different data types."""
+class OpsTestBase(ComparisonTestBase):
+ """The test base for arithmetic operations of different data types."""
@property
def numeric_pdf(self):
@@ -111,10 +113,6 @@ class TestCasesUtils:
return pd.concat([self.numeric_pdf, self.non_numeric_pdf], axis=1)
@property
- def psdf(self):
- return ps.from_pandas(self.pdf)
-
- @property
def df_cols(self):
return self.pdf.columns
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org