You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2022/01/23 02:33:41 UTC

[spark] branch master updated: [SPARK-37886][PYTHON][TESTS] Use ComparisonTestBase as base class in OpsTestBase

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 1ff40d6  [SPARK-37886][PYTHON][TESTS] Use ComparisonTestBase as base class in OpsTestBase
1ff40d6 is described below

commit 1ff40d61cee754d3ba60ee45f839dba76a9955d3
Author: Yikun Jiang <yi...@gmail.com>
AuthorDate: Sun Jan 23 11:32:41 2022 +0900

    [SPARK-37886][PYTHON][TESTS] Use ComparisonTestBase as base class in OpsTestBase
    
    ### What changes were proposed in this pull request?
    - Rename TestCasesUtils to OpsTestBase
    - Make `OpsTestBase` inherit from `ComparisonTestBase` (`PandasOnSparkTestCase` with `pdf` and `psdf`)
    - Make the `*OpsTest` classes inherit from `OpsTestBase`
    
    ### Why are the changes needed?
    All data type ops related test cases use `PandasOnSparkTestCase, TestCasesUtils` as base classes; it is better to let `TestCasesUtils` inherit from `PandasOnSparkTestCase` instead of relying on multiple inheritance.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    UT
    
    Closes #35203 from Yikun/opstest_refactor.
    
    Authored-by: Yikun Jiang <yi...@gmail.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py   |  5 ++---
 python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py  |  7 +++----
 .../pyspark/pandas/tests/data_type_ops/test_categorical_ops.py |  5 ++---
 python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py  |  5 ++---
 python/pyspark/pandas/tests/data_type_ops/test_date_ops.py     |  5 ++---
 python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py |  5 ++---
 python/pyspark/pandas/tests/data_type_ops/test_null_ops.py     |  5 ++---
 python/pyspark/pandas/tests/data_type_ops/test_num_ops.py      |  9 ++++-----
 python/pyspark/pandas/tests/data_type_ops/test_string_ops.py   |  7 +++----
 .../pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py   |  5 ++---
 python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py      |  5 ++---
 python/pyspark/pandas/tests/data_type_ops/testing_utils.py     | 10 ++++------
 12 files changed, 30 insertions(+), 43 deletions(-)

diff --git a/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py
index 5dc7f80..35fcb37 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py
@@ -19,11 +19,10 @@ import pandas as pd
 from pandas.api.types import CategoricalDtype
 
 from pyspark import pandas as ps
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
 
 
-class BinaryOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class BinaryOpsTest(OpsTestBase):
     @property
     def pser(self):
         return pd.Series([b"1", b"2", b"3"])
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py
index b83b610..02bb048 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_boolean_ops.py
@@ -25,15 +25,14 @@ from pandas.api.types import CategoricalDtype
 
 from pyspark import pandas as ps
 from pyspark.pandas import option_context
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
 from pyspark.pandas.typedef.typehints import (
     extension_float_dtypes_available,
     extension_object_dtypes_available,
 )
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
 
 
-class BooleanOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class BooleanOpsTest(OpsTestBase):
     @property
     def bool_pdf(self):
         return pd.DataFrame({"this": [True, False, True], "that": [False, True, True]})
@@ -381,7 +380,7 @@ class BooleanOpsTest(PandasOnSparkTestCase, TestCasesUtils):
 @unittest.skipIf(
     not extension_object_dtypes_available, "pandas extension object dtypes are not available"
 )
-class BooleanExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class BooleanExtensionOpsTest(OpsTestBase):
     @property
     def boolean_pdf(self):
         return pd.DataFrame(
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
index e07af72..b84c35b 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
@@ -23,11 +23,10 @@ from pandas.api.types import CategoricalDtype
 
 from pyspark import pandas as ps
 from pyspark.pandas.config import option_context
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
 
 
-class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class CategoricalOpsTest(OpsTestBase):
     @property
     def pdf(self):
         return pd.DataFrame(
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py
index 91a92ba..cc9a0bf 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py
@@ -21,11 +21,10 @@ import datetime
 import pandas as pd
 
 from pyspark import pandas as ps
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
 
 
-class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class ComplexOpsTest(OpsTestBase):
     @property
     def pser(self):
         return pd.Series([[1, 2, 3]])
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
index 8c196d2..f0585c3 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
@@ -21,11 +21,10 @@ import pandas as pd
 from pandas.api.types import CategoricalDtype
 
 from pyspark import pandas as ps
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
 
 
-class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class DateOpsTest(OpsTestBase):
     @property
     def pser(self):
         return pd.Series(
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
index 5eba485..f29f9d3 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
@@ -21,11 +21,10 @@ import pandas as pd
 from pandas.api.types import CategoricalDtype
 
 from pyspark import pandas as ps
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
 
 
-class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class DatetimeOpsTest(OpsTestBase):
     @property
     def pser(self):
         return pd.Series(pd.date_range("1994-1-31 10:30:15", periods=3, freq="D"))
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py
index c2b6be2..009d4d0 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py
@@ -19,11 +19,10 @@ import pandas as pd
 from pandas.api.types import CategoricalDtype
 
 import pyspark.pandas as ps
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
 
 
-class NullOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class NullOpsTest(OpsTestBase):
     @property
     def pser(self):
         return pd.Series([None, None, None])
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
index 785eb25..0c2c94e 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
@@ -25,17 +25,16 @@ from pandas.api.types import CategoricalDtype
 
 from pyspark import pandas as ps
 from pyspark.pandas.config import option_context
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
 from pyspark.pandas.typedef.typehints import (
     extension_dtypes_available,
     extension_float_dtypes_available,
     extension_object_dtypes_available,
 )
 from pyspark.sql.types import DecimalType, IntegralType
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
 
 
-class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class NumOpsTest(OpsTestBase):
     """Unit tests for arithmetic operations of numeric data types.
 
     A few test cases are disabled because pandas-on-Spark returns float64 whereas pandas
@@ -450,7 +449,7 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
 
 
 @unittest.skipIf(not extension_dtypes_available, "pandas extension dtypes are not available")
-class IntegralExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class IntegralExtensionOpsTest(OpsTestBase):
     @property
     def intergral_extension_psers(self):
         return [pd.Series([1, 2, 3, None], dtype=dtype) for dtype in self.integral_extension_dtypes]
@@ -590,7 +589,7 @@ class IntegralExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils):
 @unittest.skipIf(
     not extension_float_dtypes_available, "pandas extension float dtypes are not available"
 )
-class FractionalExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class FractionalExtensionOpsTest(OpsTestBase):
     @property
     def fractional_extension_psers(self):
         return [
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py
index f7c45cc..572ea76 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_string_ops.py
@@ -23,15 +23,14 @@ from pandas.api.types import CategoricalDtype
 
 from pyspark import pandas as ps
 from pyspark.pandas.config import option_context
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
 from pyspark.pandas.typedef.typehints import extension_object_dtypes_available
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
 
 if extension_object_dtypes_available:
     from pandas import StringDtype
 
 
-class StringOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class StringOpsTest(OpsTestBase):
     @property
     def bool_pdf(self):
         return pd.DataFrame({"this": ["x", "y", "z"], "that": ["z", "y", "x"]})
@@ -237,7 +236,7 @@ class StringOpsTest(PandasOnSparkTestCase, TestCasesUtils):
 @unittest.skipIf(
     not extension_object_dtypes_available, "pandas extension object dtypes are not available"
 )
-class StringExtensionOpsTest(StringOpsTest, PandasOnSparkTestCase, TestCasesUtils):
+class StringExtensionOpsTest(StringOpsTest):
     @property
     def pser(self):
         return pd.Series(["x", "y", "z", None], dtype="string")
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py
index 40882b8..16788c0 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_timedelta_ops.py
@@ -21,11 +21,10 @@ import pandas as pd
 from pandas.api.types import CategoricalDtype
 
 import pyspark.pandas as ps
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
 
 
-class TimedeltaOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class TimedeltaOpsTest(OpsTestBase):
     @property
     def pser(self):
         return pd.Series([timedelta(1), timedelta(microseconds=2), timedelta(weeks=3)])
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py
index 70175c4..a71691c 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py
@@ -19,11 +19,10 @@ import pandas as pd
 
 import pyspark.pandas as ps
 from pyspark.ml.linalg import SparseVector
-from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
-from pyspark.testing.pandasutils import PandasOnSparkTestCase
+from pyspark.pandas.tests.data_type_ops.testing_utils import OpsTestBase
 
 
-class UDTOpsTest(PandasOnSparkTestCase, TestCasesUtils):
+class UDTOpsTest(OpsTestBase):
     @property
     def pser(self):
         sparse_values = {0: 0.1, 1: 1.1}
diff --git a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py
index 9f57ad4..222b945 100644
--- a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py
+++ b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py
@@ -31,6 +31,8 @@ from pyspark.pandas.typedef.typehints import (
     extension_object_dtypes_available,
 )
 
+from pyspark.testing.pandasutils import ComparisonTestBase
+
 if extension_dtypes_available:
     from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype
 
@@ -41,8 +43,8 @@ if extension_object_dtypes_available:
     from pandas import BooleanDtype, StringDtype
 
 
-class TestCasesUtils:
-    """A utility holding common test cases for arithmetic operations of different data types."""
+class OpsTestBase(ComparisonTestBase):
+    """The test base for arithmetic operations of different data types."""
 
     @property
     def numeric_pdf(self):
@@ -111,10 +113,6 @@ class TestCasesUtils:
         return pd.concat([self.numeric_pdf, self.non_numeric_pdf], axis=1)
 
     @property
-    def psdf(self):
-        return ps.from_pandas(self.pdf)
-
-    @property
     def df_cols(self):
         return self.pdf.columns
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org