You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ue...@apache.org on 2021/07/28 22:54:18 UTC
[spark] branch master updated: [SPARK-36190][PYTHON] Improve the rest of DataTypeOps tests by avoiding joins
This is an automated email from the ASF dual-hosted git repository.
ueshin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 9c5cb99 [SPARK-36190][PYTHON] Improve the rest of DataTypeOps tests by avoiding joins
9c5cb99 is described below
commit 9c5cb99d6eada539e26ddb6dc1ac76b8dd6cc55a
Author: Xinrong Meng <xi...@databricks.com>
AuthorDate: Wed Jul 28 15:53:38 2021 -0700
[SPARK-36190][PYTHON] Improve the rest of DataTypeOps tests by avoiding joins
### What changes were proposed in this pull request?
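Improve the rest of the DataTypeOps tests by avoiding joins: operands that used to be separate Series combined under the `compute.ops_on_diff_frames` option are now columns of a single DataFrame.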
### Why are the changes needed?
The bool, string, and numeric DataTypeOps tests have already been improved by avoiding joins.
We should improve the rest of the DataTypeOps tests in the same way.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Unit tests.
Closes #33546 from xinrong-databricks/test_no_join.
Authored-by: Xinrong Meng <xi...@databricks.com>
Signed-off-by: Takuya UESHIN <ue...@databricks.com>
---
.../pandas/tests/data_type_ops/test_binary_ops.py | 106 +++----
.../pandas/tests/data_type_ops/test_complex_ops.py | 346 +++++++++++----------
.../pandas/tests/data_type_ops/test_date_ops.py | 111 +++----
.../tests/data_type_ops/test_datetime_ops.py | 119 ++++---
.../pandas/tests/data_type_ops/test_null_ops.py | 60 ++--
.../pandas/tests/data_type_ops/test_udt_ops.py | 59 ++--
6 files changed, 399 insertions(+), 402 deletions(-)
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py
index 139ad11..a68459a 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_binary_ops.py
@@ -19,7 +19,6 @@ import pandas as pd
from pandas.api.types import CategoricalDtype
from pyspark import pandas as ps
-from pyspark.pandas.config import option_context
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
from pyspark.testing.pandasutils import PandasOnSparkTestCase
@@ -34,74 +33,75 @@ class BinaryOpsTest(PandasOnSparkTestCase, TestCasesUtils):
return ps.from_pandas(self.pser)
@property
- def other_pser(self):
- return pd.Series([b"2", b"3", b"4"])
+ def byte_pdf(self):
+ psers = {
+ "this": self.pser,
+ "that": pd.Series([b"2", b"3", b"4"]),
+ }
+ return pd.concat(psers, axis=1)
@property
- def other_psser(self):
- return ps.from_pandas(self.other_pser)
+ def byte_psdf(self):
+ return ps.from_pandas(self.byte_pdf)
def test_add(self):
- psser = self.psser
- pser = self.pser
+ byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
+ pser, psser = byte_pdf["this"], byte_psdf["this"]
+ other_pser, other_psser = byte_pdf["that"], byte_psdf["that"]
+
self.assert_eq(psser + b"1", pser + b"1")
self.assert_eq(psser + psser, pser + pser)
self.assert_eq(psser + psser.astype("bytes"), pser + pser.astype("bytes"))
self.assertRaises(TypeError, lambda: psser + "x")
self.assertRaises(TypeError, lambda: psser + 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser + psser)
- self.assert_eq(self.pser + self.pser, (self.psser + self.psser).sort_index())
+ self.assert_eq(pser + pser, psser + psser)
+ self.assert_eq(pser + other_pser, psser + other_psser)
+
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser + psser)
def test_sub(self):
self.assertRaises(TypeError, lambda: self.psser - "x")
self.assertRaises(TypeError, lambda: self.psser - 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser - psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser - psser)
def test_mul(self):
self.assertRaises(TypeError, lambda: self.psser * "x")
self.assertRaises(TypeError, lambda: self.psser * 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser * psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser * psser)
def test_truediv(self):
self.assertRaises(TypeError, lambda: self.psser / "x")
self.assertRaises(TypeError, lambda: self.psser / 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser / psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser / psser)
def test_floordiv(self):
self.assertRaises(TypeError, lambda: self.psser // "x")
self.assertRaises(TypeError, lambda: self.psser // 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser // psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser // psser)
def test_mod(self):
self.assertRaises(TypeError, lambda: self.psser % "x")
self.assertRaises(TypeError, lambda: self.psser % 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser % psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser % psser)
def test_pow(self):
self.assertRaises(TypeError, lambda: self.psser ** "x")
self.assertRaises(TypeError, lambda: self.psser ** 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser ** psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser ** psser)
def test_radd(self):
self.assert_eq(b"1" + self.psser, b"1" + self.pser)
@@ -177,46 +177,34 @@ class BinaryOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: ~self.psser)
def test_eq(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser == self.other_pser, (self.psser == self.other_psser).sort_index()
- )
- self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
+ byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
+ self.assert_eq(byte_pdf["this"] == byte_pdf["that"], byte_psdf["this"] == byte_psdf["that"])
+ self.assert_eq(byte_pdf["this"] == byte_pdf["this"], byte_psdf["this"] == byte_psdf["this"])
def test_ne(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser != self.other_pser, (self.psser != self.other_psser).sort_index()
- )
- self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
+ byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
+ self.assert_eq(byte_pdf["this"] != byte_pdf["that"], byte_psdf["this"] != byte_psdf["that"])
+ self.assert_eq(byte_pdf["this"] != byte_pdf["this"], byte_psdf["this"] != byte_psdf["this"])
def test_lt(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser < self.other_pser, (self.psser < self.other_psser).sort_index()
- )
- self.assert_eq(self.pser < self.pser, (self.psser < self.psser).sort_index())
+ byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
+ self.assert_eq(byte_pdf["this"] < byte_pdf["that"], byte_psdf["this"] < byte_psdf["that"])
+ self.assert_eq(byte_pdf["this"] < byte_pdf["this"], byte_psdf["this"] < byte_psdf["this"])
def test_le(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser <= self.other_pser, (self.psser <= self.other_psser).sort_index()
- )
- self.assert_eq(self.pser <= self.pser, (self.psser <= self.psser).sort_index())
+ byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
+ self.assert_eq(byte_pdf["this"] <= byte_pdf["that"], byte_psdf["this"] <= byte_psdf["that"])
+ self.assert_eq(byte_pdf["this"] <= byte_pdf["this"], byte_psdf["this"] <= byte_psdf["this"])
def test_gt(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser > self.other_pser, (self.psser > self.other_psser).sort_index()
- )
- self.assert_eq(self.pser > self.pser, (self.psser > self.psser).sort_index())
+ byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
+ self.assert_eq(byte_pdf["this"] > byte_pdf["that"], byte_psdf["this"] > byte_psdf["that"])
+ self.assert_eq(byte_pdf["this"] > byte_pdf["this"], byte_psdf["this"] > byte_psdf["this"])
def test_ge(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser >= self.other_pser, (self.psser >= self.other_psser).sort_index()
- )
- self.assert_eq(self.pser >= self.pser, (self.psser >= self.psser).sort_index())
+ byte_pdf, byte_psdf = self.byte_pdf, self.byte_psdf
+ self.assert_eq(byte_pdf["this"] >= byte_pdf["that"], byte_psdf["this"] >= byte_psdf["that"])
+ self.assert_eq(byte_pdf["this"] >= byte_pdf["this"], byte_psdf["this"] >= byte_psdf["this"])
if __name__ == "__main__":
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py
index 0480285..91a92ba 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_complex_ops.py
@@ -21,169 +21,175 @@ import datetime
import pandas as pd
from pyspark import pandas as ps
-from pyspark.pandas.config import option_context
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
from pyspark.testing.pandasutils import PandasOnSparkTestCase
class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils):
@property
- def numeric_array_psers(self):
- return [
- pd.Series([[1, 2, 3]]),
- pd.Series([[0.1, 0.2, 0.3]]),
- pd.Series([[decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(3)]]),
- ]
+ def pser(self):
+ return pd.Series([[1, 2, 3]])
@property
- def non_numeric_array_psers(self):
- return {
- "string": pd.Series([["x", "y", "z"]]),
- "date": pd.Series(
- [[datetime.date(1994, 1, 1), datetime.date(1994, 1, 2), datetime.date(1994, 1, 3)]]
- ),
- "bool": pd.Series([[True, True, False]]),
- }
+ def psser(self):
+ return ps.from_pandas(self.pser)
@property
- def numeric_array_pssers(self):
- return [ps.from_pandas(pser) for pser in self.numeric_array_psers]
+ def numeric_array_pdf(self):
+ psers = {
+ "int": pd.Series([[1, 2, 3]]),
+ "float": pd.Series([[0.1, 0.2, 0.3]]),
+ "decimal": pd.Series([[decimal.Decimal(1), decimal.Decimal(2), decimal.Decimal(3)]]),
+ }
+ return pd.concat(psers, axis=1)
@property
- def non_numeric_array_pssers(self):
- pssers = {}
+ def numeric_array_psdf(self):
+ return ps.from_pandas(self.numeric_array_pdf)
- for k, v in self.non_numeric_array_psers.items():
- pssers[k] = ps.from_pandas(v)
- return pssers
+ @property
+ def numeric_array_df_cols(self):
+ return self.numeric_array_pdf.columns
@property
- def psers(self):
- return self.numeric_array_psers + list(self.non_numeric_array_psers.values())
+ def non_numeric_array_pdf(self):
+ psers = {
+ "string": pd.Series([["x", "y", "z"]]),
+ "date": pd.Series(
+ [[datetime.date(1994, 1, 1), datetime.date(1994, 1, 2), datetime.date(1994, 1, 3)]]
+ ),
+ "bool": pd.Series([[True, True, False]]),
+ }
+ return pd.concat(psers, axis=1)
@property
- def pssers(self):
- return self.numeric_array_pssers + list(self.non_numeric_array_pssers.values())
+ def non_numeric_array_psdf(self):
+ return ps.from_pandas(self.non_numeric_array_pdf)
@property
- def pser(self):
- return pd.Series([[1, 2, 3]])
+ def non_numeric_array_df_cols(self):
+ return self.non_numeric_array_pdf.columns
@property
- def psser(self):
- return ps.from_pandas(self.pser)
+ def array_pdf(self):
+ return pd.concat([self.numeric_array_pdf, self.non_numeric_array_pdf], axis=1)
@property
- def other_pser(self):
- return pd.Series([[2, 3, 4]])
+ def array_psdf(self):
+ return ps.from_pandas(self.array_pdf)
@property
- def other_psser(self):
- return ps.from_pandas(self.other_pser)
+ def array_df_cols(self):
+ return self.array_pdf.columns
@property
- def struct_pser(self):
- return pd.Series([("x", 1)])
+ def complex_pdf(self):
+ psers = {
+ "this_array": self.pser,
+ "that_array": pd.Series([[2, 3, 4]]),
+ "this_struct": pd.Series([("x", 1)]),
+ "that_struct": pd.Series([("a", 2)]),
+ }
+ return pd.concat(psers, axis=1)
@property
- def struct_psser(self):
- return ps.Index([("x", 1)]).to_series().reset_index(drop=True)
+ def complex_psdf(self):
+ pssers = {
+ "this_array": self.psser,
+ "that_array": ps.Series([[2, 3, 4]]),
+ "this_struct": ps.Index([("x", 1)]).to_series().reset_index(drop=True),
+ "that_struct": ps.Index([("a", 2)]).to_series().reset_index(drop=True),
+ }
+ return ps.concat(pssers, axis=1)
def test_add(self):
- for pser, psser in zip(self.psers, self.pssers):
+ pdf, psdf = self.array_pdf, self.array_psdf
+ for col in self.array_df_cols:
+ self.assert_eq(pdf[col] + pdf[col], psdf[col] + psdf[col])
+
+ # Numeric array + Numeric array
+ for col in self.numeric_array_df_cols:
+ pser1, psser1 = pdf[col], psdf[col]
+ for other_col in self.numeric_array_df_cols:
+ pser2, psser2 = pdf[other_col], psdf[other_col]
+ self.assert_eq((pser1 + pser2).sort_values(), (psser1 + psser2).sort_values())
+
+ # Non-numeric array + Non-numeric array
+ self.assertRaises(
+ TypeError,
+ lambda: psdf["string"] + psdf["bool"],
+ )
+ self.assertRaises(
+ TypeError,
+ lambda: psdf["string"] + psdf["date"],
+ )
+ self.assertRaises(
+ TypeError,
+ lambda: psdf["bool"] + psdf["date"],
+ )
+
+ for col in self.non_numeric_array_df_cols:
+ pser, psser = pdf[col], psdf[col]
self.assert_eq(pser + pser, psser + psser)
- with option_context("compute.ops_on_diff_frames", True):
- # Numeric array + Numeric array
- for pser1, psser1 in zip(self.numeric_array_psers, self.numeric_array_pssers):
- for pser2, psser2 in zip(self.numeric_array_psers, self.numeric_array_pssers):
- self.assert_eq((pser1 + pser2).sort_values(), (psser1 + psser2).sort_values())
-
- # Non-numeric array + Non-numeric array
- self.assertRaises(
- TypeError,
- lambda: self.non_numeric_array_pssers["string"]
- + self.non_numeric_array_pssers["bool"],
- )
- self.assertRaises(
- TypeError,
- lambda: self.non_numeric_array_pssers["string"]
- + self.non_numeric_array_pssers["date"],
- )
- self.assertRaises(
- TypeError,
- lambda: self.non_numeric_array_pssers["bool"]
- + self.non_numeric_array_pssers["date"],
- )
-
- for data_type in self.non_numeric_array_psers.keys():
- self.assert_eq(
- self.non_numeric_array_psers.get(data_type)
- + self.non_numeric_array_psers.get(data_type),
- (
- self.non_numeric_array_pssers.get(data_type)
- + self.non_numeric_array_pssers.get(data_type)
- ).sort_index(),
- )
-
- # Numeric array + Non-numeric array
- for numeric_ppser in self.numeric_array_pssers:
- for non_numeric_ppser in self.non_numeric_array_pssers.values():
- self.assertRaises(TypeError, lambda: numeric_ppser + non_numeric_ppser)
+ # Numeric array + Non-numeric array
+ for numeric_col in self.numeric_array_df_cols:
+ for non_numeric_col in self.non_numeric_array_df_cols:
+ self.assertRaises(TypeError, lambda: psdf[numeric_col] + psdf[non_numeric_col])
def test_sub(self):
self.assertRaises(TypeError, lambda: self.psser - "x")
self.assertRaises(TypeError, lambda: self.psser - 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser1 in self.pssers:
- for psser2 in self.pssers:
- self.assertRaises(TypeError, lambda: psser1 - psser2)
+ psdf = self.array_psdf
+ for col in self.array_df_cols:
+ for other_col in self.array_df_cols:
+ self.assertRaises(TypeError, lambda: psdf[col] - psdf[other_col])
def test_mul(self):
self.assertRaises(TypeError, lambda: self.psser * "x")
self.assertRaises(TypeError, lambda: self.psser * 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser1 in self.pssers:
- for psser2 in self.pssers:
- self.assertRaises(TypeError, lambda: psser1 * psser2)
+ psdf = self.array_psdf
+ for col in self.array_df_cols:
+ for other_col in self.array_df_cols:
+ self.assertRaises(TypeError, lambda: psdf[col] * psdf[other_col])
def test_truediv(self):
self.assertRaises(TypeError, lambda: self.psser / "x")
self.assertRaises(TypeError, lambda: self.psser / 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser1 in self.pssers:
- for psser2 in self.pssers:
- self.assertRaises(TypeError, lambda: psser1 / psser2)
+ psdf = self.array_psdf
+ for col in self.array_df_cols:
+ for other_col in self.array_df_cols:
+ self.assertRaises(TypeError, lambda: psdf[col] / psdf[other_col])
def test_floordiv(self):
self.assertRaises(TypeError, lambda: self.psser // "x")
self.assertRaises(TypeError, lambda: self.psser // 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser1 in self.pssers:
- for psser2 in self.pssers:
- self.assertRaises(TypeError, lambda: psser1 // psser2)
+ psdf = self.array_psdf
+ for col in self.array_df_cols:
+ for other_col in self.array_df_cols:
+ self.assertRaises(TypeError, lambda: psdf[col] // psdf[other_col])
def test_mod(self):
self.assertRaises(TypeError, lambda: self.psser % "x")
self.assertRaises(TypeError, lambda: self.psser % 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser1 in self.pssers:
- for psser2 in self.pssers:
- self.assertRaises(TypeError, lambda: psser1 % psser2)
+ psdf = self.array_psdf
+ for col in self.array_df_cols:
+ for other_col in self.array_df_cols:
+ self.assertRaises(TypeError, lambda: psdf[col] % psdf[other_col])
def test_pow(self):
self.assertRaises(TypeError, lambda: self.psser ** "x")
self.assertRaises(TypeError, lambda: self.psser ** 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser1 in self.pssers:
- for psser2 in self.pssers:
- self.assertRaises(TypeError, lambda: psser1 ** psser2)
+ psdf = self.array_psdf
+ for col in self.array_df_cols:
+ for other_col in self.array_df_cols:
+ self.assertRaises(TypeError, lambda: psdf[col] ** psdf[other_col])
def test_radd(self):
self.assertRaises(TypeError, lambda: "x" + self.psser)
@@ -231,12 +237,16 @@ class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: False | self.psser)
def test_from_to_pandas(self):
- for pser, psser in zip(self.psers, self.pssers):
+ pdf, psdf = self.array_pdf, self.array_psdf
+ for col in self.array_df_cols:
+ pser, psser = pdf[col], psdf[col]
self.assert_eq(pser, psser.to_pandas())
self.assert_eq(ps.from_pandas(pser), psser)
def test_isnull(self):
- for pser, psser in zip(self.psers, self.pssers):
+ pdf, psdf = self.array_pdf, self.array_psdf
+ for col in self.array_df_cols:
+ pser, psser = pdf[col], psdf[col]
self.assert_eq(pser.isnull(), psser.isnull())
def test_astype(self):
@@ -252,70 +262,94 @@ class ComplexOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: ~self.psser)
def test_eq(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser == self.other_pser, (self.psser == self.other_psser).sort_index()
- )
- self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
- self.assert_eq(
- self.struct_pser == self.struct_pser,
- (self.struct_psser == self.struct_psser).sort_index(),
- )
+ pdf, psdf = self.complex_pdf, self.complex_pdf
+ self.assert_eq(
+ pdf["this_array"] == pdf["that_array"], psdf["this_array"] == psdf["that_array"]
+ )
+ self.assert_eq(
+ pdf["this_struct"] == pdf["that_struct"], psdf["this_struct"] == psdf["that_struct"]
+ )
+ self.assert_eq(
+ pdf["this_array"] == pdf["this_array"], psdf["this_array"] == psdf["this_array"]
+ )
+ self.assert_eq(
+ pdf["this_struct"] == pdf["this_struct"], psdf["this_struct"] == psdf["this_struct"]
+ )
def test_ne(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser != self.other_pser, (self.psser != self.other_psser).sort_index()
- )
- self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
- self.assert_eq(
- self.struct_pser != self.struct_pser,
- (self.struct_psser != self.struct_psser).sort_index(),
- )
+ pdf, psdf = self.complex_pdf, self.complex_pdf
+ self.assert_eq(
+ pdf["this_array"] != pdf["that_array"], psdf["this_array"] != psdf["that_array"]
+ )
+ self.assert_eq(
+ pdf["this_struct"] != pdf["that_struct"], psdf["this_struct"] != psdf["that_struct"]
+ )
+ self.assert_eq(
+ pdf["this_array"] != pdf["this_array"], psdf["this_array"] != psdf["this_array"]
+ )
+ self.assert_eq(
+ pdf["this_struct"] != pdf["this_struct"], psdf["this_struct"] != psdf["this_struct"]
+ )
def test_lt(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser < self.other_pser, (self.psser < self.other_psser).sort_index()
- )
- self.assert_eq(self.pser < self.pser, (self.psser < self.psser).sort_index())
- self.assert_eq(
- self.struct_pser < self.struct_pser,
- (self.struct_psser < self.struct_psser).sort_index(),
- )
+ pdf, psdf = self.complex_pdf, self.complex_pdf
+ self.assert_eq(
+ pdf["this_array"] < pdf["that_array"], psdf["this_array"] < psdf["that_array"]
+ )
+ self.assert_eq(
+ pdf["this_struct"] < pdf["that_struct"], psdf["this_struct"] < psdf["that_struct"]
+ )
+ self.assert_eq(
+ pdf["this_array"] < pdf["this_array"], psdf["this_array"] < psdf["this_array"]
+ )
+ self.assert_eq(
+ pdf["this_struct"] < pdf["this_struct"], psdf["this_struct"] < psdf["this_struct"]
+ )
def test_le(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser <= self.other_pser, (self.psser <= self.other_psser).sort_index()
- )
- self.assert_eq(self.pser <= self.pser, (self.psser <= self.psser).sort_index())
- self.assert_eq(
- self.struct_pser <= self.struct_pser,
- (self.struct_psser <= self.struct_psser).sort_index(),
- )
+ pdf, psdf = self.complex_pdf, self.complex_pdf
+ self.assert_eq(
+ pdf["this_array"] <= pdf["that_array"], psdf["this_array"] <= psdf["that_array"]
+ )
+ self.assert_eq(
+ pdf["this_struct"] <= pdf["that_struct"], psdf["this_struct"] <= psdf["that_struct"]
+ )
+ self.assert_eq(
+ pdf["this_array"] <= pdf["this_array"], psdf["this_array"] <= psdf["this_array"]
+ )
+ self.assert_eq(
+ pdf["this_struct"] <= pdf["this_struct"], psdf["this_struct"] <= psdf["this_struct"]
+ )
def test_gt(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser > self.other_pser, (self.psser > self.other_psser).sort_index()
- )
- self.assert_eq(self.pser > self.pser, (self.psser > self.psser).sort_index())
- self.assert_eq(
- self.struct_pser > self.struct_pser,
- (self.struct_psser > self.struct_psser).sort_index(),
- )
+ pdf, psdf = self.complex_pdf, self.complex_pdf
+ self.assert_eq(
+ pdf["this_array"] > pdf["that_array"], psdf["this_array"] > psdf["that_array"]
+ )
+ self.assert_eq(
+ pdf["this_struct"] > pdf["that_struct"], psdf["this_struct"] > psdf["that_struct"]
+ )
+ self.assert_eq(
+ pdf["this_array"] > pdf["this_array"], psdf["this_array"] > psdf["this_array"]
+ )
+ self.assert_eq(
+ pdf["this_struct"] > pdf["this_struct"], psdf["this_struct"] > psdf["this_struct"]
+ )
def test_ge(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser >= self.other_pser, (self.psser >= self.other_psser).sort_index()
- )
- self.assert_eq(self.pser >= self.pser, (self.psser >= self.psser).sort_index())
- self.assert_eq(
- self.struct_pser >= self.struct_pser,
- (self.struct_psser >= self.struct_psser).sort_index(),
- )
+ pdf, psdf = self.complex_pdf, self.complex_pdf
+ self.assert_eq(
+ pdf["this_array"] >= pdf["that_array"], psdf["this_array"] >= psdf["that_array"]
+ )
+ self.assert_eq(
+ pdf["this_struct"] >= pdf["that_struct"], psdf["this_struct"] >= psdf["that_struct"]
+ )
+ self.assert_eq(
+ pdf["this_array"] >= pdf["this_array"], psdf["this_array"] >= psdf["this_array"]
+ )
+ self.assert_eq(
+ pdf["this_struct"] >= pdf["this_struct"], psdf["this_struct"] >= psdf["this_struct"]
+ )
if __name__ == "__main__":
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
index 1574ebf..0f1d768 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
@@ -20,10 +20,7 @@ import datetime
import pandas as pd
from pandas.api.types import CategoricalDtype
-from pyspark.sql.types import DateType
-
from pyspark import pandas as ps
-from pyspark.pandas.config import option_context
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
from pyspark.testing.pandasutils import PandasOnSparkTestCase
@@ -40,14 +37,18 @@ class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
return ps.from_pandas(self.pser)
@property
- def other_pser(self):
- return pd.Series(
- [datetime.date(2000, 1, 31), datetime.date(1994, 3, 1), datetime.date(1990, 2, 2)]
- )
+ def date_pdf(self):
+ psers = {
+ "this": self.pser,
+ "that": pd.Series(
+ [datetime.date(2000, 1, 31), datetime.date(1994, 3, 1), datetime.date(1990, 2, 2)]
+ ),
+ }
+ return pd.concat(psers, axis=1)
@property
- def other_psser(self):
- return ps.from_pandas(self.other_pser)
+ def date_psdf(self):
+ return ps.from_pandas(self.date_pdf)
@property
def some_date(self):
@@ -58,9 +59,8 @@ class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: self.psser + 1)
self.assertRaises(TypeError, lambda: self.psser + self.some_date)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser + psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser + psser)
def test_sub(self):
self.assertRaises(TypeError, lambda: self.psser - "x")
@@ -69,57 +69,54 @@ class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
(self.pser - self.some_date).dt.days,
self.psser - self.some_date,
)
- with option_context("compute.ops_on_diff_frames", True):
- for pser, psser in self.pser_psser_pairs:
- if isinstance(psser.spark.data_type, DateType):
- self.assert_eq((self.pser - pser).dt.days, (self.psser - psser).sort_index())
- else:
- self.assertRaises(TypeError, lambda: self.psser - psser)
+ pdf, psdf = self.pdf, self.psdf
+ for col in self.df_cols:
+ if col == "date":
+ self.assert_eq((pdf["date"] - pdf[col]).dt.days, psdf["date"] - psdf[col])
+ else:
+ self.assertRaises(TypeError, lambda: psdf["date"] - psdf[col])
+ pdf, psdf = self.date_pdf, self.date_psdf
+ self.assert_eq((pdf["this"] - pdf["that"]).dt.days, psdf["this"] - psdf["that"])
def test_mul(self):
self.assertRaises(TypeError, lambda: self.psser * "x")
self.assertRaises(TypeError, lambda: self.psser * 1)
self.assertRaises(TypeError, lambda: self.psser * self.some_date)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser * psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser * psser)
def test_truediv(self):
self.assertRaises(TypeError, lambda: self.psser / "x")
self.assertRaises(TypeError, lambda: self.psser / 1)
self.assertRaises(TypeError, lambda: self.psser / self.some_date)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser / psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser / psser)
def test_floordiv(self):
self.assertRaises(TypeError, lambda: self.psser // "x")
self.assertRaises(TypeError, lambda: self.psser // 1)
self.assertRaises(TypeError, lambda: self.psser // self.some_date)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser // psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser // psser)
def test_mod(self):
self.assertRaises(TypeError, lambda: self.psser % "x")
self.assertRaises(TypeError, lambda: self.psser % 1)
self.assertRaises(TypeError, lambda: self.psser % self.some_date)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser % psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser % psser)
def test_pow(self):
self.assertRaises(TypeError, lambda: self.psser ** "x")
self.assertRaises(TypeError, lambda: self.psser ** 1)
self.assertRaises(TypeError, lambda: self.psser ** self.some_date)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser ** psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser ** psser)
def test_radd(self):
self.assertRaises(TypeError, lambda: "x" + self.psser)
@@ -204,46 +201,34 @@ class DateOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: ~self.psser)
def test_eq(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser == self.other_pser, (self.psser == self.other_psser).sort_index()
- )
- self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
+ pdf, psdf = self.date_pdf, self.date_psdf
+ self.assert_eq(pdf["this"] == pdf["that"], psdf["this"] == psdf["that"])
+ self.assert_eq(pdf["this"] == pdf["this"], psdf["this"] == psdf["this"])
def test_ne(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser != self.other_pser, (self.psser != self.other_psser).sort_index()
- )
- self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
+ pdf, psdf = self.date_pdf, self.date_psdf
+ self.assert_eq(pdf["this"] != pdf["that"], psdf["this"] != psdf["that"])
+ self.assert_eq(pdf["this"] != pdf["this"], psdf["this"] != psdf["this"])
def test_lt(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser < self.other_pser, (self.psser < self.other_psser).sort_index()
- )
- self.assert_eq(self.pser < self.pser, (self.psser < self.psser).sort_index())
+ pdf, psdf = self.date_pdf, self.date_psdf
+ self.assert_eq(pdf["this"] == pdf["that"], psdf["this"] == psdf["that"])
+ self.assert_eq(pdf["this"] == pdf["this"], psdf["this"] == psdf["this"])
def test_le(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser <= self.other_pser, (self.psser <= self.other_psser).sort_index()
- )
- self.assert_eq(self.pser <= self.pser, (self.psser <= self.psser).sort_index())
+ pdf, psdf = self.date_pdf, self.date_psdf
+ self.assert_eq(pdf["this"] <= pdf["that"], psdf["this"] <= psdf["that"])
+ self.assert_eq(pdf["this"] <= pdf["this"], psdf["this"] <= psdf["this"])
def test_gt(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser > self.other_pser, (self.psser > self.other_psser).sort_index()
- )
- self.assert_eq(self.pser > self.pser, (self.psser > self.psser).sort_index())
+ pdf, psdf = self.date_pdf, self.date_psdf
+ self.assert_eq(pdf["this"] > pdf["that"], psdf["this"] > psdf["that"])
+ self.assert_eq(pdf["this"] > pdf["this"], psdf["this"] > psdf["this"])
def test_ge(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser >= self.other_pser, (self.psser >= self.other_psser).sort_index()
- )
- self.assert_eq(self.pser >= self.pser, (self.psser >= self.psser).sort_index())
+ pdf, psdf = self.date_pdf, self.date_psdf
+ self.assert_eq(pdf["this"] >= pdf["that"], psdf["this"] >= psdf["that"])
+ self.assert_eq(pdf["this"] >= pdf["this"], psdf["this"] >= psdf["this"])
if __name__ == "__main__":
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
index 8b2a0f9..d3e59b3 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_datetime_ops.py
@@ -17,12 +17,10 @@
import datetime
-import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype
from pyspark import pandas as ps
-from pyspark.pandas.config import option_context
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
from pyspark.testing.pandasutils import PandasOnSparkTestCase
@@ -30,19 +28,23 @@ from pyspark.testing.pandasutils import PandasOnSparkTestCase
class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
@property
def pser(self):
- return pd.Series(pd.date_range("1994-1-31 10:30:15", periods=3, freq="M"))
+ return pd.Series(pd.date_range("1994-1-31 10:30:15", periods=3, freq="D"))
@property
def psser(self):
return ps.from_pandas(self.pser)
@property
- def other_pser(self):
- return pd.Series(pd.date_range("1994-4-30 10:30:15", periods=3, freq="M"))
+ def datetime_pdf(self):
+ psers = {
+ "this": self.pser,
+ "that": pd.Series(pd.date_range("1994-2-1 10:30:15", periods=3, freq="D")),
+ }
+ return pd.concat(psers, axis=1)
@property
- def other_psser(self):
- return ps.from_pandas(self.other_pser)
+ def datetime_psdf(self):
+ return ps.from_pandas(self.datetime_pdf)
@property
def some_datetime(self):
@@ -53,9 +55,8 @@ class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: self.psser + 1)
self.assertRaises(TypeError, lambda: self.psser + self.some_datetime)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser + psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser + psser)
def test_sub(self):
self.assertRaises(TypeError, lambda: self.psser - "x")
@@ -64,60 +65,62 @@ class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
(self.pser - self.some_datetime).dt.total_seconds().astype("int"),
self.psser - self.some_datetime,
)
- with option_context("compute.ops_on_diff_frames", True):
- for pser, psser in self.pser_psser_pairs:
- if pser.dtype == np.dtype("<M8[ns]"):
- self.assert_eq(
- (self.pser - pser).dt.total_seconds().astype("int"),
- (self.psser - psser).sort_index(),
- )
- else:
- self.assertRaises(TypeError, lambda: self.psser - psser)
+
+ pdf, psdf = self.pdf, self.psdf
+ for col in self.df_cols:
+ if col == "datetime":
+ self.assert_eq(
+ (pdf["datetime"] - pdf[col]).dt.total_seconds().astype("int"),
+ psdf["datetime"] - psdf[col],
+ )
+ else:
+ self.assertRaises(TypeError, lambda: psdf["datetime"] - psdf[col])
+
+ pdf, psdf = self.datetime_pdf, self.datetime_psdf
+ self.assert_eq(
+ (pdf["that"] - pdf["this"]).dt.total_seconds().astype("int"),
+ psdf["that"] - psdf["this"],
+ )
def test_mul(self):
self.assertRaises(TypeError, lambda: self.psser * "x")
self.assertRaises(TypeError, lambda: self.psser * 1)
self.assertRaises(TypeError, lambda: self.psser * self.some_datetime)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser * psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser * psser)
def test_truediv(self):
self.assertRaises(TypeError, lambda: self.psser / "x")
self.assertRaises(TypeError, lambda: self.psser / 1)
self.assertRaises(TypeError, lambda: self.psser / self.some_datetime)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser / psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser / psser)
def test_floordiv(self):
self.assertRaises(TypeError, lambda: self.psser // "x")
self.assertRaises(TypeError, lambda: self.psser // 1)
self.assertRaises(TypeError, lambda: self.psser // self.some_datetime)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser // psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser // psser)
def test_mod(self):
self.assertRaises(TypeError, lambda: self.psser % "x")
self.assertRaises(TypeError, lambda: self.psser % 1)
self.assertRaises(TypeError, lambda: self.psser % self.some_datetime)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser % psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser % psser)
def test_pow(self):
self.assertRaises(TypeError, lambda: self.psser ** "x")
self.assertRaises(TypeError, lambda: self.psser ** 1)
self.assertRaises(TypeError, lambda: self.psser ** self.some_datetime)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser ** psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser ** psser)
def test_radd(self):
self.assertRaises(TypeError, lambda: "x" + self.psser)
@@ -202,46 +205,34 @@ class DatetimeOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: ~self.psser)
def test_eq(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser == self.other_pser, (self.psser == self.other_psser).sort_index()
- )
- self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
+ pdf, psdf = self.datetime_pdf, self.datetime_psdf
+ self.assert_eq(pdf["this"] == pdf["that"], psdf["this"] == psdf["that"])
+ self.assert_eq(pdf["this"] == pdf["this"], psdf["this"] == psdf["this"])
def test_ne(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser != self.other_pser, (self.psser != self.other_psser).sort_index()
- )
- self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
+ pdf, psdf = self.datetime_pdf, self.datetime_psdf
+ self.assert_eq(pdf["this"] != pdf["that"], psdf["this"] != psdf["that"])
+ self.assert_eq(pdf["this"] != pdf["this"], psdf["this"] != psdf["this"])
def test_lt(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser < self.other_pser, (self.psser < self.other_psser).sort_index()
- )
- self.assert_eq(self.pser < self.pser, (self.psser < self.psser).sort_index())
+ pdf, psdf = self.datetime_pdf, self.datetime_psdf
+ self.assert_eq(pdf["this"] < pdf["that"], psdf["this"] < psdf["that"])
+ self.assert_eq(pdf["this"] < pdf["this"], psdf["this"] < psdf["this"])
def test_le(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser <= self.other_pser, (self.psser <= self.other_psser).sort_index()
- )
- self.assert_eq(self.pser <= self.pser, (self.psser <= self.psser).sort_index())
+ pdf, psdf = self.datetime_pdf, self.datetime_psdf
+ self.assert_eq(pdf["this"] <= pdf["that"], psdf["this"] <= psdf["that"])
+ self.assert_eq(pdf["this"] <= pdf["this"], psdf["this"] <= psdf["this"])
def test_gt(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser > self.other_pser, (self.psser > self.other_psser).sort_index()
- )
- self.assert_eq(self.pser > self.pser, (self.psser > self.psser).sort_index())
+ pdf, psdf = self.datetime_pdf, self.datetime_psdf
+ self.assert_eq(pdf["this"] > pdf["that"], psdf["this"] > psdf["that"])
+ self.assert_eq(pdf["this"] > pdf["this"], psdf["this"] > psdf["this"])
def test_ge(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(
- self.pser >= self.other_pser, (self.psser >= self.other_psser).sort_index()
- )
- self.assert_eq(self.pser >= self.pser, (self.psser >= self.psser).sort_index())
+ pdf, psdf = self.datetime_pdf, self.datetime_psdf
+ self.assert_eq(pdf["this"] >= pdf["that"], psdf["this"] >= psdf["that"])
+ self.assert_eq(pdf["this"] >= pdf["this"], psdf["this"] >= psdf["this"])
if __name__ == "__main__":
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py
index a7f0b6c..c2b6be2 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_null_ops.py
@@ -19,7 +19,6 @@ import pandas as pd
from pandas.api.types import CategoricalDtype
import pyspark.pandas as ps
-from pyspark.pandas.config import option_context
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
from pyspark.testing.pandasutils import PandasOnSparkTestCase
@@ -37,57 +36,50 @@ class NullOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: self.psser + "x")
self.assertRaises(TypeError, lambda: self.psser + 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser + psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser + psser)
def test_sub(self):
self.assertRaises(TypeError, lambda: self.psser - "x")
self.assertRaises(TypeError, lambda: self.psser - 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser - psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser - psser)
def test_mul(self):
self.assertRaises(TypeError, lambda: self.psser * "x")
self.assertRaises(TypeError, lambda: self.psser * 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser * psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser * psser)
def test_truediv(self):
self.assertRaises(TypeError, lambda: self.psser / "x")
self.assertRaises(TypeError, lambda: self.psser / 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser / psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser / psser)
def test_floordiv(self):
self.assertRaises(TypeError, lambda: self.psser // "x")
self.assertRaises(TypeError, lambda: self.psser // 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser // psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser // psser)
def test_mod(self):
self.assertRaises(TypeError, lambda: self.psser % "x")
self.assertRaises(TypeError, lambda: self.psser % 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser % psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser % psser)
def test_pow(self):
self.assertRaises(TypeError, lambda: self.psser ** "x")
self.assertRaises(TypeError, lambda: self.psser ** 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser ** psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser ** psser)
def test_radd(self):
self.assertRaises(TypeError, lambda: "x" + self.psser)
@@ -145,28 +137,28 @@ class NullOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: ~self.psser)
def test_eq(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
+ pser, psser = self.pser, self.psser
+ self.assert_eq(pser == pser, psser == psser)
def test_ne(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
+ pser, psser = self.pser, self.psser
+ self.assert_eq(pser != pser, psser != psser)
def test_lt(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(self.pser < self.pser, (self.psser < self.psser).sort_index())
+ pser, psser = self.pser, self.psser
+ self.assert_eq(pser < pser, psser < psser)
def test_le(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(self.pser <= self.pser, (self.psser <= self.psser).sort_index())
+ pser, psser = self.pser, self.psser
+ self.assert_eq(pser <= pser, psser <= psser)
def test_gt(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(self.pser > self.pser, (self.psser > self.psser).sort_index())
+ pser, psser = self.pser, self.psser
+ self.assert_eq(pser > pser, psser > psser)
def test_ge(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(self.pser >= self.pser, (self.psser >= self.psser).sort_index())
+ pser, psser = self.pser, self.psser
+ self.assert_eq(pser >= pser, psser >= psser)
if __name__ == "__main__":
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py
index 232fec1..70175c4 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_udt_ops.py
@@ -19,7 +19,6 @@ import pandas as pd
import pyspark.pandas as ps
from pyspark.ml.linalg import SparseVector
-from pyspark.pandas.config import option_context
from pyspark.pandas.tests.data_type_ops.testing_utils import TestCasesUtils
from pyspark.testing.pandasutils import PandasOnSparkTestCase
@@ -34,61 +33,67 @@ class UDTOpsTest(PandasOnSparkTestCase, TestCasesUtils):
def psser(self):
return ps.from_pandas(self.pser)
+ @property
+ def udt_pdf(self):
+ sparse_values = {0: 0.2, 1: 1.0}
+ psers = {
+ "this": self.pser,
+ "that": pd.Series([SparseVector(len(sparse_values), sparse_values)]),
+ }
+ return pd.concat(psers, axis=1)
+
+ @property
+ def udt_psdf(self):
+ return ps.from_pandas(self.udt_pdf)
+
def test_add(self):
self.assertRaises(TypeError, lambda: self.psser + "x")
self.assertRaises(TypeError, lambda: self.psser + 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser + psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser + psser)
def test_sub(self):
self.assertRaises(TypeError, lambda: self.psser - "x")
self.assertRaises(TypeError, lambda: self.psser - 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser - psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser - psser)
def test_mul(self):
self.assertRaises(TypeError, lambda: self.psser * "x")
self.assertRaises(TypeError, lambda: self.psser * 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser * psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser * psser)
def test_truediv(self):
self.assertRaises(TypeError, lambda: self.psser / "x")
self.assertRaises(TypeError, lambda: self.psser / 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser / psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser / psser)
def test_floordiv(self):
self.assertRaises(TypeError, lambda: self.psser // "x")
self.assertRaises(TypeError, lambda: self.psser // 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser // psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser // psser)
def test_mod(self):
self.assertRaises(TypeError, lambda: self.psser % "x")
self.assertRaises(TypeError, lambda: self.psser % 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser % psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser % psser)
def test_pow(self):
self.assertRaises(TypeError, lambda: self.psser ** "x")
self.assertRaises(TypeError, lambda: self.psser ** 1)
- with option_context("compute.ops_on_diff_frames", True):
- for psser in self.pssers:
- self.assertRaises(TypeError, lambda: self.psser ** psser)
+ for psser in self.pssers:
+ self.assertRaises(TypeError, lambda: self.psser ** psser)
def test_radd(self):
self.assertRaises(TypeError, lambda: "x" + self.psser)
@@ -141,12 +146,14 @@ class UDTOpsTest(PandasOnSparkTestCase, TestCasesUtils):
self.assertRaises(TypeError, lambda: ~self.psser)
def test_eq(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(self.pser == self.pser, (self.psser == self.psser).sort_index())
+ pdf, psdf = self.udt_pdf, self.udt_psdf
+ self.assert_eq(pdf["this"] == pdf["this"], psdf["this"] == psdf["this"])
+ self.assert_eq(pdf["this"] == pdf["that"], psdf["this"] == psdf["that"])
def test_ne(self):
- with option_context("compute.ops_on_diff_frames", True):
- self.assert_eq(self.pser != self.pser, (self.psser != self.psser).sort_index())
+ pdf, psdf = self.udt_pdf, self.udt_psdf
+ self.assert_eq(pdf["this"] != pdf["this"], psdf["this"] != psdf["this"])
+ self.assert_eq(pdf["this"] != pdf["that"], psdf["this"] != psdf["that"])
def test_lt(self):
self.assertRaisesRegex(
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org