You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/08/03 07:27:06 UTC
[spark] branch branch-3.2 updated: [SPARK-36192][PYTHON] Better
error messages for DataTypeOps against lists
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new c22a25b [SPARK-36192][PYTHON] Better error messages for DataTypeOps against lists
c22a25b is described below
commit c22a25b76a3071ca43b6f3e3d547677f2e752edf
Author: Xinrong Meng <xi...@databricks.com>
AuthorDate: Tue Aug 3 16:25:49 2021 +0900
[SPARK-36192][PYTHON] Better error messages for DataTypeOps against lists
### What changes were proposed in this pull request?
Better error messages for DataTypeOps against lists.
### Why are the changes needed?
Currently, DataTypeOps against lists throw a Py4JJavaError; we should throw a TypeError with a proper message instead.
### Does this PR introduce _any_ user-facing change?
Yes. A TypeError message will be shown rather than a Py4JJavaError.
From:
```py
>>> import pyspark.pandas as ps
>>> ps.Series([1, 2, 3]) > [3, 2, 1]
Traceback (most recent call last):
...
py4j.protocol.Py4JJavaError: An error occurred while calling o107.gt.
: java.lang.RuntimeException: Unsupported literal type class java.util.ArrayList [3, 2, 1]
...
```
To:
```py
>>> import pyspark.pandas as ps
>>> ps.Series([1, 2, 3]) > [3, 2, 1]
Traceback (most recent call last):
...
TypeError: The operation can not be applied to list.
```
### How was this patch tested?
Unit tests.
Closes #33581 from xinrong-databricks/data_type_ops_list.
Authored-by: Xinrong Meng <xi...@databricks.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
(cherry picked from commit 8ca11fe39f6828bb08f123d05c2a4b44da5231b7)
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
python/pyspark/pandas/data_type_ops/base.py | 12 +++++++++++
python/pyspark/pandas/data_type_ops/binary_ops.py | 11 ++++++++++
python/pyspark/pandas/data_type_ops/boolean_ops.py | 25 ++++++++++++++++++++++
.../pandas/data_type_ops/categorical_ops.py | 8 ++++++-
python/pyspark/pandas/data_type_ops/complex_ops.py | 10 +++++++++
python/pyspark/pandas/data_type_ops/date_ops.py | 7 ++++++
.../pyspark/pandas/data_type_ops/datetime_ops.py | 7 ++++++
python/pyspark/pandas/data_type_ops/null_ops.py | 5 +++++
python/pyspark/pandas/data_type_ops/num_ops.py | 24 +++++++++++++++++++++
python/pyspark/pandas/data_type_ops/string_ops.py | 9 ++++++++
.../tests/data_type_ops/test_categorical_ops.py | 12 +++++------
11 files changed, 123 insertions(+), 7 deletions(-)
diff --git a/python/pyspark/pandas/data_type_ops/base.py b/python/pyspark/pandas/data_type_ops/base.py
index 743b2c5..c69715f 100644
--- a/python/pyspark/pandas/data_type_ops/base.py
+++ b/python/pyspark/pandas/data_type_ops/base.py
@@ -188,6 +188,12 @@ def _as_other_type(
return index_ops._with_new_scol(scol, field=InternalField(dtype=dtype))
+def _sanitize_list_like(operand: Any) -> None:
+ """Raise TypeError if operand is list-like."""
+ if isinstance(operand, (list, tuple, dict, set)):
+ raise TypeError("The operation can not be applied to %s." % type(operand).__name__)
+
+
class DataTypeOps(object, metaclass=ABCMeta):
"""The base class for binary operations of pandas-on-Spark objects (of different data types)."""
@@ -314,9 +320,11 @@ class DataTypeOps(object, metaclass=ABCMeta):
raise TypeError("Bitwise or can not be applied to %s." % self.pretty_name)
def rand(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return left.__and__(right)
def ror(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return left.__or__(right)
def neg(self, operand: IndexOpsLike) -> IndexOpsLike:
@@ -340,11 +348,15 @@ class DataTypeOps(object, metaclass=ABCMeta):
def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
+
return column_op(Column.__eq__)(left, right)
def ne(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
+
return column_op(Column.__ne__)(left, right)
def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
diff --git a/python/pyspark/pandas/data_type_ops/binary_ops.py b/python/pyspark/pandas/data_type_ops/binary_ops.py
index 929bed4..8247ade 100644
--- a/python/pyspark/pandas/data_type_ops/binary_ops.py
+++ b/python/pyspark/pandas/data_type_ops/binary_ops.py
@@ -27,6 +27,7 @@ from pyspark.pandas.data_type_ops.base import (
_as_categorical_type,
_as_other_type,
_as_string_type,
+ _sanitize_list_like,
)
from pyspark.pandas.spark import functions as SF
from pyspark.pandas.typedef import pandas_on_spark_type
@@ -44,6 +45,8 @@ class BinaryOps(DataTypeOps):
return "binaries"
def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
+
if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, BinaryType):
return column_op(F.concat)(left, right)
elif isinstance(right, bytes):
@@ -54,6 +57,8 @@ class BinaryOps(DataTypeOps):
)
def radd(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
+
if isinstance(right, bytes):
return cast(
SeriesOrIndex, left._with_new_scol(F.concat(SF.lit(right), left.spark.column))
@@ -66,21 +71,27 @@ class BinaryOps(DataTypeOps):
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
+
return column_op(Column.__lt__)(left, right)
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
+
return column_op(Column.__le__)(left, right)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__ge__)(left, right)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__gt__)(left, right)
def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
diff --git a/python/pyspark/pandas/data_type_ops/boolean_ops.py b/python/pyspark/pandas/data_type_ops/boolean_ops.py
index 6b257e0..cb77945 100644
--- a/python/pyspark/pandas/data_type_ops/boolean_ops.py
+++ b/python/pyspark/pandas/data_type_ops/boolean_ops.py
@@ -30,6 +30,7 @@ from pyspark.pandas.data_type_ops.base import (
_as_bool_type,
_as_categorical_type,
_as_other_type,
+ _sanitize_list_like,
)
from pyspark.pandas.spark import functions as SF
from pyspark.pandas.typedef.typehints import as_spark_type, extension_dtypes, pandas_on_spark_type
@@ -48,6 +49,7 @@ class BooleanOps(DataTypeOps):
return "bools"
def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right):
raise TypeError(
"Addition can not be applied to %s and the given type." % self.pretty_name
@@ -67,6 +69,7 @@ class BooleanOps(DataTypeOps):
return left + right
def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right, allow_bool=False):
raise TypeError(
"Subtraction can not be applied to %s and the given type." % self.pretty_name
@@ -80,6 +83,7 @@ class BooleanOps(DataTypeOps):
return left - right
def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right):
raise TypeError(
"Multiplication can not be applied to %s and the given type." % self.pretty_name
@@ -98,6 +102,7 @@ class BooleanOps(DataTypeOps):
return left * right
def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right, allow_bool=False):
raise TypeError(
"True division can not be applied to %s and the given type." % self.pretty_name
@@ -111,6 +116,7 @@ class BooleanOps(DataTypeOps):
return left / right
def floordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right, allow_bool=False):
raise TypeError(
"Floor division can not be applied to %s and the given type." % self.pretty_name
@@ -124,6 +130,7 @@ class BooleanOps(DataTypeOps):
return left // right
def mod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right, allow_bool=False):
raise TypeError(
"Modulo can not be applied to %s and the given type." % self.pretty_name
@@ -137,6 +144,7 @@ class BooleanOps(DataTypeOps):
return left % right
def pow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right, allow_bool=False):
raise TypeError(
"Exponentiation can not be applied to %s and the given type." % self.pretty_name
@@ -150,6 +158,7 @@ class BooleanOps(DataTypeOps):
return left ** right
def radd(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if isinstance(right, bool):
return left.__or__(right)
elif isinstance(right, numbers.Number):
@@ -161,6 +170,7 @@ class BooleanOps(DataTypeOps):
)
def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if isinstance(right, numbers.Number) and not isinstance(right, bool):
left = transform_boolean_operand_to_numeric(left, spark_type=as_spark_type(type(right)))
return right - left
@@ -170,6 +180,7 @@ class BooleanOps(DataTypeOps):
)
def rmul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if isinstance(right, bool):
return left.__and__(right)
elif isinstance(right, numbers.Number):
@@ -181,6 +192,7 @@ class BooleanOps(DataTypeOps):
)
def rtruediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if isinstance(right, numbers.Number) and not isinstance(right, bool):
left = transform_boolean_operand_to_numeric(left, spark_type=as_spark_type(type(right)))
return right / left
@@ -190,6 +202,7 @@ class BooleanOps(DataTypeOps):
)
def rfloordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if isinstance(right, numbers.Number) and not isinstance(right, bool):
left = transform_boolean_operand_to_numeric(left, spark_type=as_spark_type(type(right)))
return right // left
@@ -199,6 +212,7 @@ class BooleanOps(DataTypeOps):
)
def rpow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if isinstance(right, numbers.Number) and not isinstance(right, bool):
left = transform_boolean_operand_to_numeric(left, spark_type=as_spark_type(type(right)))
return right ** left
@@ -208,6 +222,7 @@ class BooleanOps(DataTypeOps):
)
def rmod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if isinstance(right, numbers.Number) and not isinstance(right, bool):
left = transform_boolean_operand_to_numeric(left, spark_type=as_spark_type(type(right)))
return right % left
@@ -217,6 +232,7 @@ class BooleanOps(DataTypeOps):
)
def __and__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, extension_dtypes):
return right.__and__(left)
else:
@@ -233,6 +249,7 @@ class BooleanOps(DataTypeOps):
return column_op(and_func)(left, right)
def __or__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, extension_dtypes):
return right.__or__(left)
else:
@@ -281,15 +298,19 @@ class BooleanOps(DataTypeOps):
return operand
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return column_op(Column.__lt__)(left, right)
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return column_op(Column.__le__)(left, right)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return column_op(Column.__ge__)(left, right)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return column_op(Column.__gt__)(left, right)
def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
@@ -307,6 +328,8 @@ class BooleanExtensionOps(BooleanOps):
return "booleans"
def __and__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
+
def and_func(left: Column, right: Any) -> Column:
if not isinstance(right, Column):
if pd.isna(right):
@@ -318,6 +341,8 @@ class BooleanExtensionOps(BooleanOps):
return column_op(and_func)(left, right)
def __or__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
+
def or_func(left: Column, right: Any) -> Column:
if not isinstance(right, Column):
if pd.isna(right):
diff --git a/python/pyspark/pandas/data_type_ops/categorical_ops.py b/python/pyspark/pandas/data_type_ops/categorical_ops.py
index 36d5181..73af82e 100644
--- a/python/pyspark/pandas/data_type_ops/categorical_ops.py
+++ b/python/pyspark/pandas/data_type_ops/categorical_ops.py
@@ -25,7 +25,7 @@ from pandas.api.types import is_list_like, CategoricalDtype
from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
from pyspark.pandas.base import column_op, IndexOpsMixin
-from pyspark.pandas.data_type_ops.base import DataTypeOps
+from pyspark.pandas.data_type_ops.base import _sanitize_list_like, DataTypeOps
from pyspark.pandas.spark import functions as SF
from pyspark.pandas.typedef import pandas_on_spark_type
from pyspark.sql import functions as F
@@ -64,21 +64,27 @@ class CategoricalOps(DataTypeOps):
return _to_cat(index_ops).astype(dtype)
def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return _compare(left, right, Column.__eq__, is_equality_comparison=True)
def ne(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return _compare(left, right, Column.__ne__, is_equality_comparison=True)
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return _compare(left, right, Column.__lt__)
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return _compare(left, right, Column.__le__)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return _compare(left, right, Column.__gt__)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return _compare(left, right, Column.__ge__)
diff --git a/python/pyspark/pandas/data_type_ops/complex_ops.py b/python/pyspark/pandas/data_type_ops/complex_ops.py
index 41f79d4..bee09f3 100644
--- a/python/pyspark/pandas/data_type_ops/complex_ops.py
+++ b/python/pyspark/pandas/data_type_ops/complex_ops.py
@@ -27,6 +27,7 @@ from pyspark.pandas.data_type_ops.base import (
_as_categorical_type,
_as_other_type,
_as_string_type,
+ _sanitize_list_like,
)
from pyspark.pandas.typedef import pandas_on_spark_type
from pyspark.sql import functions as F, Column
@@ -43,6 +44,7 @@ class ArrayOps(DataTypeOps):
return "arrays"
def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not isinstance(right, IndexOpsMixin) or (
isinstance(right, IndexOpsMixin) and not isinstance(right.spark.data_type, ArrayType)
):
@@ -65,21 +67,25 @@ class ArrayOps(DataTypeOps):
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__lt__)(left, right)
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__le__)(left, right)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__ge__)(left, right)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__gt__)(left, right)
def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
@@ -117,19 +123,23 @@ class StructOps(DataTypeOps):
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__lt__)(left, right)
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__le__)(left, right)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__ge__)(left, right)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__gt__)(left, right)
diff --git a/python/pyspark/pandas/data_type_ops/date_ops.py b/python/pyspark/pandas/data_type_ops/date_ops.py
index 59c8166..54ece76 100644
--- a/python/pyspark/pandas/data_type_ops/date_ops.py
+++ b/python/pyspark/pandas/data_type_ops/date_ops.py
@@ -33,6 +33,7 @@ from pyspark.pandas.data_type_ops.base import (
_as_categorical_type,
_as_other_type,
_as_string_type,
+ _sanitize_list_like,
)
from pyspark.pandas.spark import functions as SF
from pyspark.pandas.typedef import pandas_on_spark_type
@@ -48,6 +49,7 @@ class DateOps(DataTypeOps):
return "dates"
def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
# Note that date subtraction casts arguments to integer. This is to mimic pandas's
# behaviors. pandas returns 'timedelta64[ns]' in days from date's subtraction.
msg = (
@@ -65,6 +67,7 @@ class DateOps(DataTypeOps):
raise TypeError("Date subtraction can only be applied to date series.")
def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
# Note that date subtraction casts arguments to integer. This is to mimic pandas's
# behaviors. pandas returns 'timedelta64[ns]' in days from date's subtraction.
msg = (
@@ -81,21 +84,25 @@ class DateOps(DataTypeOps):
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__lt__)(left, right)
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__le__)(left, right)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__ge__)(left, right)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__gt__)(left, right)
def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
diff --git a/python/pyspark/pandas/data_type_ops/datetime_ops.py b/python/pyspark/pandas/data_type_ops/datetime_ops.py
index f815742..071c22e 100644
--- a/python/pyspark/pandas/data_type_ops/datetime_ops.py
+++ b/python/pyspark/pandas/data_type_ops/datetime_ops.py
@@ -33,6 +33,7 @@ from pyspark.pandas.data_type_ops.base import (
_as_bool_type,
_as_categorical_type,
_as_other_type,
+ _sanitize_list_like,
)
from pyspark.pandas.spark import functions as SF
from pyspark.pandas.typedef import extension_dtypes, pandas_on_spark_type
@@ -48,6 +49,7 @@ class DatetimeOps(DataTypeOps):
return "datetimes"
def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
# Note that timestamp subtraction casts arguments to integer. This is to mimic pandas's
# behaviors. pandas returns 'timedelta64[ns]' from 'datetime64[ns]'s subtraction.
msg = (
@@ -73,6 +75,7 @@ class DatetimeOps(DataTypeOps):
raise TypeError("Datetime subtraction can only be applied to datetime series.")
def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
# Note that timestamp subtraction casts arguments to integer. This is to mimic pandas's
# behaviors. pandas returns 'timedelta64[ns]' from 'datetime64[ns]'s subtraction.
msg = (
@@ -97,21 +100,25 @@ class DatetimeOps(DataTypeOps):
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__lt__)(left, right)
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__le__)(left, right)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__ge__)(left, right)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__gt__)(left, right)
def prepare(self, col: pd.Series) -> pd.Series:
diff --git a/python/pyspark/pandas/data_type_ops/null_ops.py b/python/pyspark/pandas/data_type_ops/null_ops.py
index f26de6f..9205d5e 100644
--- a/python/pyspark/pandas/data_type_ops/null_ops.py
+++ b/python/pyspark/pandas/data_type_ops/null_ops.py
@@ -26,6 +26,7 @@ from pyspark.pandas.data_type_ops.base import (
_as_categorical_type,
_as_other_type,
_as_string_type,
+ _sanitize_list_like,
)
from pyspark.pandas._typing import SeriesOrIndex
from pyspark.pandas.typedef import pandas_on_spark_type
@@ -45,21 +46,25 @@ class NullOps(DataTypeOps):
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__lt__)(left, right)
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__le__)(left, right)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__ge__)(left, right)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__gt__)(left, right)
def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
diff --git a/python/pyspark/pandas/data_type_ops/num_ops.py b/python/pyspark/pandas/data_type_ops/num_ops.py
index f84c1af..fd9adbb 100644
--- a/python/pyspark/pandas/data_type_ops/num_ops.py
+++ b/python/pyspark/pandas/data_type_ops/num_ops.py
@@ -32,6 +32,7 @@ from pyspark.pandas.data_type_ops.base import (
_as_categorical_type,
_as_other_type,
_as_string_type,
+ _sanitize_list_like,
)
from pyspark.pandas.spark import functions as SF
from pyspark.pandas.typedef.typehints import extension_dtypes, pandas_on_spark_type
@@ -65,6 +66,7 @@ class NumericOps(DataTypeOps):
return "numerics"
def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right):
raise TypeError("Addition can not be applied to given types.")
@@ -72,6 +74,7 @@ class NumericOps(DataTypeOps):
return column_op(Column.__add__)(left, right)
def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right):
raise TypeError("Subtraction can not be applied to given types.")
@@ -79,6 +82,7 @@ class NumericOps(DataTypeOps):
return column_op(Column.__sub__)(left, right)
def mod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right):
raise TypeError("Modulo can not be applied to given types.")
@@ -89,6 +93,7 @@ class NumericOps(DataTypeOps):
return column_op(mod)(left, right)
def pow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right):
raise TypeError("Exponentiation can not be applied to given types.")
@@ -103,24 +108,28 @@ class NumericOps(DataTypeOps):
return column_op(pow_func)(left, right)
def radd(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not isinstance(right, numbers.Number):
raise TypeError("Addition can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right)
return column_op(Column.__radd__)(left, right)
def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not isinstance(right, numbers.Number):
raise TypeError("Subtraction can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right)
return column_op(Column.__rsub__)(left, right)
def rmul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not isinstance(right, numbers.Number):
raise TypeError("Multiplication can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right)
return column_op(Column.__rmul__)(left, right)
def rpow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not isinstance(right, numbers.Number):
raise TypeError("Exponentiation can not be applied to given types.")
@@ -131,6 +140,7 @@ class NumericOps(DataTypeOps):
return column_op(rpow_func)(left, right)
def rmod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not isinstance(right, numbers.Number):
raise TypeError("Modulo can not be applied to given types.")
@@ -149,15 +159,19 @@ class NumericOps(DataTypeOps):
)
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return column_op(Column.__lt__)(left, right)
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return column_op(Column.__le__)(left, right)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return column_op(Column.__ge__)(left, right)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
return column_op(Column.__gt__)(left, right)
@@ -172,6 +186,7 @@ class IntegralOps(NumericOps):
return "integrals"
def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType):
return column_op(SF.repeat)(right, left)
@@ -182,6 +197,7 @@ class IntegralOps(NumericOps):
return column_op(Column.__mul__)(left, right)
def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right):
raise TypeError("True division can not be applied to given types.")
@@ -194,6 +210,7 @@ class IntegralOps(NumericOps):
return numpy_column_op(truediv)(left, right)
def floordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right):
raise TypeError("Floor division can not be applied to given types.")
@@ -208,6 +225,7 @@ class IntegralOps(NumericOps):
return numpy_column_op(floordiv)(left, right)
def rtruediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not isinstance(right, numbers.Number):
raise TypeError("True division can not be applied to given types.")
@@ -220,6 +238,7 @@ class IntegralOps(NumericOps):
return numpy_column_op(rtruediv)(left, right)
def rfloordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not isinstance(right, numbers.Number):
raise TypeError("Floor division can not be applied to given types.")
@@ -252,6 +271,7 @@ class FractionalOps(NumericOps):
return "fractions"
def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right):
raise TypeError("Multiplication can not be applied to given types.")
@@ -259,6 +279,7 @@ class FractionalOps(NumericOps):
return column_op(Column.__mul__)(left, right)
def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right):
raise TypeError("True division can not be applied to given types.")
@@ -275,6 +296,7 @@ class FractionalOps(NumericOps):
return numpy_column_op(truediv)(left, right)
def floordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not is_valid_operand_for_numeric_arithmetic(right):
raise TypeError("Floor division can not be applied to given types.")
@@ -293,6 +315,7 @@ class FractionalOps(NumericOps):
return numpy_column_op(floordiv)(left, right)
def rtruediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not isinstance(right, numbers.Number):
raise TypeError("True division can not be applied to given types.")
@@ -305,6 +328,7 @@ class FractionalOps(NumericOps):
return numpy_column_op(rtruediv)(left, right)
def rfloordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if not isinstance(right, numbers.Number):
raise TypeError("Floor division can not be applied to given types.")
diff --git a/python/pyspark/pandas/data_type_ops/string_ops.py b/python/pyspark/pandas/data_type_ops/string_ops.py
index bfe36e1..69e1717 100644
--- a/python/pyspark/pandas/data_type_ops/string_ops.py
+++ b/python/pyspark/pandas/data_type_ops/string_ops.py
@@ -30,6 +30,7 @@ from pyspark.pandas.data_type_ops.base import (
_as_categorical_type,
_as_other_type,
_as_string_type,
+ _sanitize_list_like,
)
from pyspark.pandas.spark import functions as SF
from pyspark.pandas.typedef import extension_dtypes, pandas_on_spark_type
@@ -47,6 +48,7 @@ class StringOps(DataTypeOps):
return "strings"
def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if isinstance(right, str):
return cast(
SeriesOrIndex,
@@ -60,6 +62,7 @@ class StringOps(DataTypeOps):
raise TypeError("Addition can not be applied to given types.")
def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if isinstance(right, int):
return cast(
SeriesOrIndex,
@@ -77,6 +80,7 @@ class StringOps(DataTypeOps):
raise TypeError("Multiplication can not be applied to given types.")
def radd(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if isinstance(right, str):
return cast(
SeriesOrIndex,
@@ -88,6 +92,7 @@ class StringOps(DataTypeOps):
raise TypeError("Addition can not be applied to given types.")
def rmul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+ _sanitize_list_like(right)
if isinstance(right, int):
return cast(
SeriesOrIndex,
@@ -101,21 +106,25 @@ class StringOps(DataTypeOps):
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__lt__)(left, right)
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__le__)(left, right)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__ge__)(left, right)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
+ _sanitize_list_like(right)
return column_op(Column.__gt__)(left, right)
def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
index 1dc9c39..6ac9073 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
@@ -243,7 +243,7 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
)
self.assertRaisesRegex(
TypeError,
- "Cannot compare a Categorical with the given type",
+ "The operation can not be applied to list",
lambda: ordered_psser == [1, 2, 3],
)
@@ -306,7 +306,7 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
)
self.assertRaisesRegex(
TypeError,
- "Cannot compare a Categorical with the given type",
+ "The operation can not be applied to list.",
lambda: ordered_psser != [1, 2, 3],
)
self.assert_eq(
@@ -367,7 +367,7 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
)
self.assertRaisesRegex(
TypeError,
- "Cannot compare a Categorical with the given type",
+ "The operation can not be applied to list",
lambda: ordered_psser < [1, 2, 3],
)
self.assert_eq(
@@ -416,7 +416,7 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
)
self.assertRaisesRegex(
TypeError,
- "Cannot compare a Categorical with the given type",
+ "The operation can not be applied to list",
lambda: ordered_psser <= [1, 2, 3],
)
self.assert_eq(
@@ -465,7 +465,7 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
)
self.assertRaisesRegex(
TypeError,
- "Cannot compare a Categorical with the given type",
+ "The operation can not be applied to list",
lambda: ordered_psser > [1, 2, 3],
)
self.assert_eq(
@@ -514,7 +514,7 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
)
self.assertRaisesRegex(
TypeError,
- "Cannot compare a Categorical with the given type",
+ "The operation can not be applied to list",
lambda: ordered_psser >= [1, 2, 3],
)
self.assert_eq(
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org