You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/08/03 07:27:06 UTC

[spark] branch branch-3.2 updated: [SPARK-36192][PYTHON] Better error messages for DataTypeOps against lists

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new c22a25b  [SPARK-36192][PYTHON] Better error messages for DataTypeOps against lists
c22a25b is described below

commit c22a25b76a3071ca43b6f3e3d547677f2e752edf
Author: Xinrong Meng <xi...@databricks.com>
AuthorDate: Tue Aug 3 16:25:49 2021 +0900

    [SPARK-36192][PYTHON] Better error messages for DataTypeOps against lists
    
    ### What changes were proposed in this pull request?
    Better error messages for DataTypeOps against lists.
    
    ### Why are the changes needed?
    Currently, DataTypeOps against lists throw a Py4JJavaError; they should throw a TypeError with a proper message instead.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. A TypeError message will be shown rather than a Py4JJavaError.
    
    From:
    ```py
    >>> import pyspark.pandas as ps
    >>> ps.Series([1, 2, 3]) > [3, 2, 1]
    Traceback (most recent call last):
    ...
    py4j.protocol.Py4JJavaError: An error occurred while calling o107.gt.
    : java.lang.RuntimeException: Unsupported literal type class java.util.ArrayList [3, 2, 1]
    ...
    ```
    
    To:
    ```py
    >>> import pyspark.pandas as ps
    >>> ps.Series([1, 2, 3]) > [3, 2, 1]
    Traceback (most recent call last):
    ...
    TypeError: The operation can not be applied to list.
    ```
    
    ### How was this patch tested?
    Unit tests.
    
    Closes #33581 from xinrong-databricks/data_type_ops_list.
    
    Authored-by: Xinrong Meng <xi...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
    (cherry picked from commit 8ca11fe39f6828bb08f123d05c2a4b44da5231b7)
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 python/pyspark/pandas/data_type_ops/base.py        | 12 +++++++++++
 python/pyspark/pandas/data_type_ops/binary_ops.py  | 11 ++++++++++
 python/pyspark/pandas/data_type_ops/boolean_ops.py | 25 ++++++++++++++++++++++
 .../pandas/data_type_ops/categorical_ops.py        |  8 ++++++-
 python/pyspark/pandas/data_type_ops/complex_ops.py | 10 +++++++++
 python/pyspark/pandas/data_type_ops/date_ops.py    |  7 ++++++
 .../pyspark/pandas/data_type_ops/datetime_ops.py   |  7 ++++++
 python/pyspark/pandas/data_type_ops/null_ops.py    |  5 +++++
 python/pyspark/pandas/data_type_ops/num_ops.py     | 24 +++++++++++++++++++++
 python/pyspark/pandas/data_type_ops/string_ops.py  |  9 ++++++++
 .../tests/data_type_ops/test_categorical_ops.py    | 12 +++++------
 11 files changed, 123 insertions(+), 7 deletions(-)

diff --git a/python/pyspark/pandas/data_type_ops/base.py b/python/pyspark/pandas/data_type_ops/base.py
index 743b2c5..c69715f 100644
--- a/python/pyspark/pandas/data_type_ops/base.py
+++ b/python/pyspark/pandas/data_type_ops/base.py
@@ -188,6 +188,12 @@ def _as_other_type(
     return index_ops._with_new_scol(scol, field=InternalField(dtype=dtype))
 
 
+def _sanitize_list_like(operand: Any) -> None:
+    """Raise TypeError if operand is list-like."""
+    if isinstance(operand, (list, tuple, dict, set)):
+        raise TypeError("The operation can not be applied to %s." % type(operand).__name__)
+
+
 class DataTypeOps(object, metaclass=ABCMeta):
     """The base class for binary operations of pandas-on-Spark objects (of different data types)."""
 
@@ -314,9 +320,11 @@ class DataTypeOps(object, metaclass=ABCMeta):
         raise TypeError("Bitwise or can not be applied to %s." % self.pretty_name)
 
     def rand(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return left.__and__(right)
 
     def ror(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return left.__or__(right)
 
     def neg(self, operand: IndexOpsLike) -> IndexOpsLike:
@@ -340,11 +348,15 @@ class DataTypeOps(object, metaclass=ABCMeta):
     def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
+
         return column_op(Column.__eq__)(left, right)
 
     def ne(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
+
         return column_op(Column.__ne__)(left, right)
 
     def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
diff --git a/python/pyspark/pandas/data_type_ops/binary_ops.py b/python/pyspark/pandas/data_type_ops/binary_ops.py
index 929bed4..8247ade 100644
--- a/python/pyspark/pandas/data_type_ops/binary_ops.py
+++ b/python/pyspark/pandas/data_type_ops/binary_ops.py
@@ -27,6 +27,7 @@ from pyspark.pandas.data_type_ops.base import (
     _as_categorical_type,
     _as_other_type,
     _as_string_type,
+    _sanitize_list_like,
 )
 from pyspark.pandas.spark import functions as SF
 from pyspark.pandas.typedef import pandas_on_spark_type
@@ -44,6 +45,8 @@ class BinaryOps(DataTypeOps):
         return "binaries"
 
     def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
+
         if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, BinaryType):
             return column_op(F.concat)(left, right)
         elif isinstance(right, bytes):
@@ -54,6 +57,8 @@ class BinaryOps(DataTypeOps):
             )
 
     def radd(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
+
         if isinstance(right, bytes):
             return cast(
                 SeriesOrIndex, left._with_new_scol(F.concat(SF.lit(right), left.spark.column))
@@ -66,21 +71,27 @@ class BinaryOps(DataTypeOps):
     def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
+
         return column_op(Column.__lt__)(left, right)
 
     def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
+
         return column_op(Column.__le__)(left, right)
 
     def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__ge__)(left, right)
 
     def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__gt__)(left, right)
 
     def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
diff --git a/python/pyspark/pandas/data_type_ops/boolean_ops.py b/python/pyspark/pandas/data_type_ops/boolean_ops.py
index 6b257e0..cb77945 100644
--- a/python/pyspark/pandas/data_type_ops/boolean_ops.py
+++ b/python/pyspark/pandas/data_type_ops/boolean_ops.py
@@ -30,6 +30,7 @@ from pyspark.pandas.data_type_ops.base import (
     _as_bool_type,
     _as_categorical_type,
     _as_other_type,
+    _sanitize_list_like,
 )
 from pyspark.pandas.spark import functions as SF
 from pyspark.pandas.typedef.typehints import as_spark_type, extension_dtypes, pandas_on_spark_type
@@ -48,6 +49,7 @@ class BooleanOps(DataTypeOps):
         return "bools"
 
     def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right):
             raise TypeError(
                 "Addition can not be applied to %s and the given type." % self.pretty_name
@@ -67,6 +69,7 @@ class BooleanOps(DataTypeOps):
                 return left + right
 
     def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right, allow_bool=False):
             raise TypeError(
                 "Subtraction can not be applied to %s and the given type." % self.pretty_name
@@ -80,6 +83,7 @@ class BooleanOps(DataTypeOps):
             return left - right
 
     def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right):
             raise TypeError(
                 "Multiplication can not be applied to %s and the given type." % self.pretty_name
@@ -98,6 +102,7 @@ class BooleanOps(DataTypeOps):
                 return left * right
 
     def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right, allow_bool=False):
             raise TypeError(
                 "True division can not be applied to %s and the given type." % self.pretty_name
@@ -111,6 +116,7 @@ class BooleanOps(DataTypeOps):
             return left / right
 
     def floordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right, allow_bool=False):
             raise TypeError(
                 "Floor division can not be applied to %s and the given type." % self.pretty_name
@@ -124,6 +130,7 @@ class BooleanOps(DataTypeOps):
             return left // right
 
     def mod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right, allow_bool=False):
             raise TypeError(
                 "Modulo can not be applied to %s and the given type." % self.pretty_name
@@ -137,6 +144,7 @@ class BooleanOps(DataTypeOps):
             return left % right
 
     def pow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right, allow_bool=False):
             raise TypeError(
                 "Exponentiation can not be applied to %s and the given type." % self.pretty_name
@@ -150,6 +158,7 @@ class BooleanOps(DataTypeOps):
             return left ** right
 
     def radd(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if isinstance(right, bool):
             return left.__or__(right)
         elif isinstance(right, numbers.Number):
@@ -161,6 +170,7 @@ class BooleanOps(DataTypeOps):
             )
 
     def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if isinstance(right, numbers.Number) and not isinstance(right, bool):
             left = transform_boolean_operand_to_numeric(left, spark_type=as_spark_type(type(right)))
             return right - left
@@ -170,6 +180,7 @@ class BooleanOps(DataTypeOps):
             )
 
     def rmul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if isinstance(right, bool):
             return left.__and__(right)
         elif isinstance(right, numbers.Number):
@@ -181,6 +192,7 @@ class BooleanOps(DataTypeOps):
             )
 
     def rtruediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if isinstance(right, numbers.Number) and not isinstance(right, bool):
             left = transform_boolean_operand_to_numeric(left, spark_type=as_spark_type(type(right)))
             return right / left
@@ -190,6 +202,7 @@ class BooleanOps(DataTypeOps):
             )
 
     def rfloordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if isinstance(right, numbers.Number) and not isinstance(right, bool):
             left = transform_boolean_operand_to_numeric(left, spark_type=as_spark_type(type(right)))
             return right // left
@@ -199,6 +212,7 @@ class BooleanOps(DataTypeOps):
             )
 
     def rpow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if isinstance(right, numbers.Number) and not isinstance(right, bool):
             left = transform_boolean_operand_to_numeric(left, spark_type=as_spark_type(type(right)))
             return right ** left
@@ -208,6 +222,7 @@ class BooleanOps(DataTypeOps):
             )
 
     def rmod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if isinstance(right, numbers.Number) and not isinstance(right, bool):
             left = transform_boolean_operand_to_numeric(left, spark_type=as_spark_type(type(right)))
             return right % left
@@ -217,6 +232,7 @@ class BooleanOps(DataTypeOps):
             )
 
     def __and__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, extension_dtypes):
             return right.__and__(left)
         else:
@@ -233,6 +249,7 @@ class BooleanOps(DataTypeOps):
             return column_op(and_func)(left, right)
 
     def __or__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, extension_dtypes):
             return right.__or__(left)
         else:
@@ -281,15 +298,19 @@ class BooleanOps(DataTypeOps):
         return operand
 
     def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return column_op(Column.__lt__)(left, right)
 
     def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return column_op(Column.__le__)(left, right)
 
     def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return column_op(Column.__ge__)(left, right)
 
     def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return column_op(Column.__gt__)(left, right)
 
     def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
@@ -307,6 +328,8 @@ class BooleanExtensionOps(BooleanOps):
         return "booleans"
 
     def __and__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
+
         def and_func(left: Column, right: Any) -> Column:
             if not isinstance(right, Column):
                 if pd.isna(right):
@@ -318,6 +341,8 @@ class BooleanExtensionOps(BooleanOps):
         return column_op(and_func)(left, right)
 
     def __or__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
+
         def or_func(left: Column, right: Any) -> Column:
             if not isinstance(right, Column):
                 if pd.isna(right):
diff --git a/python/pyspark/pandas/data_type_ops/categorical_ops.py b/python/pyspark/pandas/data_type_ops/categorical_ops.py
index 36d5181..73af82e 100644
--- a/python/pyspark/pandas/data_type_ops/categorical_ops.py
+++ b/python/pyspark/pandas/data_type_ops/categorical_ops.py
@@ -25,7 +25,7 @@ from pandas.api.types import is_list_like, CategoricalDtype
 
 from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex
 from pyspark.pandas.base import column_op, IndexOpsMixin
-from pyspark.pandas.data_type_ops.base import DataTypeOps
+from pyspark.pandas.data_type_ops.base import _sanitize_list_like, DataTypeOps
 from pyspark.pandas.spark import functions as SF
 from pyspark.pandas.typedef import pandas_on_spark_type
 from pyspark.sql import functions as F
@@ -64,21 +64,27 @@ class CategoricalOps(DataTypeOps):
         return _to_cat(index_ops).astype(dtype)
 
     def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return _compare(left, right, Column.__eq__, is_equality_comparison=True)
 
     def ne(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return _compare(left, right, Column.__ne__, is_equality_comparison=True)
 
     def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return _compare(left, right, Column.__lt__)
 
     def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return _compare(left, right, Column.__le__)
 
     def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return _compare(left, right, Column.__gt__)
 
     def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return _compare(left, right, Column.__ge__)
 
 
diff --git a/python/pyspark/pandas/data_type_ops/complex_ops.py b/python/pyspark/pandas/data_type_ops/complex_ops.py
index 41f79d4..bee09f3 100644
--- a/python/pyspark/pandas/data_type_ops/complex_ops.py
+++ b/python/pyspark/pandas/data_type_ops/complex_ops.py
@@ -27,6 +27,7 @@ from pyspark.pandas.data_type_ops.base import (
     _as_categorical_type,
     _as_other_type,
     _as_string_type,
+    _sanitize_list_like,
 )
 from pyspark.pandas.typedef import pandas_on_spark_type
 from pyspark.sql import functions as F, Column
@@ -43,6 +44,7 @@ class ArrayOps(DataTypeOps):
         return "arrays"
 
     def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not isinstance(right, IndexOpsMixin) or (
             isinstance(right, IndexOpsMixin) and not isinstance(right.spark.data_type, ArrayType)
         ):
@@ -65,21 +67,25 @@ class ArrayOps(DataTypeOps):
     def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__lt__)(left, right)
 
     def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__le__)(left, right)
 
     def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__ge__)(left, right)
 
     def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__gt__)(left, right)
 
     def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
@@ -117,19 +123,23 @@ class StructOps(DataTypeOps):
     def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__lt__)(left, right)
 
     def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__le__)(left, right)
 
     def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__ge__)(left, right)
 
     def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__gt__)(left, right)
diff --git a/python/pyspark/pandas/data_type_ops/date_ops.py b/python/pyspark/pandas/data_type_ops/date_ops.py
index 59c8166..54ece76 100644
--- a/python/pyspark/pandas/data_type_ops/date_ops.py
+++ b/python/pyspark/pandas/data_type_ops/date_ops.py
@@ -33,6 +33,7 @@ from pyspark.pandas.data_type_ops.base import (
     _as_categorical_type,
     _as_other_type,
     _as_string_type,
+    _sanitize_list_like,
 )
 from pyspark.pandas.spark import functions as SF
 from pyspark.pandas.typedef import pandas_on_spark_type
@@ -48,6 +49,7 @@ class DateOps(DataTypeOps):
         return "dates"
 
     def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         # Note that date subtraction casts arguments to integer. This is to mimic pandas's
         # behaviors. pandas returns 'timedelta64[ns]' in days from date's subtraction.
         msg = (
@@ -65,6 +67,7 @@ class DateOps(DataTypeOps):
             raise TypeError("Date subtraction can only be applied to date series.")
 
     def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         # Note that date subtraction casts arguments to integer. This is to mimic pandas's
         # behaviors. pandas returns 'timedelta64[ns]' in days from date's subtraction.
         msg = (
@@ -81,21 +84,25 @@ class DateOps(DataTypeOps):
     def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__lt__)(left, right)
 
     def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__le__)(left, right)
 
     def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__ge__)(left, right)
 
     def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__gt__)(left, right)
 
     def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
diff --git a/python/pyspark/pandas/data_type_ops/datetime_ops.py b/python/pyspark/pandas/data_type_ops/datetime_ops.py
index f815742..071c22e 100644
--- a/python/pyspark/pandas/data_type_ops/datetime_ops.py
+++ b/python/pyspark/pandas/data_type_ops/datetime_ops.py
@@ -33,6 +33,7 @@ from pyspark.pandas.data_type_ops.base import (
     _as_bool_type,
     _as_categorical_type,
     _as_other_type,
+    _sanitize_list_like,
 )
 from pyspark.pandas.spark import functions as SF
 from pyspark.pandas.typedef import extension_dtypes, pandas_on_spark_type
@@ -48,6 +49,7 @@ class DatetimeOps(DataTypeOps):
         return "datetimes"
 
     def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         # Note that timestamp subtraction casts arguments to integer. This is to mimic pandas's
         # behaviors. pandas returns 'timedelta64[ns]' from 'datetime64[ns]'s subtraction.
         msg = (
@@ -73,6 +75,7 @@ class DatetimeOps(DataTypeOps):
             raise TypeError("Datetime subtraction can only be applied to datetime series.")
 
     def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         # Note that timestamp subtraction casts arguments to integer. This is to mimic pandas's
         # behaviors. pandas returns 'timedelta64[ns]' from 'datetime64[ns]'s subtraction.
         msg = (
@@ -97,21 +100,25 @@ class DatetimeOps(DataTypeOps):
     def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__lt__)(left, right)
 
     def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__le__)(left, right)
 
     def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__ge__)(left, right)
 
     def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__gt__)(left, right)
 
     def prepare(self, col: pd.Series) -> pd.Series:
diff --git a/python/pyspark/pandas/data_type_ops/null_ops.py b/python/pyspark/pandas/data_type_ops/null_ops.py
index f26de6f..9205d5e 100644
--- a/python/pyspark/pandas/data_type_ops/null_ops.py
+++ b/python/pyspark/pandas/data_type_ops/null_ops.py
@@ -26,6 +26,7 @@ from pyspark.pandas.data_type_ops.base import (
     _as_categorical_type,
     _as_other_type,
     _as_string_type,
+    _sanitize_list_like,
 )
 from pyspark.pandas._typing import SeriesOrIndex
 from pyspark.pandas.typedef import pandas_on_spark_type
@@ -45,21 +46,25 @@ class NullOps(DataTypeOps):
     def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__lt__)(left, right)
 
     def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__le__)(left, right)
 
     def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__ge__)(left, right)
 
     def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__gt__)(left, right)
 
     def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
diff --git a/python/pyspark/pandas/data_type_ops/num_ops.py b/python/pyspark/pandas/data_type_ops/num_ops.py
index f84c1af..fd9adbb 100644
--- a/python/pyspark/pandas/data_type_ops/num_ops.py
+++ b/python/pyspark/pandas/data_type_ops/num_ops.py
@@ -32,6 +32,7 @@ from pyspark.pandas.data_type_ops.base import (
     _as_categorical_type,
     _as_other_type,
     _as_string_type,
+    _sanitize_list_like,
 )
 from pyspark.pandas.spark import functions as SF
 from pyspark.pandas.typedef.typehints import extension_dtypes, pandas_on_spark_type
@@ -65,6 +66,7 @@ class NumericOps(DataTypeOps):
         return "numerics"
 
     def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right):
             raise TypeError("Addition can not be applied to given types.")
 
@@ -72,6 +74,7 @@ class NumericOps(DataTypeOps):
         return column_op(Column.__add__)(left, right)
 
     def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right):
             raise TypeError("Subtraction can not be applied to given types.")
 
@@ -79,6 +82,7 @@ class NumericOps(DataTypeOps):
         return column_op(Column.__sub__)(left, right)
 
     def mod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right):
             raise TypeError("Modulo can not be applied to given types.")
 
@@ -89,6 +93,7 @@ class NumericOps(DataTypeOps):
         return column_op(mod)(left, right)
 
     def pow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right):
             raise TypeError("Exponentiation can not be applied to given types.")
 
@@ -103,24 +108,28 @@ class NumericOps(DataTypeOps):
         return column_op(pow_func)(left, right)
 
     def radd(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not isinstance(right, numbers.Number):
             raise TypeError("Addition can not be applied to given types.")
         right = transform_boolean_operand_to_numeric(right)
         return column_op(Column.__radd__)(left, right)
 
     def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not isinstance(right, numbers.Number):
             raise TypeError("Subtraction can not be applied to given types.")
         right = transform_boolean_operand_to_numeric(right)
         return column_op(Column.__rsub__)(left, right)
 
     def rmul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not isinstance(right, numbers.Number):
             raise TypeError("Multiplication can not be applied to given types.")
         right = transform_boolean_operand_to_numeric(right)
         return column_op(Column.__rmul__)(left, right)
 
     def rpow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not isinstance(right, numbers.Number):
             raise TypeError("Exponentiation can not be applied to given types.")
 
@@ -131,6 +140,7 @@ class NumericOps(DataTypeOps):
         return column_op(rpow_func)(left, right)
 
     def rmod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not isinstance(right, numbers.Number):
             raise TypeError("Modulo can not be applied to given types.")
 
@@ -149,15 +159,19 @@ class NumericOps(DataTypeOps):
         )
 
     def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return column_op(Column.__lt__)(left, right)
 
     def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return column_op(Column.__le__)(left, right)
 
     def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return column_op(Column.__ge__)(left, right)
 
     def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         return column_op(Column.__gt__)(left, right)
 
 
@@ -172,6 +186,7 @@ class IntegralOps(NumericOps):
         return "integrals"
 
     def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType):
             return column_op(SF.repeat)(right, left)
 
@@ -182,6 +197,7 @@ class IntegralOps(NumericOps):
         return column_op(Column.__mul__)(left, right)
 
     def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right):
             raise TypeError("True division can not be applied to given types.")
 
@@ -194,6 +210,7 @@ class IntegralOps(NumericOps):
         return numpy_column_op(truediv)(left, right)
 
     def floordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right):
             raise TypeError("Floor division can not be applied to given types.")
 
@@ -208,6 +225,7 @@ class IntegralOps(NumericOps):
         return numpy_column_op(floordiv)(left, right)
 
     def rtruediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not isinstance(right, numbers.Number):
             raise TypeError("True division can not be applied to given types.")
 
@@ -220,6 +238,7 @@ class IntegralOps(NumericOps):
         return numpy_column_op(rtruediv)(left, right)
 
     def rfloordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not isinstance(right, numbers.Number):
             raise TypeError("Floor division can not be applied to given types.")
 
@@ -252,6 +271,7 @@ class FractionalOps(NumericOps):
         return "fractions"
 
     def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right):
             raise TypeError("Multiplication can not be applied to given types.")
 
@@ -259,6 +279,7 @@ class FractionalOps(NumericOps):
         return column_op(Column.__mul__)(left, right)
 
     def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right):
             raise TypeError("True division can not be applied to given types.")
 
@@ -275,6 +296,7 @@ class FractionalOps(NumericOps):
         return numpy_column_op(truediv)(left, right)
 
     def floordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not is_valid_operand_for_numeric_arithmetic(right):
             raise TypeError("Floor division can not be applied to given types.")
 
@@ -293,6 +315,7 @@ class FractionalOps(NumericOps):
         return numpy_column_op(floordiv)(left, right)
 
     def rtruediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not isinstance(right, numbers.Number):
             raise TypeError("True division can not be applied to given types.")
 
@@ -305,6 +328,7 @@ class FractionalOps(NumericOps):
         return numpy_column_op(rtruediv)(left, right)
 
     def rfloordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if not isinstance(right, numbers.Number):
             raise TypeError("Floor division can not be applied to given types.")
 
diff --git a/python/pyspark/pandas/data_type_ops/string_ops.py b/python/pyspark/pandas/data_type_ops/string_ops.py
index bfe36e1..69e1717 100644
--- a/python/pyspark/pandas/data_type_ops/string_ops.py
+++ b/python/pyspark/pandas/data_type_ops/string_ops.py
@@ -30,6 +30,7 @@ from pyspark.pandas.data_type_ops.base import (
     _as_categorical_type,
     _as_other_type,
     _as_string_type,
+    _sanitize_list_like,
 )
 from pyspark.pandas.spark import functions as SF
 from pyspark.pandas.typedef import extension_dtypes, pandas_on_spark_type
@@ -47,6 +48,7 @@ class StringOps(DataTypeOps):
         return "strings"
 
     def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if isinstance(right, str):
             return cast(
                 SeriesOrIndex,
@@ -60,6 +62,7 @@ class StringOps(DataTypeOps):
             raise TypeError("Addition can not be applied to given types.")
 
     def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if isinstance(right, int):
             return cast(
                 SeriesOrIndex,
@@ -77,6 +80,7 @@ class StringOps(DataTypeOps):
             raise TypeError("Multiplication can not be applied to given types.")
 
     def radd(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if isinstance(right, str):
             return cast(
                 SeriesOrIndex,
@@ -88,6 +92,7 @@ class StringOps(DataTypeOps):
             raise TypeError("Addition can not be applied to given types.")
 
     def rmul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
+        _sanitize_list_like(right)
         if isinstance(right, int):
             return cast(
                 SeriesOrIndex,
@@ -101,21 +106,25 @@ class StringOps(DataTypeOps):
     def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__lt__)(left, right)
 
     def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__le__)(left, right)
 
     def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__ge__)(left, right)
 
     def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         from pyspark.pandas.base import column_op
 
+        _sanitize_list_like(right)
         return column_op(Column.__gt__)(left, right)
 
     def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
index 1dc9c39..6ac9073 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py
@@ -243,7 +243,7 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
         )
         self.assertRaisesRegex(
             TypeError,
-            "Cannot compare a Categorical with the given type",
+            "The operation can not be applied to list",
             lambda: ordered_psser == [1, 2, 3],
         )
 
@@ -306,7 +306,7 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
         )
         self.assertRaisesRegex(
             TypeError,
-            "Cannot compare a Categorical with the given type",
+            "The operation can not be applied to list.",
             lambda: ordered_psser != [1, 2, 3],
         )
         self.assert_eq(
@@ -367,7 +367,7 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
         )
         self.assertRaisesRegex(
             TypeError,
-            "Cannot compare a Categorical with the given type",
+            "The operation can not be applied to list",
             lambda: ordered_psser < [1, 2, 3],
         )
         self.assert_eq(
@@ -416,7 +416,7 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
         )
         self.assertRaisesRegex(
             TypeError,
-            "Cannot compare a Categorical with the given type",
+            "The operation can not be applied to list",
             lambda: ordered_psser <= [1, 2, 3],
         )
         self.assert_eq(
@@ -465,7 +465,7 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
         )
         self.assertRaisesRegex(
             TypeError,
-            "Cannot compare a Categorical with the given type",
+            "The operation can not be applied to list",
             lambda: ordered_psser > [1, 2, 3],
         )
         self.assert_eq(
@@ -514,7 +514,7 @@ class CategoricalOpsTest(PandasOnSparkTestCase, TestCasesUtils):
         )
         self.assertRaisesRegex(
             TypeError,
-            "Cannot compare a Categorical with the given type",
+            "The operation can not be applied to list",
             lambda: ordered_psser >= [1, 2, 3],
         )
         self.assert_eq(

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org