You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ue...@apache.org on 2021/07/08 19:28:18 UTC
[spark] branch master updated: [SPARK-35340][PYTHON] Standardize
TypeError messages for unsupported basic operations
This is an automated email from the ASF dual-hosted git repository.
ueshin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 819c482 [SPARK-35340][PYTHON] Standardize TypeError messages for unsupported basic operations
819c482 is described below
commit 819c482498c507b71d100e95082a681e4583d349
Author: Xinrong Meng <xi...@databricks.com>
AuthorDate: Thu Jul 8 12:27:48 2021 -0700
[SPARK-35340][PYTHON] Standardize TypeError messages for unsupported basic operations
### What changes were proposed in this pull request?
This PR standardizes TypeError messages for unsupported basic operations by:
- Capitalizing the first letter of each message
- Leveraging the TypeError messages defined in `pyspark/pandas/data_type_ops/base.py`
- Using the utility `is_valid_operand_for_numeric_arithmetic` to avoid duplicating TypeError messages
Related unit tests are adjusted accordingly.
### Why are the changes needed?
Inconsistent TypeError messages are shown for unsupported data-type-based basic operations.
Take addition's TypeError messages for example:
- addition can not be applied to given types.
- string addition can only be applied to string series or literals.
Standardizing TypeError messages would improve user experience and reduce maintenance costs.
### Does this PR introduce _any_ user-facing change?
No user-facing behavior change. Only TypeError messages are modified.
### How was this patch tested?
Unit tests.
Closes #33237 from xinrong-databricks/datatypeops_err.
Authored-by: Xinrong Meng <xi...@databricks.com>
Signed-off-by: Takuya UESHIN <ue...@databricks.com>
---
python/pyspark/pandas/data_type_ops/date_ops.py | 4 +-
.../pyspark/pandas/data_type_ops/datetime_ops.py | 4 +-
python/pyspark/pandas/data_type_ops/num_ops.py | 109 ++++-----------------
python/pyspark/pandas/data_type_ops/string_ops.py | 41 +-------
.../pyspark/pandas/tests/indexes/test_datetime.py | 2 +-
python/pyspark/pandas/tests/test_dataframe.py | 31 +++---
.../pyspark/pandas/tests/test_series_datetime.py | 6 +-
7 files changed, 49 insertions(+), 148 deletions(-)
diff --git a/python/pyspark/pandas/data_type_ops/date_ops.py b/python/pyspark/pandas/data_type_ops/date_ops.py
index 9cdd0e5..86fe8c3 100644
--- a/python/pyspark/pandas/data_type_ops/date_ops.py
+++ b/python/pyspark/pandas/data_type_ops/date_ops.py
@@ -62,7 +62,7 @@ class DateOps(DataTypeOps):
warnings.warn(msg, UserWarning)
return column_op(F.datediff)(left, SF.lit(right)).astype("long")
else:
- raise TypeError("date subtraction can only be applied to date series.")
+ raise TypeError("Date subtraction can only be applied to date series.")
def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
# Note that date subtraction casts arguments to integer. This is to mimic pandas's
@@ -76,7 +76,7 @@ class DateOps(DataTypeOps):
warnings.warn(msg, UserWarning)
return -column_op(F.datediff)(left, SF.lit(right)).astype("long")
else:
- raise TypeError("date subtraction can only be applied to date series.")
+ raise TypeError("Date subtraction can only be applied to date series.")
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
diff --git a/python/pyspark/pandas/data_type_ops/datetime_ops.py b/python/pyspark/pandas/data_type_ops/datetime_ops.py
index 3b7220a..a30dc96 100644
--- a/python/pyspark/pandas/data_type_ops/datetime_ops.py
+++ b/python/pyspark/pandas/data_type_ops/datetime_ops.py
@@ -67,7 +67,7 @@ class DatetimeOps(DataTypeOps):
),
)
else:
- raise TypeError("datetime subtraction can only be applied to datetime series.")
+ raise TypeError("Datetime subtraction can only be applied to datetime series.")
def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
# Note that timestamp subtraction casts arguments to integer. This is to mimic pandas's
@@ -86,7 +86,7 @@ class DatetimeOps(DataTypeOps):
),
)
else:
- raise TypeError("datetime subtraction can only be applied to datetime series.")
+ raise TypeError("Datetime subtraction can only be applied to datetime series.")
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
diff --git a/python/pyspark/pandas/data_type_ops/num_ops.py b/python/pyspark/pandas/data_type_ops/num_ops.py
index 0edf451..8b26843 100644
--- a/python/pyspark/pandas/data_type_ops/num_ops.py
+++ b/python/pyspark/pandas/data_type_ops/num_ops.py
@@ -41,7 +41,6 @@ from pyspark.sql.column import Column
from pyspark.sql.types import (
BooleanType,
StringType,
- TimestampType,
)
@@ -53,39 +52,24 @@ class NumericOps(DataTypeOps):
return "numerics"
def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if (
- isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
- ) or isinstance(right, str):
- raise TypeError("string addition can only be applied to string series or literals.")
-
if not is_valid_operand_for_numeric_arithmetic(right):
- raise TypeError("addition can not be applied to given types.")
+ raise TypeError("Addition can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right, left.spark.data_type)
return column_op(Column.__add__)(left, right)
def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if (
- isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
- ) or isinstance(right, str):
- raise TypeError("subtraction can not be applied to string series or literals.")
-
if not is_valid_operand_for_numeric_arithmetic(right):
- raise TypeError("subtraction can not be applied to given types.")
+ raise TypeError("Subtraction can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right, left.spark.data_type)
return column_op(Column.__sub__)(left, right)
def mod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if (
- isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
- ) or isinstance(right, str):
- raise TypeError("modulo can not be applied on string series or literals.")
-
if not is_valid_operand_for_numeric_arithmetic(right):
- raise TypeError("modulo can not be applied to given types.")
+ raise TypeError("Modulo can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right, left.spark.data_type)
@@ -95,13 +79,8 @@ class NumericOps(DataTypeOps):
return column_op(mod)(left, right)
def pow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if (
- isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
- ) or isinstance(right, str):
- raise TypeError("exponentiation can not be applied on string series or literals.")
-
if not is_valid_operand_for_numeric_arithmetic(right):
- raise TypeError("exponentiation can not be applied to given types.")
+ raise TypeError("Exponentiation can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right, left.spark.data_type)
@@ -111,34 +90,26 @@ class NumericOps(DataTypeOps):
return column_op(pow_func)(left, right)
def radd(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if isinstance(right, str):
- raise TypeError("string addition can only be applied to string series or literals.")
if not isinstance(right, numbers.Number):
- raise TypeError("addition can not be applied to given types.")
+ raise TypeError("Addition can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right)
return column_op(Column.__radd__)(left, right)
def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if isinstance(right, str):
- raise TypeError("subtraction can not be applied to string series or literals.")
if not isinstance(right, numbers.Number):
- raise TypeError("subtraction can not be applied to given types.")
+ raise TypeError("Subtraction can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right)
return column_op(Column.__rsub__)(left, right)
def rmul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if isinstance(right, str):
- raise TypeError("multiplication can not be applied to a string literal.")
if not isinstance(right, numbers.Number):
- raise TypeError("multiplication can not be applied to given types.")
+ raise TypeError("Multiplication can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right)
return column_op(Column.__rmul__)(left, right)
def rpow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if isinstance(right, str):
- raise TypeError("exponentiation can not be applied on string series or literals.")
if not isinstance(right, numbers.Number):
- raise TypeError("exponentiation can not be applied to given types.")
+ raise TypeError("Exponentiation can not be applied to given types.")
def rpow_func(left: Column, right: Any) -> Column:
return F.when(SF.lit(right == 1), right).otherwise(Column.__rpow__(left, right))
@@ -147,10 +118,8 @@ class NumericOps(DataTypeOps):
return column_op(rpow_func)(left, right)
def rmod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if isinstance(right, str):
- raise TypeError("modulo can not be applied on string series or literals.")
if not isinstance(right, numbers.Number):
- raise TypeError("modulo can not be applied to given types.")
+ raise TypeError("Modulo can not be applied to given types.")
def rmod(left: Column, right: Any) -> Column:
return ((right % left) + left) % left
@@ -204,30 +173,19 @@ class IntegralOps(NumericOps):
return "integrals"
def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if isinstance(right, str):
- raise TypeError("multiplication can not be applied to a string literal.")
-
- if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, TimestampType):
- raise TypeError("multiplication can not be applied to date times.")
-
if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType):
return column_op(SF.repeat)(right, left)
if not is_valid_operand_for_numeric_arithmetic(right):
- raise TypeError("multiplication can not be applied to given types.")
+ raise TypeError("Multiplication can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right, left.spark.data_type)
return column_op(Column.__mul__)(left, right)
def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if (
- isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
- ) or isinstance(right, str):
- raise TypeError("division can not be applied on string series or literals.")
-
if not is_valid_operand_for_numeric_arithmetic(right):
- raise TypeError("division can not be applied to given types.")
+ raise TypeError("True division can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right, left.spark.data_type)
@@ -239,13 +197,8 @@ class IntegralOps(NumericOps):
return numpy_column_op(truediv)(left, right)
def floordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if (
- isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
- ) or isinstance(right, str):
- raise TypeError("division can not be applied on string series or literals.")
-
if not is_valid_operand_for_numeric_arithmetic(right):
- raise TypeError("division can not be applied to given types.")
+ raise TypeError("Floor division can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right, left.spark.data_type)
@@ -259,10 +212,8 @@ class IntegralOps(NumericOps):
return numpy_column_op(floordiv)(left, right)
def rtruediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if isinstance(right, str):
- raise TypeError("division can not be applied on string series or literals.")
if not isinstance(right, numbers.Number):
- raise TypeError("division can not be applied to given types.")
+ raise TypeError("True division can not be applied to given types.")
def rtruediv(left: Column, right: Any) -> Column:
return F.when(left == 0, SF.lit(np.inf).__div__(right)).otherwise(
@@ -273,10 +224,8 @@ class IntegralOps(NumericOps):
return numpy_column_op(rtruediv)(left, right)
def rfloordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if isinstance(right, str):
- raise TypeError("division can not be applied on string series or literals.")
if not isinstance(right, numbers.Number):
- raise TypeError("division can not be applied to given types.")
+ raise TypeError("Floor division can not be applied to given types.")
def rfloordiv(left: Column, right: Any) -> Column:
return F.when(SF.lit(left == 0), SF.lit(np.inf).__div__(right)).otherwise(
@@ -310,27 +259,16 @@ class FractionalOps(NumericOps):
return "fractions"
def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if isinstance(right, str):
- raise TypeError("multiplication can not be applied to a string literal.")
-
- if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, TimestampType):
- raise TypeError("multiplication can not be applied to date times.")
-
if not is_valid_operand_for_numeric_arithmetic(right):
- raise TypeError("multiplication can not be applied to given types.")
+ raise TypeError("Multiplication can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right, left.spark.data_type)
return column_op(Column.__mul__)(left, right)
def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if (
- isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
- ) or isinstance(right, str):
- raise TypeError("division can not be applied on string series or literals.")
-
if not is_valid_operand_for_numeric_arithmetic(right):
- raise TypeError("division can not be applied to given types.")
+ raise TypeError("True division can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right, left.spark.data_type)
@@ -346,13 +284,8 @@ class FractionalOps(NumericOps):
return numpy_column_op(truediv)(left, right)
def floordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if (
- isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, StringType)
- ) or isinstance(right, str):
- raise TypeError("division can not be applied on string series or literals.")
-
if not is_valid_operand_for_numeric_arithmetic(right):
- raise TypeError("division can not be applied to given types.")
+ raise TypeError("Floor division can not be applied to given types.")
right = transform_boolean_operand_to_numeric(right, left.spark.data_type)
@@ -370,10 +303,8 @@ class FractionalOps(NumericOps):
return numpy_column_op(floordiv)(left, right)
def rtruediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if isinstance(right, str):
- raise TypeError("division can not be applied on string series or literals.")
if not isinstance(right, numbers.Number):
- raise TypeError("division can not be applied to given types.")
+ raise TypeError("True division can not be applied to given types.")
def rtruediv(left: Column, right: Any) -> Column:
return F.when(left == 0, SF.lit(np.inf).__div__(right)).otherwise(
@@ -384,10 +315,8 @@ class FractionalOps(NumericOps):
return numpy_column_op(rtruediv)(left, right)
def rfloordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if isinstance(right, str):
- raise TypeError("division can not be applied on string series or literals.")
if not isinstance(right, numbers.Number):
- raise TypeError("division can not be applied to given types.")
+ raise TypeError("Floor division can not be applied to given types.")
def rfloordiv(left: Column, right: Any) -> Column:
return F.when(SF.lit(left == 0), SF.lit(np.inf).__div__(right)).otherwise(
diff --git a/python/pyspark/pandas/data_type_ops/string_ops.py b/python/pyspark/pandas/data_type_ops/string_ops.py
index 4729582..9c6ca4c 100644
--- a/python/pyspark/pandas/data_type_ops/string_ops.py
+++ b/python/pyspark/pandas/data_type_ops/string_ops.py
@@ -53,15 +53,9 @@ class StringOps(DataTypeOps):
elif isinstance(right, str):
return column_op(F.concat)(left, SF.lit(right))
else:
- raise TypeError("string addition can only be applied to string series or literals.")
-
- def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- raise TypeError("subtraction can not be applied to string series or literals.")
+ raise TypeError("Addition can not be applied to given types.")
def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- if isinstance(right, str):
- raise TypeError("multiplication can not be applied to a string literal.")
-
if (
isinstance(right, IndexOpsMixin)
and isinstance(right.spark.data_type, IntegralType)
@@ -69,19 +63,7 @@ class StringOps(DataTypeOps):
) or isinstance(right, int):
return column_op(SF.repeat)(left, right)
else:
- raise TypeError("a string series can only be multiplied to an int series or literal")
-
- def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- raise TypeError("division can not be applied on string series or literals.")
-
- def floordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- raise TypeError("division can not be applied on string series or literals.")
-
- def mod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- raise TypeError("modulo can not be applied on string series or literals.")
-
- def pow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- raise TypeError("exponentiation can not be applied on string series or literals.")
+ raise TypeError("Multiplication can not be applied to given types.")
def radd(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
if isinstance(right, str):
@@ -90,28 +72,13 @@ class StringOps(DataTypeOps):
left._with_new_scol(F.concat(SF.lit(right), left.spark.column)), # TODO: dtype?
)
else:
- raise TypeError("string addition can only be applied to string series or literals.")
-
- def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- raise TypeError("subtraction can not be applied to string series or literals.")
+ raise TypeError("Addition can not be applied to given types.")
def rmul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
if isinstance(right, int):
return column_op(SF.repeat)(left, right)
else:
- raise TypeError("a string series can only be multiplied to an int series or literal")
-
- def rtruediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- raise TypeError("division can not be applied on string series or literals.")
-
- def rfloordiv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- raise TypeError("division can not be applied on string series or literals.")
-
- def rpow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- raise TypeError("exponentiation can not be applied on string series or literals.")
-
- def rmod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- raise TypeError("modulo can not be applied on string series or literals.")
+ raise TypeError("Multiplication can not be applied to given types.")
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
from pyspark.pandas.base import column_op
diff --git a/python/pyspark/pandas/tests/indexes/test_datetime.py b/python/pyspark/pandas/tests/indexes/test_datetime.py
index 8a55e2e..380b481 100644
--- a/python/pyspark/pandas/tests/indexes/test_datetime.py
+++ b/python/pyspark/pandas/tests/indexes/test_datetime.py
@@ -212,7 +212,7 @@ class DatetimeIndexTest(PandasOnSparkTestCase, TestUtils):
self.assertRaisesRegex(TypeError, expected_err_msg, lambda: psidx % other)
self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other % psidx)
- expected_err_msg = "datetime subtraction can only be applied to datetime series."
+ expected_err_msg = "Datetime subtraction can only be applied to datetime series."
for other in [1, 0.1]:
self.assertRaisesRegex(TypeError, expected_err_msg, lambda: psidx - other)
diff --git a/python/pyspark/pandas/tests/test_dataframe.py b/python/pyspark/pandas/tests/test_dataframe.py
index e54b783..ebc2fc8 100644
--- a/python/pyspark/pandas/tests/test_dataframe.py
+++ b/python/pyspark/pandas/tests/test_dataframe.py
@@ -2347,7 +2347,7 @@ class DataFrameTest(PandasOnSparkTestCase, SQLTestUtils):
self.assert_eq(psdf["c"] + psdf["d"], pdf["c"] + pdf["d"])
# Negative
- ks_err_msg = "string addition can only be applied to string series or literals"
+ ks_err_msg = "Addition can not be applied to given types"
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["a"] + psdf["c"])
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["c"] + psdf["a"])
@@ -2365,12 +2365,13 @@ class DataFrameTest(PandasOnSparkTestCase, SQLTestUtils):
# Negative
psdf = ps.DataFrame({"a": ["x"], "b": [1]})
- ks_err_msg = "subtraction can not be applied to string series or literals"
-
- self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["a"] - psdf["b"])
+ ks_err_msg = "Subtraction can not be applied to given types"
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["b"] - psdf["a"])
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["b"] - "literal")
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: "literal" - psdf["b"])
+
+ ks_err_msg = "Subtraction can not be applied to strings"
+ self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["a"] - psdf["b"])
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: 1 - psdf["a"])
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["a"] - 1)
@@ -2386,23 +2387,27 @@ class DataFrameTest(PandasOnSparkTestCase, SQLTestUtils):
# Negative
psdf = ps.DataFrame({"a": ["x"], "b": [1]})
- ks_err_msg = "division can not be applied on string series or literals"
- self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["a"] / psdf["b"])
+ ks_err_msg = "True division can not be applied to given types"
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["b"] / psdf["a"])
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["b"] / "literal")
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: "literal" / psdf["b"])
+
+ ks_err_msg = "True division can not be applied to strings"
+ self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["a"] / psdf["b"])
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: 1 / psdf["a"])
def test_binary_operator_floordiv(self):
psdf = ps.DataFrame({"a": ["x"], "b": [1]})
- ks_err_msg = "division can not be applied on string series or literals"
+ ks_err_msg = "Floor division can not be applied to strings"
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["a"] // psdf["b"])
+ self.assertRaisesRegex(TypeError, ks_err_msg, lambda: 1 // psdf["a"])
+
+ ks_err_msg = "Floor division can not be applied to given types"
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["b"] // psdf["a"])
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["b"] // "literal")
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: "literal" // psdf["b"])
- self.assertRaisesRegex(TypeError, ks_err_msg, lambda: 1 // psdf["a"])
def test_binary_operator_mod(self):
# Positive
@@ -2413,11 +2418,12 @@ class DataFrameTest(PandasOnSparkTestCase, SQLTestUtils):
# Negative
psdf = ps.DataFrame({"a": ["x"], "b": [1]})
- ks_err_msg = "modulo can not be applied on string series or literals"
-
- self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["a"] % psdf["b"])
+ ks_err_msg = "Modulo can not be applied to given types"
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["b"] % psdf["a"])
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["b"] % "literal")
+
+ ks_err_msg = "Modulo can not be applied to strings"
+ self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["a"] % psdf["b"])
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: 1 % psdf["a"])
def test_binary_operator_multiply(self):
@@ -2436,12 +2442,11 @@ class DataFrameTest(PandasOnSparkTestCase, SQLTestUtils):
# Negative
psdf = ps.DataFrame({"a": ["x"], "b": [2]})
- ks_err_msg = "multiplication can not be applied to a string literal"
+ ks_err_msg = "Multiplication can not be applied to given types"
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["b"] * "literal")
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: "literal" * psdf["b"])
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["a"] * "literal")
- ks_err_msg = "a string series can only be multiplied to an int series or literal"
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["a"] * psdf["a"])
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: psdf["a"] * 0.1)
self.assertRaisesRegex(TypeError, ks_err_msg, lambda: 0.1 * psdf["a"])
diff --git a/python/pyspark/pandas/tests/test_series_datetime.py b/python/pyspark/pandas/tests/test_series_datetime.py
index 7c75819..8000b13 100644
--- a/python/pyspark/pandas/tests/test_series_datetime.py
+++ b/python/pyspark/pandas/tests/test_series_datetime.py
@@ -70,7 +70,7 @@ class SeriesDateTimeTest(PandasOnSparkTestCase, SQLTestUtils):
psdf = ps.DataFrame(
{"a": pd.date_range("2016-12-31", "2017-01-08", freq="D"), "b": pd.Series(range(9))}
)
- expected_error_message = "datetime subtraction can only be applied to datetime series."
+ expected_error_message = "Datetime subtraction can only be applied to datetime series."
with self.assertRaisesRegex(TypeError, expected_error_message):
psdf["a"] - psdf["b"]
with self.assertRaisesRegex(TypeError, expected_error_message):
@@ -104,7 +104,7 @@ class SeriesDateTimeTest(PandasOnSparkTestCase, SQLTestUtils):
self.assertRaisesRegex(TypeError, expected_err_msg, lambda: psser % other)
self.assertRaisesRegex(TypeError, expected_err_msg, lambda: other % psser)
- expected_err_msg = "datetime subtraction can only be applied to datetime series."
+ expected_err_msg = "Datetime subtraction can only be applied to datetime series."
for other in [1, 0.1]:
self.assertRaisesRegex(TypeError, expected_err_msg, lambda: psser - other)
@@ -135,7 +135,7 @@ class SeriesDateTimeTest(PandasOnSparkTestCase, SQLTestUtils):
psdf = ps.DataFrame(
{"a": pd.date_range("2016-12-31", "2017-01-08", freq="D"), "b": pd.Series(range(9))}
)
- expected_error_message = "date subtraction can only be applied to date series."
+ expected_error_message = "Date subtraction can only be applied to date series."
with self.assertRaisesRegex(TypeError, expected_error_message):
psdf["a"].dt.date - psdf["b"]
with self.assertRaisesRegex(TypeError, expected_error_message):
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org