You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/07/13 00:23:36 UTC
[spark] branch branch-3.2 updated: [SPARK-36103][PYTHON] Manage InternalField in DataTypeOps.invert
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new 8d9758e [SPARK-36103][PYTHON] Manage InternalField in DataTypeOps.invert
8d9758e is described below
commit 8d9758ee4682921be1be7306bf45d3c22fd43234
Author: Takuya UESHIN <ue...@databricks.com>
AuthorDate: Tue Jul 13 09:22:27 2021 +0900
[SPARK-36103][PYTHON] Manage InternalField in DataTypeOps.invert
### What changes were proposed in this pull request?
Properly set `InternalField` for `DataTypeOps.invert`.
### Why are the changes needed?
The Spark data type and nullability must be the same as the original when `DataTypeOps.invert` is applied.
We should manage `InternalField` for this case.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing tests.
Closes #33306 from ueshin/issues/SPARK-36103/invert.
Authored-by: Takuya UESHIN <ue...@databricks.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
(cherry picked from commit e2021daafbdca8549a84c3b0bd2292847b2e362e)
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
python/pyspark/pandas/data_type_ops/boolean_ops.py | 14 ++------------
python/pyspark/pandas/data_type_ops/num_ops.py | 14 +++++---------
python/pyspark/pandas/tests/data_type_ops/test_num_ops.py | 4 ++--
3 files changed, 9 insertions(+), 23 deletions(-)
diff --git a/python/pyspark/pandas/data_type_ops/boolean_ops.py b/python/pyspark/pandas/data_type_ops/boolean_ops.py
index 9ec295e..1f708ca 100644
--- a/python/pyspark/pandas/data_type_ops/boolean_ops.py
+++ b/python/pyspark/pandas/data_type_ops/boolean_ops.py
@@ -16,7 +16,7 @@
#
import numbers
-from typing import cast, Any, Union
+from typing import Any, Union
import pandas as pd
from pandas.api.types import CategoricalDtype
@@ -281,29 +281,19 @@ class BooleanOps(DataTypeOps):
return operand
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- from pyspark.pandas.base import column_op
-
return column_op(Column.__lt__)(left, right)
def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- from pyspark.pandas.base import column_op
-
return column_op(Column.__le__)(left, right)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- from pyspark.pandas.base import column_op
-
return column_op(Column.__ge__)(left, right)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
- from pyspark.pandas.base import column_op
-
return column_op(Column.__gt__)(left, right)
def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
- from pyspark.pandas.base import column_op
-
- return cast(IndexOpsLike, column_op(Column.__invert__)(operand))
+ return operand._with_new_scol(~operand.spark.column, field=operand._internal.data_fields[0])
class BooleanExtensionOps(BooleanOps):
diff --git a/python/pyspark/pandas/data_type_ops/num_ops.py b/python/pyspark/pandas/data_type_ops/num_ops.py
index 1c7f051..16cb305 100644
--- a/python/pyspark/pandas/data_type_ops/num_ops.py
+++ b/python/pyspark/pandas/data_type_ops/num_ops.py
@@ -122,9 +122,6 @@ class NumericOps(DataTypeOps):
right = transform_boolean_operand_to_numeric(right)
return column_op(rmod)(left, right)
- def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
- return cast(IndexOpsLike, column_op(F.bitwise_not)(operand))
-
def neg(self, operand: IndexOpsLike) -> IndexOpsLike:
return cast(IndexOpsLike, column_op(Column.__neg__)(operand))
@@ -214,6 +211,11 @@ class IntegralOps(NumericOps):
right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type)
return numpy_column_op(rfloordiv)(left, right)
+ def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
+ return operand._with_new_scol(
+ F.bitwise_not(operand.spark.column), field=operand._internal.data_fields[0]
+ )
+
def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
dtype, spark_type = pandas_on_spark_type(dtype)
@@ -304,9 +306,6 @@ class FractionalOps(NumericOps):
right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type)
return numpy_column_op(rfloordiv)(left, right)
- def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
- raise TypeError("Unary ~ can not be applied to %s." % self.pretty_name)
-
def isnull(self, index_ops: IndexOpsLike) -> IndexOpsLike:
return index_ops._with_new_scol(
index_ops.spark.column.isNull() | F.isnan(index_ops.spark.column),
@@ -348,9 +347,6 @@ class DecimalOps(FractionalOps):
def pretty_name(self) -> str:
return "decimal"
- def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
- raise TypeError("Unary ~ can not be applied to %s." % self.pretty_name)
-
def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
raise TypeError("< can not be applied to %s." % self.pretty_name)
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
index 9a05c682..d1b26ec 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
@@ -30,7 +30,7 @@ from pyspark.pandas.typedef.typehints import (
extension_dtypes_available,
extension_float_dtypes_available,
)
-from pyspark.sql.types import ByteType, DecimalType, IntegerType, LongType
+from pyspark.sql.types import DecimalType, IntegralType
from pyspark.testing.pandasutils import PandasOnSparkTestCase
@@ -328,7 +328,7 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
def test_invert(self):
for pser, psser in self.numeric_pser_psser_pairs:
- if type(psser.spark.data_type) in [ByteType, IntegerType, LongType]:
+ if isinstance(psser.spark.data_type, IntegralType):
self.assert_eq(~pser, ~psser)
else:
self.assertRaises(TypeError, lambda: ~psser)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org