You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/07/12 06:11:17 UTC

[spark] branch branch-3.2 updated: [SPARK-36003][PYTHON] Implement unary operator `invert` of integral ps.Series/Index

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 606a99c  [SPARK-36003][PYTHON] Implement unary operator `invert` of integral ps.Series/Index
606a99c is described below

commit 606a99c01e84889109cfaf643a0f59075cc4d02d
Author: Xinrong Meng <xi...@databricks.com>
AuthorDate: Mon Jul 12 15:10:06 2021 +0900

    [SPARK-36003][PYTHON] Implement unary operator `invert` of integral ps.Series/Index
    
    ### What changes were proposed in this pull request?
    Implement unary operator `invert` of integral ps.Series/Index.
    
    ### Why are the changes needed?
    Currently, the unary operator `invert` (`~`) is not supported on integral ps.Series/Index. We ought to implement it, following pandas' behavior.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes.
    Before:
    ```py
    >>> import pyspark.pandas as ps
    >>> psser = ps.Series([1, 2, 3])
    >>> ~psser
    Traceback (most recent call last):
    ...
    NotImplementedError: Unary ~ can not be applied to integrals.
    ```
    
    After:
    ```py
    >>> import pyspark.pandas as ps
    >>> psser = ps.Series([1, 2, 3])
    >>> ~psser
    0   -2
    1   -3
    2   -4
    dtype: int64
    ```
    
    ### How was this patch tested?
    Unit tests.
    
    Closes #33285 from xinrong-databricks/numeric_invert.
    
    Authored-by: Xinrong Meng <xi...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
    (cherry picked from commit badb0393d46d7aef90710e51e233fb5077977423)
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 python/pyspark/pandas/data_type_ops/num_ops.py         | 18 ++++--------------
 .../pyspark/pandas/tests/data_type_ops/test_num_ops.py | 14 +++++++-------
 2 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/python/pyspark/pandas/data_type_ops/num_ops.py b/python/pyspark/pandas/data_type_ops/num_ops.py
index ed089e5..1c7f051 100644
--- a/python/pyspark/pandas/data_type_ops/num_ops.py
+++ b/python/pyspark/pandas/data_type_ops/num_ops.py
@@ -122,38 +122,25 @@ class NumericOps(DataTypeOps):
         right = transform_boolean_operand_to_numeric(right)
         return column_op(rmod)(left, right)
 
-    # TODO(SPARK-36003): Implement unary operator `invert` as below
     def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
-        raise NotImplementedError("Unary ~ can not be applied to %s." % self.pretty_name)
+        return cast(IndexOpsLike, column_op(F.bitwise_not)(operand))
 
     def neg(self, operand: IndexOpsLike) -> IndexOpsLike:
-        from pyspark.pandas.base import column_op
-
         return cast(IndexOpsLike, column_op(Column.__neg__)(operand))
 
     def abs(self, operand: IndexOpsLike) -> IndexOpsLike:
-        from pyspark.pandas.base import column_op
-
         return cast(IndexOpsLike, column_op(F.abs)(operand))
 
     def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
-        from pyspark.pandas.base import column_op
-
         return column_op(Column.__lt__)(left, right)
 
     def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
-        from pyspark.pandas.base import column_op
-
         return column_op(Column.__le__)(left, right)
 
     def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
-        from pyspark.pandas.base import column_op
-
         return column_op(Column.__ge__)(left, right)
 
     def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
-        from pyspark.pandas.base import column_op
-
         return column_op(Column.__gt__)(left, right)
 
 
@@ -317,6 +304,9 @@ class FractionalOps(NumericOps):
         right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type)
         return numpy_column_op(rfloordiv)(left, right)
 
+    def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
+        raise TypeError("Unary ~ can not be applied to %s." % self.pretty_name)
+
     def isnull(self, index_ops: IndexOpsLike) -> IndexOpsLike:
         return index_ops._with_new_scol(
             index_ops.spark.column.isNull() | F.isnan(index_ops.spark.column),
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
index bbacc7d..9a05c682 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
@@ -30,7 +30,7 @@ from pyspark.pandas.typedef.typehints import (
     extension_dtypes_available,
     extension_float_dtypes_available,
 )
-from pyspark.sql.types import DecimalType
+from pyspark.sql.types import ByteType, DecimalType, IntegerType, LongType
 from pyspark.testing.pandasutils import PandasOnSparkTestCase
 
 
@@ -327,9 +327,9 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
             self.assert_eq(abs(pser), abs(psser))
 
     def test_invert(self):
-        for psser in self.numeric_pssers:
-            if not isinstance(psser.spark.data_type, DecimalType):
-                self.assertRaises(NotImplementedError, lambda: ~psser)
+        for pser, psser in self.numeric_pser_psser_pairs:
+            if type(psser.spark.data_type) in [ByteType, IntegerType, LongType]:
+                self.assert_eq(~pser, ~psser)
             else:
                 self.assertRaises(TypeError, lambda: ~psser)
 
@@ -426,8 +426,8 @@ class IntegralExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils):
             self.check_extension(abs(pser), abs(psser))
 
     def test_invert(self):
-        for psser in self.intergral_extension_pssers:
-            self.assertRaises(NotImplementedError, lambda: ~psser)
+        for pser, psser in self.intergral_extension_pser_psser_pairs:
+            self.check_extension(~pser, ~psser)
 
     def test_eq(self):
         with option_context("compute.ops_on_diff_frames", True):
@@ -507,7 +507,7 @@ class FractionalExtensionOpsTest(PandasOnSparkTestCase, TestCasesUtils):
 
     def test_invert(self):
         for psser in self.fractional_extension_pssers:
-            self.assertRaises(NotImplementedError, lambda: ~psser)
+            self.assertRaises(TypeError, lambda: ~psser)
 
     def test_eq(self):
         with option_context("compute.ops_on_diff_frames", True):

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org