You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2021/07/13 00:23:36 UTC

[spark] branch branch-3.2 updated: [SPARK-36103][PYTHON] Manage InternalField in DataTypeOps.invert

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 8d9758e  [SPARK-36103][PYTHON] Manage InternalField in DataTypeOps.invert
8d9758e is described below

commit 8d9758ee4682921be1be7306bf45d3c22fd43234
Author: Takuya UESHIN <ue...@databricks.com>
AuthorDate: Tue Jul 13 09:22:27 2021 +0900

    [SPARK-36103][PYTHON] Manage InternalField in DataTypeOps.invert
    
    ### What changes were proposed in this pull request?
    
    Properly set `InternalField` for `DataTypeOps.invert`.
    
    ### Why are the changes needed?
    
    The Spark data type and nullability of the result must be the same as those of the original operand when applying `DataTypeOps.invert`.
    We should manage `InternalField` for this case.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing tests.
    
    Closes #33306 from ueshin/issues/SPARK-36103/invert.
    
    Authored-by: Takuya UESHIN <ue...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
    (cherry picked from commit e2021daafbdca8549a84c3b0bd2292847b2e362e)
    Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
 python/pyspark/pandas/data_type_ops/boolean_ops.py        | 14 ++------------
 python/pyspark/pandas/data_type_ops/num_ops.py            | 14 +++++---------
 python/pyspark/pandas/tests/data_type_ops/test_num_ops.py |  4 ++--
 3 files changed, 9 insertions(+), 23 deletions(-)

diff --git a/python/pyspark/pandas/data_type_ops/boolean_ops.py b/python/pyspark/pandas/data_type_ops/boolean_ops.py
index 9ec295e..1f708ca 100644
--- a/python/pyspark/pandas/data_type_ops/boolean_ops.py
+++ b/python/pyspark/pandas/data_type_ops/boolean_ops.py
@@ -16,7 +16,7 @@
 #
 
 import numbers
-from typing import cast, Any, Union
+from typing import Any, Union
 
 import pandas as pd
 from pandas.api.types import CategoricalDtype
@@ -281,29 +281,19 @@ class BooleanOps(DataTypeOps):
         return operand
 
     def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
-        from pyspark.pandas.base import column_op
-
         return column_op(Column.__lt__)(left, right)
 
     def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
-        from pyspark.pandas.base import column_op
-
         return column_op(Column.__le__)(left, right)
 
     def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
-        from pyspark.pandas.base import column_op
-
         return column_op(Column.__ge__)(left, right)
 
     def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
-        from pyspark.pandas.base import column_op
-
         return column_op(Column.__gt__)(left, right)
 
     def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
-        from pyspark.pandas.base import column_op
-
-        return cast(IndexOpsLike, column_op(Column.__invert__)(operand))
+        return operand._with_new_scol(~operand.spark.column, field=operand._internal.data_fields[0])
 
 
 class BooleanExtensionOps(BooleanOps):
diff --git a/python/pyspark/pandas/data_type_ops/num_ops.py b/python/pyspark/pandas/data_type_ops/num_ops.py
index 1c7f051..16cb305 100644
--- a/python/pyspark/pandas/data_type_ops/num_ops.py
+++ b/python/pyspark/pandas/data_type_ops/num_ops.py
@@ -122,9 +122,6 @@ class NumericOps(DataTypeOps):
         right = transform_boolean_operand_to_numeric(right)
         return column_op(rmod)(left, right)
 
-    def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
-        return cast(IndexOpsLike, column_op(F.bitwise_not)(operand))
-
     def neg(self, operand: IndexOpsLike) -> IndexOpsLike:
         return cast(IndexOpsLike, column_op(Column.__neg__)(operand))
 
@@ -214,6 +211,11 @@ class IntegralOps(NumericOps):
         right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type)
         return numpy_column_op(rfloordiv)(left, right)
 
+    def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
+        return operand._with_new_scol(
+            F.bitwise_not(operand.spark.column), field=operand._internal.data_fields[0]
+        )
+
     def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
         dtype, spark_type = pandas_on_spark_type(dtype)
 
@@ -304,9 +306,6 @@ class FractionalOps(NumericOps):
         right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type)
         return numpy_column_op(rfloordiv)(left, right)
 
-    def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
-        raise TypeError("Unary ~ can not be applied to %s." % self.pretty_name)
-
     def isnull(self, index_ops: IndexOpsLike) -> IndexOpsLike:
         return index_ops._with_new_scol(
             index_ops.spark.column.isNull() | F.isnan(index_ops.spark.column),
@@ -348,9 +347,6 @@ class DecimalOps(FractionalOps):
     def pretty_name(self) -> str:
         return "decimal"
 
-    def invert(self, operand: IndexOpsLike) -> IndexOpsLike:
-        raise TypeError("Unary ~ can not be applied to %s." % self.pretty_name)
-
     def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
         raise TypeError("< can not be applied to %s." % self.pretty_name)
 
diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
index 9a05c682..d1b26ec 100644
--- a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
+++ b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py
@@ -30,7 +30,7 @@ from pyspark.pandas.typedef.typehints import (
     extension_dtypes_available,
     extension_float_dtypes_available,
 )
-from pyspark.sql.types import ByteType, DecimalType, IntegerType, LongType
+from pyspark.sql.types import DecimalType, IntegralType
 from pyspark.testing.pandasutils import PandasOnSparkTestCase
 
 
@@ -328,7 +328,7 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils):
 
     def test_invert(self):
         for pser, psser in self.numeric_pser_psser_pairs:
-            if type(psser.spark.data_type) in [ByteType, IntegerType, LongType]:
+            if isinstance(psser.spark.data_type, IntegralType):
                 self.assert_eq(~pser, ~psser)
             else:
                 self.assertRaises(TypeError, lambda: ~psser)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org