You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/05/27 03:17:39 UTC

[GitHub] [arrow] wesm commented on a change in pull request #7273: ARROW-5854: [Python] Expose compare kernels on Array class

wesm commented on a change in pull request #7273:
URL: https://github.com/apache/arrow/pull/7273#discussion_r430538863



##########
File path: python/pyarrow/tests/test_compute.py
##########
@@ -238,3 +238,52 @@ def test_filter_errors():
         with pytest.raises(pa.ArrowInvalid,
                            match="must all be the same length"):
             obj.filter(mask)
+
+
+def test_compare_array():
+
+    arr1 = pa.array([1, 2, 3, 4, None])
+    arr2 = pa.array([1, 1, 4, None, 4])
+
+    result = arr1 == arr2
+    assert result.equals(pa.array([True, False, False, None, None]))
+
+    result = arr1 != arr2
+    assert result.equals(pa.array([False, True, True, None, None]))
+
+    result = arr1 < arr2
+    assert result.equals(pa.array([False, False, True, None, None]))
+
+    result = arr1 <= arr2
+    assert result.equals(pa.array([True, False, True, None, None]))
+
+    result = arr1 > arr2
+    assert result.equals(pa.array([False, True, False, None, None]))
+
+    result = arr1 >= arr2
+    assert result.equals(pa.array([True, True, False, None, None]))
+
+
+def test_compare_scalar():
+
+    arr = pa.array([1, 2, 3, None])
+    # TODO this is a hacky way to construct a scalar ..
+    scalar = pa.array([2]).sum()
+
+    result = arr == scalar
+    assert result.equals(pa.array([False, True, False, None]))
+
+    result = arr != scalar
+    assert result.equals(pa.array([True, False, True, None]))
+
+    result = arr < scalar
+    assert result.equals(pa.array([True, False, False, None]))
+
+    result = arr <= scalar
+    assert result.equals(pa.array([True, True, False, None]))
+
+    result = arr > scalar
+    assert result.equals(pa.array([False, False, True, None]))
+
+    result = arr >= scalar
+    assert result.equals(pa.array([False, True, True, None]))

Review comment:
       Can you add a test case that shows comparison against a null scalar?

##########
File path: python/pyarrow/array.pxi
##########
@@ -602,14 +604,28 @@ cdef class Array(_PandasConvertible):
         self.ap = sp_array.get()
         self.type = pyarrow_wrap_data_type(self.sp_array.get().type())
 
-    def __eq__(self, other):
-        raise NotImplementedError('Comparisons with pyarrow.Array are not '
-                                  'implemented')
-
     def _debug_print(self):
         with nogil:
             check_status(DebugPrint(deref(self.ap), 0))
 
+    def __richcmp__(self, other, int op):
+        cdef str function_name
+
+        if op == Py_EQ:
+            function_name = "equal"
+        elif op == Py_NE:
+            function_name = "not_equal"
+        elif op == Py_GT:
+            function_name = "greater"
+        elif op == Py_GE:
+            function_name = "greater_equal"
+        elif op == Py_LT:
+            function_name = "less"
+        elif op == Py_LE:
+            function_name = "less_equal"
+
+        return _pc().call_function(function_name, [self, other])

Review comment:
       Maybe factor this out into a helper function to avoid the code duplication?

##########
File path: python/pyarrow/tests/test_compute.py
##########
@@ -238,3 +238,52 @@ def test_filter_errors():
         with pytest.raises(pa.ArrowInvalid,
                            match="must all be the same length"):
             obj.filter(mask)
+
+
+def test_compare_array():
+
+    arr1 = pa.array([1, 2, 3, 4, None])
+    arr2 = pa.array([1, 1, 4, None, 4])
+
+    result = arr1 == arr2
+    assert result.equals(pa.array([True, False, False, None, None]))
+
+    result = arr1 != arr2
+    assert result.equals(pa.array([False, True, True, None, None]))
+
+    result = arr1 < arr2
+    assert result.equals(pa.array([False, False, True, None, None]))
+
+    result = arr1 <= arr2
+    assert result.equals(pa.array([True, False, True, None, None]))
+
+    result = arr1 > arr2
+    assert result.equals(pa.array([False, True, False, None, None]))
+
+    result = arr1 >= arr2
+    assert result.equals(pa.array([True, True, False, None, None]))
+
+
+def test_compare_scalar():
+
+    arr = pa.array([1, 2, 3, None])
+    # TODO this is a hacky way to construct a scalar ..
+    scalar = pa.array([2]).sum()

Review comment:
       Is there a JIRA issue for adding a `pyarrow.scalar` function (so that this could be written as `pa.scalar(2)`)?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org