You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/10/26 12:36:54 UTC

[GitHub] [arrow-nanoarrow] jorisvandenbossche commented on a diff in pull request #62: [Python] Basic Array class wrapping C struct (with conversion to numpy)

jorisvandenbossche commented on code in PR #62:
URL: https://github.com/apache/arrow-nanoarrow/pull/62#discussion_r1005621109


##########
python/tests/test_nanoarrow.py:
##########
@@ -6,22 +8,64 @@
 import pytest
 
 
-def test_as_numpy_array():
-    
-    arr = pa.array([1, 2, 3])
-    result = nanoarrow.as_numpy_array(arr)
-    expected = arr.to_numpy()
+def test_array_from_pyarrow():
+    parr = pa.array([1, 2, 3])
+    result = nanoarrow.Array.from_pyarrow(parr)
+    assert result.format == "l"
+
+
+def test_array_to_numpy_lifetime():
+
+    parr = pa.array([1, 2, 3])
+    arr = nanoarrow.Array.from_pyarrow(parr)
+    refcount = sys.getrefcount(arr)
+    result = arr.to_numpy()
+    assert sys.getrefcount(arr) > refcount
+    assert result.base is arr
+    del arr
+    result
+    assert result.base
+
+
+def test_array_to_numpy():
+    parr = pa.array([1, 2, 3])
+    arr = nanoarrow.Array.from_pyarrow(parr)
+    result = arr.to_numpy()
+    expected = parr.to_numpy()
     np.testing.assert_array_equal(result, expected)
 
-    arr = pa.array([1, 2, 3], pa.uint8())
-    result = nanoarrow.as_numpy_array(arr)
-    expected = arr.to_numpy()
+    parr = pa.array([1, 2, 3], pa.uint8())
+    arr = nanoarrow.Array.from_pyarrow(parr)
+    result = arr.to_numpy()
+    expected = parr.to_numpy()
     np.testing.assert_array_equal(result, expected)
 
-    arr = pa.array([1, 2, None])
+    arr = nanoarrow.Array.from_pyarrow(pa.array([1, 2, None]))
     with pytest.raises(ValueError, match="Cannot convert array with nulls"):
-        nanoarrow.as_numpy_array(arr)
+        arr.to_numpy()
 
-    arr = pa.array([[1], [2, 3]])
+    arr = nanoarrow.Array.from_pyarrow(pa.array([[1], [2, 3]]))
     with pytest.raises(TypeError, match="Cannot convert a non-primitive array"):
-        nanoarrow.as_numpy_array(arr)
+       arr.to_numpy()
+
+
+def test_from_external_pointers():
+    pytest.importorskip("pyarrow.cffi")
+
+    from pyarrow.cffi import ffi
+
+    c_schema = ffi.new("struct ArrowSchema*")
+    ptr_schema = int(ffi.cast("uintptr_t", c_schema))
+    c_array = ffi.new("struct ArrowArray*")
+    ptr_array = int(ffi.cast("uintptr_t", c_array))
+
+    typ = pa.int32()
+    parr = pa.array([1, 2, 3], type=typ)
+    parr._export_to_c(ptr_array, ptr_schema)
+
+    arr = nanoarrow.Array.from_pointers(ptr_array, ptr_schema)
+    assert arr.to_numpy().tolist() == [1, 2, 3]
+
+    # trying to import second time should not cause a segfault? To enable

Review Comment:
   Yes, I think that's correct. Right now, with raw pointers, it is the responsibility of the user passing those pointers around to ensure that they are not consumed twice (a consumer cannot know that there is another consumer of the same struct).
   
   I think using PyCapsules instead of raw pointers could make this more robust. 
   But in practice, to avoid this issue, I suppose we should "move" the array struct when constructing from external raw pointers, so that the struct behind the original pointer can be marked as released (release callback set to NULL).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org