You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2023/04/11 08:14:16 UTC

[arrow] branch main updated: GH-34944: [Python] Fix crash when converting non-sequence object with getitem in pa.array() (#34958)

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 61b89df503 GH-34944: [Python] Fix crash when converting non-sequence object with getitem in pa.array() (#34958)
61b89df503 is described below

commit 61b89df503c85e892c0b1b4f0d02b08b26f9eae1
Author: Joris Van den Bossche <jo...@gmail.com>
AuthorDate: Tue Apr 11 10:14:05 2023 +0200

    GH-34944: [Python] Fix crash when converting non-sequence object with getitem in pa.array() (#34958)
    
    ### What changes are included in this PR?
    
    Some Python objects can pass `PySequence_Check` without being "proper" sequences with a length, which causes the subsequent `PySequence_Size` call to fail. We didn't check for Python errors there, and so failed to properly raise this as a Python exception.
    * Closes: #34944
    
    Authored-by: Joris Van den Bossche <jo...@gmail.com>
    Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
 python/pyarrow/src/arrow/python/python_to_arrow.cc |  1 +
 python/pyarrow/tests/test_convert_builtin.py       | 12 ++++++++++++
 2 files changed, 13 insertions(+)

diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc
index 9e7f07ef81..2bb6a6f459 100644
--- a/python/pyarrow/src/arrow/python/python_to_arrow.cc
+++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc
@@ -1099,6 +1099,7 @@ Status ConvertToSequenceAndInferSize(PyObject* obj, PyObject** seq, int64_t* siz
   if (PySequence_Check(obj)) {
     // obj is already a sequence
     int64_t real_size = static_cast<int64_t>(PySequence_Size(obj));
+    RETURN_IF_PYERROR();
     if (*size < 0) {
       *size = real_size;
     } else {
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index b4ca93ee25..587b4c44d2 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -133,6 +133,18 @@ def test_failing_iterator():
         pa.array((1 // 0 for x in range(10)), size=10)
 
 
+class ObjectWithOnlyGetitem:
+    def __getitem__(self, key):
+        return 3
+
+
+def test_object_with_getitem():
+    # https://github.com/apache/arrow/issues/34944
+    # considered as sequence because of __getitem__, but has no length
+    with pytest.raises(TypeError, match="has no len()"):
+        pa.array(ObjectWithOnlyGetitem())
+
+
 def _as_list(xs):
     return xs