You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2023/04/11 08:14:16 UTC
[arrow] branch main updated: GH-34944: [Python] Fix crash when converting non-sequence object with getitem in pa.array() (#34958)
This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 61b89df503 GH-34944: [Python] Fix crash when converting non-sequence object with getitem in pa.array() (#34958)
61b89df503 is described below
commit 61b89df503c85e892c0b1b4f0d02b08b26f9eae1
Author: Joris Van den Bossche <jo...@gmail.com>
AuthorDate: Tue Apr 11 10:14:05 2023 +0200
GH-34944: [Python] Fix crash when converting non-sequence object with getitem in pa.array() (#34958)
### What changes are included in this PR?
Some Python objects can pass `PySequence_Check` without being "proper" sequences with a length, which causes the subsequent `PySequence_Size` call to fail. We didn't check for Python errors at that point, so the failure was not raised as a Python exception and led to a crash instead.
* Closes: #34944
Authored-by: Joris Van den Bossche <jo...@gmail.com>
Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
python/pyarrow/src/arrow/python/python_to_arrow.cc | 1 +
python/pyarrow/tests/test_convert_builtin.py | 12 ++++++++++++
2 files changed, 13 insertions(+)
diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc
index 9e7f07ef81..2bb6a6f459 100644
--- a/python/pyarrow/src/arrow/python/python_to_arrow.cc
+++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc
@@ -1099,6 +1099,7 @@ Status ConvertToSequenceAndInferSize(PyObject* obj, PyObject** seq, int64_t* siz
if (PySequence_Check(obj)) {
// obj is already a sequence
int64_t real_size = static_cast<int64_t>(PySequence_Size(obj));
+ RETURN_IF_PYERROR();
if (*size < 0) {
*size = real_size;
} else {
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index b4ca93ee25..587b4c44d2 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -133,6 +133,18 @@ def test_failing_iterator():
pa.array((1 // 0 for x in range(10)), size=10)
+class ObjectWithOnlyGetitem:
+ def __getitem__(self, key):
+ return 3
+
+
+def test_object_with_getitem():
+ # https://github.com/apache/arrow/issues/34944
+ # considered as sequence because of __getitem__, but has no length
+ with pytest.raises(TypeError, match="has no len()"):
+ pa.array(ObjectWithOnlyGetitem())
+
+
def _as_list(xs):
return xs