You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2024/01/11 18:00:56 UTC
(arrow) branch main updated: GH-37164: [Python] Attach Python stacktrace to errors in `ConvertPyError` (#39380)
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 6fe7480125 GH-37164: [Python] Attach Python stacktrace to errors in `ConvertPyError` (#39380)
6fe7480125 is described below
commit 6fe7480125b7fdb3a000a27fcc9cf464697b8a60
Author: Will Jones <wi...@gmail.com>
AuthorDate: Thu Jan 11 10:00:49 2024 -0800
GH-37164: [Python] Attach Python stacktrace to errors in `ConvertPyError` (#39380)
### Rationale for this change
Users might define Python generators that are used in RecordBatchReaders and then exported through the C Data Interface. However, if an error occurs in their generator, the stacktrace and message are currently swallowed in the current `ConvertPyError` implementation, which only provides the type of error. This makes debugging code that passed RBRs difficult.
### What changes are included in this PR?
Changes `ConvertPyError` to provide the fully formatted traceback in the error message.
### Are these changes tested?
Yes, added one test to validate the errors messages are propagated.
### Are there any user-facing changes?
This is a minor change in the error reporting behavior, which will provide more information.
* Closes: #37164
Authored-by: Will Jones <wi...@gmail.com>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
python/pyarrow/src/arrow/python/common.cc | 49 ++++++++++++++++++++++++--
python/pyarrow/src/arrow/python/python_test.cc | 17 ++++++---
python/pyarrow/tests/test_cffi.py | 34 ++++++++++++++++++
3 files changed, 92 insertions(+), 8 deletions(-)
diff --git a/python/pyarrow/src/arrow/python/common.cc b/python/pyarrow/src/arrow/python/common.cc
index 6fe2ed4dae..2f44a9122f 100644
--- a/python/pyarrow/src/arrow/python/common.cc
+++ b/python/pyarrow/src/arrow/python/common.cc
@@ -19,6 +19,7 @@
#include <cstdlib>
#include <mutex>
+#include <sstream>
#include <string>
#include "arrow/memory_pool.h"
@@ -90,9 +91,15 @@ class PythonErrorDetail : public StatusDetail {
std::string ToString() const override {
// This is simple enough not to need the GIL
- const auto ty = reinterpret_cast<const PyTypeObject*>(exc_type_.obj());
- // XXX Should we also print traceback?
- return std::string("Python exception: ") + ty->tp_name;
+ Result<std::string> result = FormatImpl();
+
+ if (result.ok()) {
+ return result.ValueOrDie();
+ } else {
+ // Fallback to just the exception type
+ const auto ty = reinterpret_cast<const PyTypeObject*>(exc_type_.obj());
+ return std::string("Python exception: ") + ty->tp_name;
+ }
}
void RestorePyError() const {
@@ -131,6 +138,42 @@ class PythonErrorDetail : public StatusDetail {
}
protected:
+ Result<std::string> FormatImpl() const {
+ PyAcquireGIL lock;
+
+ // Use traceback.format_exception()
+ OwnedRef traceback_module;
+ RETURN_NOT_OK(internal::ImportModule("traceback", &traceback_module));
+
+ OwnedRef fmt_exception;
+ RETURN_NOT_OK(internal::ImportFromModule(traceback_module.obj(), "format_exception",
+ &fmt_exception));
+
+ OwnedRef formatted;
+ formatted.reset(PyObject_CallFunctionObjArgs(fmt_exception.obj(), exc_type_.obj(),
+ exc_value_.obj(), exc_traceback_.obj(),
+ NULL));
+ RETURN_IF_PYERROR();
+
+ std::stringstream ss;
+ ss << "Python exception: ";
+ Py_ssize_t num_lines = PySequence_Length(formatted.obj());
+ RETURN_IF_PYERROR();
+
+ for (Py_ssize_t i = 0; i < num_lines; ++i) {
+ Py_ssize_t line_size;
+
+ PyObject* line = PySequence_GetItem(formatted.obj(), i);
+ RETURN_IF_PYERROR();
+
+ const char* data = PyUnicode_AsUTF8AndSize(line, &line_size);
+ RETURN_IF_PYERROR();
+
+ ss << std::string_view(data, line_size);
+ }
+ return ss.str();
+ }
+
PythonErrorDetail() = default;
OwnedRefNoGIL exc_type_, exc_value_, exc_traceback_;
diff --git a/python/pyarrow/src/arrow/python/python_test.cc b/python/pyarrow/src/arrow/python/python_test.cc
index 01ab8a3038..746bf41091 100644
--- a/python/pyarrow/src/arrow/python/python_test.cc
+++ b/python/pyarrow/src/arrow/python/python_test.cc
@@ -174,10 +174,14 @@ Status TestOwnedRefNoGILMoves() {
}
}
-std::string FormatPythonException(const std::string& exc_class_name) {
+std::string FormatPythonException(const std::string& exc_class_name,
+ const std::string& exc_value) {
std::stringstream ss;
ss << "Python exception: ";
ss << exc_class_name;
+ ss << ": ";
+ ss << exc_value;
+ ss << "\n";
return ss.str();
}
@@ -205,7 +209,8 @@ Status TestCheckPyErrorStatus() {
}
PyErr_SetString(PyExc_TypeError, "some error");
- ASSERT_OK(check_error(st, "some error", FormatPythonException("TypeError")));
+ ASSERT_OK(
+ check_error(st, "some error", FormatPythonException("TypeError", "some error")));
ASSERT_TRUE(st.IsTypeError());
PyErr_SetString(PyExc_ValueError, "some error");
@@ -223,7 +228,8 @@ Status TestCheckPyErrorStatus() {
}
PyErr_SetString(PyExc_NotImplementedError, "some error");
- ASSERT_OK(check_error(st, "some error", FormatPythonException("NotImplementedError")));
+ ASSERT_OK(check_error(st, "some error",
+ FormatPythonException("NotImplementedError", "some error")));
ASSERT_TRUE(st.IsNotImplemented());
// No override if a specific status code is given
@@ -246,7 +252,8 @@ Status TestCheckPyErrorStatusNoGIL() {
lock.release();
ASSERT_TRUE(st.IsUnknownError());
ASSERT_EQ(st.message(), "zzzt");
- ASSERT_EQ(st.detail()->ToString(), FormatPythonException("ZeroDivisionError"));
+ ASSERT_EQ(st.detail()->ToString(),
+ FormatPythonException("ZeroDivisionError", "zzzt"));
return Status::OK();
}
}
@@ -257,7 +264,7 @@ Status TestRestorePyErrorBasics() {
ASSERT_FALSE(PyErr_Occurred());
ASSERT_TRUE(st.IsUnknownError());
ASSERT_EQ(st.message(), "zzzt");
- ASSERT_EQ(st.detail()->ToString(), FormatPythonException("ZeroDivisionError"));
+ ASSERT_EQ(st.detail()->ToString(), FormatPythonException("ZeroDivisionError", "zzzt"));
RestorePyError(st);
ASSERT_TRUE(PyErr_Occurred());
diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py
index a9c17cc100..ff81b06440 100644
--- a/python/pyarrow/tests/test_cffi.py
+++ b/python/pyarrow/tests/test_cffi.py
@@ -414,6 +414,40 @@ def test_export_import_batch_reader(reader_factory):
pa.RecordBatchReader._import_from_c(ptr_stream)
+@needs_cffi
+def test_export_import_exception_reader():
+ # See: https://github.com/apache/arrow/issues/37164
+ c_stream = ffi.new("struct ArrowArrayStream*")
+ ptr_stream = int(ffi.cast("uintptr_t", c_stream))
+
+ gc.collect() # Make sure no Arrow data dangles in a ref cycle
+ old_allocated = pa.total_allocated_bytes()
+
+ def gen():
+ if True:
+ try:
+ raise ValueError('foo')
+ except ValueError as e:
+ raise NotImplementedError('bar') from e
+ else:
+ yield from make_batches()
+
+ original = pa.RecordBatchReader.from_batches(make_schema(), gen())
+ original._export_to_c(ptr_stream)
+
+ reader = pa.RecordBatchReader._import_from_c(ptr_stream)
+ with pytest.raises(OSError) as exc_info:
+ reader.read_next_batch()
+
+ # inner *and* outer exception should be present
+ assert 'ValueError: foo' in str(exc_info.value)
+ assert 'NotImplementedError: bar' in str(exc_info.value)
+ # Stacktrace containing line of the raise statement
+ assert 'raise ValueError(\'foo\')' in str(exc_info.value)
+
+ assert pa.total_allocated_bytes() == old_allocated
+
+
@needs_cffi
def test_imported_batch_reader_error():
c_stream = ffi.new("struct ArrowArrayStream*")