You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2017/04/27 16:10:28 UTC

arrow git commit: ARROW-866: [Python] Be robust to PyErr_Fetch returning a null exc value

Repository: arrow
Updated Branches:
  refs/heads/master 909f826b5 -> 81be9c667


ARROW-866: [Python] Be robust to PyErr_Fetch returning a null exc value

cc @BryanCutler. This was a tricky one. I am not sure how to reproduce with our current code -- I reverted the patch from ARROW-822 to get a reproduction so I could fix this. Now, the error raised is:

```
/home/wesm/code/arrow/python/pyarrow/_error.pyx in pyarrow._error.check_status (/home/wesm/code/arrow/python/build/temp.linux-x86_64-2.7/_error.cxx:1324)()
     58             raise ArrowInvalid(message)
     59         elif status.IsIOError():
---> 60             raise ArrowIOError(message)
     61         elif status.IsOutOfMemory():
     62             raise ArrowMemoryError(message)

ArrowIOError: IOError: Error message was null
```

I'm not sure why calling `tell` on the socket object results in a bad exception state, but in any case it seems that the result of `PyErr_Fetch` cannot be relied upon to be non-null even when `PyErr_Occurred()` returns non-null

Author: Wes McKinney <we...@twosigma.com>

Closes #606 from wesm/ARROW-866 and squashes the following commits:

fa395cd [Wes McKinney] Enable other kinds of Status errors to be returned
0bd11c2 [Wes McKinney] Consolidate error handling code a bit
9d59dd2 [Wes McKinney] Be robust to PyErr_Fetch returning a null exc value


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/81be9c66
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/81be9c66
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/81be9c66

Branch: refs/heads/master
Commit: 81be9c6679466177d4b8e5dbca55f81185bb3ec6
Parents: 909f826
Author: Wes McKinney <we...@twosigma.com>
Authored: Thu Apr 27 18:10:24 2017 +0200
Committer: Uwe L. Korn <uw...@xhochy.com>
Committed: Thu Apr 27 18:10:24 2017 +0200

----------------------------------------------------------------------
 cpp/src/arrow/python/common.cc | 22 ++++++++++++++++++++++
 cpp/src/arrow/python/common.h  | 29 ++++++++++++++---------------
 cpp/src/arrow/python/io.cc     | 29 +++++++----------------------
 cpp/src/arrow/status.h         |  3 +++
 4 files changed, 46 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/81be9c66/cpp/src/arrow/python/common.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
index 717cb5c..bedd458 100644
--- a/cpp/src/arrow/python/common.cc
+++ b/cpp/src/arrow/python/common.cc
@@ -64,5 +64,27 @@ PyBuffer::~PyBuffer() {
   Py_XDECREF(obj_);
 }
 
+Status CheckPyError(StatusCode code) {
+  if (PyErr_Occurred()) {
+    PyObject *exc_type, *exc_value, *traceback;
+    PyErr_Fetch(&exc_type, &exc_value, &traceback);
+    PyObjectStringify stringified(exc_value);
+    Py_XDECREF(exc_type);
+    Py_XDECREF(exc_value);
+    Py_XDECREF(traceback);
+    PyErr_Clear();
+
+    // ARROW-866: in some esoteric cases, formatting exc_value can fail. This
+    // was encountered when calling tell() on a socket file
+    if (stringified.bytes != nullptr) {
+      std::string message(stringified.bytes);
+      return Status(code, message);
+    } else {
+      return Status(code, "Error message was null");
+    }
+  }
+  return Status::OK();
+}
+
 }  // namespace py
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/81be9c66/cpp/src/arrow/python/common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
index 0211823..c5745a5 100644
--- a/cpp/src/arrow/python/common.h
+++ b/cpp/src/arrow/python/common.h
@@ -98,27 +98,26 @@ struct ARROW_EXPORT PyObjectStringify {
     if (PyUnicode_Check(obj)) {
       bytes_obj = PyUnicode_AsUTF8String(obj);
       tmp_obj.reset(bytes_obj);
+      bytes = PyBytes_AsString(bytes_obj);
+      size = PyBytes_GET_SIZE(bytes_obj);
+    } else if (PyBytes_Check(obj)) {
+      bytes = PyBytes_AsString(obj);
+      size = PyBytes_GET_SIZE(obj);
     } else {
-      bytes_obj = obj;
+      bytes = nullptr;
+      size = -1;
     }
-    bytes = PyBytes_AsString(bytes_obj);
-    size = PyBytes_GET_SIZE(bytes_obj);
   }
 };
 
+Status CheckPyError(StatusCode code = StatusCode::UnknownError);
+
 // TODO(wesm): We can just let errors pass through. To be explored later
-#define RETURN_IF_PYERROR()                         \
-  if (PyErr_Occurred()) {                           \
-    PyObject *exc_type, *exc_value, *traceback;     \
-    PyErr_Fetch(&exc_type, &exc_value, &traceback); \
-    PyObjectStringify stringified(exc_value);       \
-    std::string message(stringified.bytes);         \
-    Py_DECREF(exc_type);                            \
-    Py_XDECREF(exc_value);                          \
-    Py_XDECREF(traceback);                          \
-    PyErr_Clear();                                  \
-    return Status::UnknownError(message);           \
-  }
+#define RETURN_IF_PYERROR()                     \
+  RETURN_NOT_OK(CheckPyError());
+
+#define PY_RETURN_IF_ERROR(CODE)                \
+  RETURN_NOT_OK(CheckPyError(CODE));
 
 // Return the common PyArrow memory pool
 ARROW_EXPORT void set_default_memory_pool(MemoryPool* pool);

http://git-wip-us.apache.org/repos/asf/arrow/blob/81be9c66/cpp/src/arrow/python/io.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/io.cc b/cpp/src/arrow/python/io.cc
index 327e8fe..a719385 100644
--- a/cpp/src/arrow/python/io.cc
+++ b/cpp/src/arrow/python/io.cc
@@ -41,21 +41,6 @@ PythonFile::~PythonFile() {
   Py_DECREF(file_);
 }
 
-static Status CheckPyError() {
-  if (PyErr_Occurred()) {
-    PyObject *exc_type, *exc_value, *traceback;
-    PyErr_Fetch(&exc_type, &exc_value, &traceback);
-    PyObjectStringify stringified(exc_value);
-    std::string message(stringified.bytes);
-    Py_XDECREF(exc_type);
-    Py_XDECREF(exc_value);
-    Py_XDECREF(traceback);
-    PyErr_Clear();
-    return Status::IOError(message);
-  }
-  return Status::OK();
-}
-
 // This is annoying: because C++11 does not allow implicit conversion of string
 // literals to non-const char*, we need to go through some gymnastics to use
 // PyObject_CallMethod without a lot of pain (its arguments are non-const
@@ -71,7 +56,7 @@ Status PythonFile::Close() {
   // whence: 0 for relative to start of file, 2 for end of file
   PyObject* result = cpp_PyObject_CallMethod(file_, "close", "()");
   Py_XDECREF(result);
-  ARROW_RETURN_NOT_OK(CheckPyError());
+  PY_RETURN_IF_ERROR(StatusCode::IOError);
   return Status::OK();
 }
 
@@ -79,13 +64,13 @@ Status PythonFile::Seek(int64_t position, int whence) {
   // whence: 0 for relative to start of file, 2 for end of file
   PyObject* result = cpp_PyObject_CallMethod(file_, "seek", "(ii)", position, whence);
   Py_XDECREF(result);
-  ARROW_RETURN_NOT_OK(CheckPyError());
+  PY_RETURN_IF_ERROR(StatusCode::IOError);
   return Status::OK();
 }
 
 Status PythonFile::Read(int64_t nbytes, PyObject** out) {
   PyObject* result = cpp_PyObject_CallMethod(file_, "read", "(i)", nbytes);
-  ARROW_RETURN_NOT_OK(CheckPyError());
+  PY_RETURN_IF_ERROR(StatusCode::IOError);
   *out = result;
   return Status::OK();
 }
@@ -93,24 +78,24 @@ Status PythonFile::Read(int64_t nbytes, PyObject** out) {
 Status PythonFile::Write(const uint8_t* data, int64_t nbytes) {
   PyObject* py_data =
       PyBytes_FromStringAndSize(reinterpret_cast<const char*>(data), nbytes);
-  ARROW_RETURN_NOT_OK(CheckPyError());
+  PY_RETURN_IF_ERROR(StatusCode::IOError);
 
   PyObject* result = cpp_PyObject_CallMethod(file_, "write", "(O)", py_data);
   Py_XDECREF(py_data);
   Py_XDECREF(result);
-  ARROW_RETURN_NOT_OK(CheckPyError());
+  PY_RETURN_IF_ERROR(StatusCode::IOError);
   return Status::OK();
 }
 
 Status PythonFile::Tell(int64_t* position) {
   PyObject* result = cpp_PyObject_CallMethod(file_, "tell", "()");
-  ARROW_RETURN_NOT_OK(CheckPyError());
+  PY_RETURN_IF_ERROR(StatusCode::IOError);
 
   *position = PyLong_AsLongLong(result);
   Py_DECREF(result);
 
   // PyLong_AsLongLong can raise OverflowError
-  ARROW_RETURN_NOT_OK(CheckPyError());
+  PY_RETURN_IF_ERROR(StatusCode::IOError);
 
   return Status::OK();
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/81be9c66/cpp/src/arrow/status.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h
index dd65b75..6a8cee2 100644
--- a/cpp/src/arrow/status.h
+++ b/cpp/src/arrow/status.h
@@ -91,6 +91,9 @@ class ARROW_EXPORT Status {
   Status() : state_(NULL) {}
   ~Status() { delete[] state_; }
 
+  Status(StatusCode code, const std::string& msg)
+    : Status(code, msg, -1) {}
+
   // Copy the specified status.
   Status(const Status& s);
   void operator=(const Status& s);