You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/04/21 13:24:21 UTC

[arrow] branch master updated: ARROW-2390: [C++/Python] Map Python exceptions to Arrow status codes

This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 4c71f30  ARROW-2390: [C++/Python] Map Python exceptions to Arrow status codes
4c71f30 is described below

commit 4c71f309041e37806dfb7abfe89d6a86a0fb2b97
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Sat Apr 21 14:24:11 2018 +0100

    ARROW-2390: [C++/Python] Map Python exceptions to Arrow status codes
    
    Instead of always returning an "unknown error" status code when a Python exception is raised, we can easily discriminate and return e.g. a TypeError status code when a Python TypeError is raised, etc.
    
    Author: Antoine Pitrou <an...@python.org>
    
    Closes #1916 from pitrou/ARROW-2390-check-py-error and squashes the following commits:
    
    8f385b79 <Antoine Pitrou> Fix test on Python 2.7
    f4a5b01f <Antoine Pitrou> ARROW-2390:  Map Python exceptions to Arrow status codes
---
 cpp/src/arrow/python/common.cc               | 17 ++++++++++
 cpp/src/arrow/python/python-test.cc          | 48 +++++++++++++++++++++++++++-
 python/pyarrow/tests/test_convert_builtin.py |  4 +--
 python/pyarrow/tests/test_convert_pandas.py  | 40 +++++++++++------------
 python/pyarrow/tests/test_serialization.py   |  4 +--
 python/pyarrow/tests/test_table.py           |  2 +-
 6 files changed, 87 insertions(+), 28 deletions(-)

diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
index bd13f29..a565d00 100644
--- a/cpp/src/arrow/python/common.cc
+++ b/cpp/src/arrow/python/common.cc
@@ -98,6 +98,23 @@ Status CheckPyError(StatusCode code) {
 
     std::string message;
     RETURN_NOT_OK(internal::PyObject_StdStringStr(exc_value, &message));
+
+    if (code == StatusCode::UnknownError) {
+      // Try to match the Python exception type with an appropriate Status code
+      if (PyErr_GivenExceptionMatches(exc_type, PyExc_MemoryError)) {
+        code = StatusCode::OutOfMemory;
+      } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_KeyError)) {
+        code = StatusCode::KeyError;
+      } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_TypeError)) {
+        code = StatusCode::TypeError;
+      } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_ValueError)) {
+        code = StatusCode::Invalid;
+      } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_EnvironmentError)) {
+        code = StatusCode::IOError;
+      } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_NotImplementedError)) {
+        code = StatusCode::NotImplemented;
+      }
+    }
     return Status(code, message);
   }
   return Status::OK();
diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc
index 3ea814f..81d3fea 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -77,6 +77,52 @@ TEST(OwnedRefNoGIL, TestMoves) {
   ASSERT_EQ(Py_REFCNT(v), 1);
 }
 
+TEST(CheckPyError, TestStatus) {
+  PyAcquireGIL lock;
+  Status st;
+
+  auto check_error = [](Status& st, const char* expected_message = "some error") {
+    st = CheckPyError();
+    ASSERT_EQ(st.message(), expected_message);
+    ASSERT_FALSE(PyErr_Occurred());
+  };
+
+  for (PyObject* exc_type : {PyExc_Exception, PyExc_SyntaxError}) {
+    PyErr_SetString(exc_type, "some error");
+    check_error(st);
+    ASSERT_TRUE(st.IsUnknownError());
+  }
+
+  PyErr_SetString(PyExc_TypeError, "some error");
+  check_error(st);
+  ASSERT_TRUE(st.IsTypeError());
+
+  PyErr_SetString(PyExc_ValueError, "some error");
+  check_error(st);
+  ASSERT_TRUE(st.IsInvalid());
+
+  PyErr_SetString(PyExc_KeyError, "some error");
+  check_error(st, "'some error'");
+  ASSERT_TRUE(st.IsKeyError());
+
+  for (PyObject* exc_type : {PyExc_OSError, PyExc_IOError}) {
+    PyErr_SetString(exc_type, "some error");
+    check_error(st);
+    ASSERT_TRUE(st.IsIOError());
+  }
+
+  PyErr_SetString(PyExc_NotImplementedError, "some error");
+  check_error(st);
+  ASSERT_TRUE(st.IsNotImplemented());
+
+  // No override if a specific status code is given
+  PyErr_SetString(PyExc_TypeError, "some error");
+  st = CheckPyError(StatusCode::SerializationError);
+  ASSERT_TRUE(st.IsSerializationError());
+  ASSERT_EQ(st.message(), "some error");
+  ASSERT_FALSE(PyErr_Occurred());
+}
+
 class DecimalTest : public ::testing::Test {
  public:
   DecimalTest() : lock_(), decimal_constructor_() {
@@ -221,7 +267,7 @@ TEST(BuiltinConversionTest, TestMixedTypeFails) {
   ASSERT_EQ(PyList_SetItem(list, 1, integer), 0);
   ASSERT_EQ(PyList_SetItem(list, 2, doub), 0);
 
-  ASSERT_RAISES(UnknownError, ConvertPySequence(list, pool, &arr));
+  ASSERT_RAISES(TypeError, ConvertPySequence(list, pool, &arr));
 }
 
 TEST_F(DecimalTest, FromPythonDecimalRescaleNotTruncateable) {
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index f21e567..a18d183 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -316,7 +316,7 @@ def test_sequence_utf8_to_unicode():
 
     # test a non-utf8 unicode string
     val = (u'maƱana').encode('utf-16-le')
-    with pytest.raises(pa.ArrowException):
+    with pytest.raises(pa.ArrowInvalid):
         pa.array([val], type=pa.string())
 
 
@@ -496,7 +496,7 @@ def test_sequence_mixed_nesting_levels():
 
 def test_sequence_mixed_types_fails():
     data = ['a', 1, 2.0]
-    with pytest.raises(pa.ArrowException):
+    with pytest.raises(pa.ArrowTypeError):
         pa.array(data)
 
 
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 6970975..c43c29d 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -291,10 +291,10 @@ class TestConvertMetadata(object):
             batch = pa.RecordBatch.from_arrays([arr], ['foo'])
             table = pa.Table.from_batches([batch, batch, batch])
 
-            with pytest.raises(pa.ArrowException):
+            with pytest.raises(pa.ArrowInvalid):
                 arr.to_pandas()
 
-            with pytest.raises(pa.ArrowException):
+            with pytest.raises(pa.ArrowInvalid):
                 table.to_pandas()
 
     def test_unicode_with_unicode_column_and_index(self):
@@ -1238,7 +1238,7 @@ class TestConvertStringLikeTypes(object):
     # cannot be converted to utf-8
     def test_array_of_bytes_to_strings_bad_data(self):
         with pytest.raises(
-                pa.lib.ArrowException,
+                pa.lib.ArrowInvalid,
                 message="Unknown error: 'utf-8' codec can't decode byte 0x80 "
                 "in position 0: invalid start byte"):
             pa.array(np.array([b'\x80\x81'], dtype=object), pa.string())
@@ -1321,12 +1321,12 @@ class TestConvertDecimalTypes(object):
     def test_decimal_fails_with_truncation(self):
         data1 = [decimal.Decimal('1.234')]
         type1 = pa.decimal128(10, 2)
-        with pytest.raises(pa.ArrowException):
+        with pytest.raises(pa.ArrowInvalid):
             pa.array(data1, type=type1)
 
         data2 = [decimal.Decimal('1.2345')]
         type2 = pa.decimal128(10, 3)
-        with pytest.raises(pa.ArrowException):
+        with pytest.raises(pa.ArrowInvalid):
             pa.array(data2, type=type2)
 
     def test_decimal_with_different_precisions(self):
@@ -1737,33 +1737,29 @@ class TestZeroCopyConversion(object):
         tm.assert_series_equal(pd.Series(result), pd.Series(values),
                                check_names=False)
 
+    def check_zero_copy_failure(self, arr):
+        with pytest.raises(pa.ArrowInvalid):
+            arr.to_pandas(zero_copy_only=True)
+
     def test_zero_copy_failure_on_object_types(self):
-        with pytest.raises(pa.ArrowException):
-            pa.array(['A', 'B', 'C']).to_pandas(zero_copy_only=True)
+        self.check_zero_copy_failure(pa.array(['A', 'B', 'C']))
 
     def test_zero_copy_failure_with_int_when_nulls(self):
-        with pytest.raises(pa.ArrowException):
-            pa.array([0, 1, None]).to_pandas(zero_copy_only=True)
+        self.check_zero_copy_failure(pa.array([0, 1, None]))
 
     def test_zero_copy_failure_with_float_when_nulls(self):
-        with pytest.raises(pa.ArrowException):
-            pa.array([0.0, 1.0, None]).to_pandas(zero_copy_only=True)
+        self.check_zero_copy_failure(pa.array([0.0, 1.0, None]))
 
     def test_zero_copy_failure_on_bool_types(self):
-        with pytest.raises(pa.ArrowException):
-            pa.array([True, False]).to_pandas(zero_copy_only=True)
+        self.check_zero_copy_failure(pa.array([True, False]))
 
     def test_zero_copy_failure_on_list_types(self):
-        arr = np.array([[1, 2], [8, 9]], dtype=object)
-
-        with pytest.raises(pa.ArrowException):
-            pa.array(arr).to_pandas(zero_copy_only=True)
+        arr = pa.array([[1, 2], [8, 9]], type=pa.list_(pa.int64()))
+        self.check_zero_copy_failure(arr)
 
     def test_zero_copy_failure_on_timestamp_types(self):
         arr = np.array(['2007-07-13'], dtype='datetime64[ns]')
-
-        with pytest.raises(pa.ArrowException):
-            pa.array(arr).to_pandas(zero_copy_only=True)
+        self.check_zero_copy_failure(pa.array(arr))
 
 
 class TestConvertMisc(object):
@@ -1843,11 +1839,11 @@ class TestConvertMisc(object):
 
     def test_mixed_types_fails(self):
         data = pd.DataFrame({'a': ['a', 1, 2.0]})
-        with pytest.raises(pa.ArrowException):
+        with pytest.raises(pa.ArrowTypeError):
             pa.Table.from_pandas(data)
 
         data = pd.DataFrame({'a': [1, True]})
-        with pytest.raises(pa.ArrowException):
+        with pytest.raises(pa.ArrowTypeError):
             pa.Table.from_pandas(data)
 
     def test_strided_data_import(self):
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index ba32330..f288611 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -592,7 +592,7 @@ def test_serialize_to_components_invalid_cases():
         'data': [buf]
     }
 
-    with pytest.raises(pa.ArrowException):
+    with pytest.raises(pa.ArrowInvalid):
         pa.deserialize_components(components)
 
     components = {
@@ -601,7 +601,7 @@ def test_serialize_to_components_invalid_cases():
         'data': [buf, buf]
     }
 
-    with pytest.raises(pa.ArrowException):
+    with pytest.raises(pa.ArrowInvalid):
         pa.deserialize_components(components)
 
 
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 5303cb2..57be9b2 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -234,7 +234,7 @@ def test_recordbatchlist_schema_equals():
     batch1 = pa.RecordBatch.from_pandas(data1)
     batch2 = pa.RecordBatch.from_pandas(data2)
 
-    with pytest.raises(pa.ArrowException):
+    with pytest.raises(pa.ArrowInvalid):
         pa.Table.from_batches([batch1, batch2])
 
 

-- 
To stop receiving notification emails like this one, please contact
uwe@apache.org.