You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/04/21 13:24:21 UTC
[arrow] branch master updated: ARROW-2390: [C++/Python] Map Python
exceptions to Arrow status codes
This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 4c71f30 ARROW-2390: [C++/Python] Map Python exceptions to Arrow status codes
4c71f30 is described below
commit 4c71f309041e37806dfb7abfe89d6a86a0fb2b97
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Sat Apr 21 14:24:11 2018 +0100
ARROW-2390: [C++/Python] Map Python exceptions to Arrow status codes
Instead of always returning an "unknown error" status code when a Python exception is raised, we can easily discriminate and return e.g. a TypeError status code when a Python TypeError is raised, etc.
Author: Antoine Pitrou <an...@python.org>
Closes #1916 from pitrou/ARROW-2390-check-py-error and squashes the following commits:
8f385b79 <Antoine Pitrou> Fix test on Python 2.7
f4a5b01f <Antoine Pitrou> ARROW-2390: Map Python exceptions to Arrow status codes
---
cpp/src/arrow/python/common.cc | 17 ++++++++++
cpp/src/arrow/python/python-test.cc | 48 +++++++++++++++++++++++++++-
python/pyarrow/tests/test_convert_builtin.py | 4 +--
python/pyarrow/tests/test_convert_pandas.py | 40 +++++++++++------------
python/pyarrow/tests/test_serialization.py | 4 +--
python/pyarrow/tests/test_table.py | 2 +-
6 files changed, 87 insertions(+), 28 deletions(-)
diff --git a/cpp/src/arrow/python/common.cc b/cpp/src/arrow/python/common.cc
index bd13f29..a565d00 100644
--- a/cpp/src/arrow/python/common.cc
+++ b/cpp/src/arrow/python/common.cc
@@ -98,6 +98,23 @@ Status CheckPyError(StatusCode code) {
std::string message;
RETURN_NOT_OK(internal::PyObject_StdStringStr(exc_value, &message));
+
+ if (code == StatusCode::UnknownError) {
+ // Try to match the Python exception type with an appropriate Status code
+ if (PyErr_GivenExceptionMatches(exc_type, PyExc_MemoryError)) {
+ code = StatusCode::OutOfMemory;
+ } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_KeyError)) {
+ code = StatusCode::KeyError;
+ } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_TypeError)) {
+ code = StatusCode::TypeError;
+ } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_ValueError)) {
+ code = StatusCode::Invalid;
+ } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_EnvironmentError)) {
+ code = StatusCode::IOError;
+ } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_NotImplementedError)) {
+ code = StatusCode::NotImplemented;
+ }
+ }
return Status(code, message);
}
return Status::OK();
diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc
index 3ea814f..81d3fea 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -77,6 +77,52 @@ TEST(OwnedRefNoGIL, TestMoves) {
ASSERT_EQ(Py_REFCNT(v), 1);
}
+TEST(CheckPyError, TestStatus) {
+ PyAcquireGIL lock;
+ Status st;
+
+ auto check_error = [](Status& st, const char* expected_message = "some error") {
+ st = CheckPyError();
+ ASSERT_EQ(st.message(), expected_message);
+ ASSERT_FALSE(PyErr_Occurred());
+ };
+
+ for (PyObject* exc_type : {PyExc_Exception, PyExc_SyntaxError}) {
+ PyErr_SetString(exc_type, "some error");
+ check_error(st);
+ ASSERT_TRUE(st.IsUnknownError());
+ }
+
+ PyErr_SetString(PyExc_TypeError, "some error");
+ check_error(st);
+ ASSERT_TRUE(st.IsTypeError());
+
+ PyErr_SetString(PyExc_ValueError, "some error");
+ check_error(st);
+ ASSERT_TRUE(st.IsInvalid());
+
+ PyErr_SetString(PyExc_KeyError, "some error");
+ check_error(st, "'some error'");
+ ASSERT_TRUE(st.IsKeyError());
+
+ for (PyObject* exc_type : {PyExc_OSError, PyExc_IOError}) {
+ PyErr_SetString(exc_type, "some error");
+ check_error(st);
+ ASSERT_TRUE(st.IsIOError());
+ }
+
+ PyErr_SetString(PyExc_NotImplementedError, "some error");
+ check_error(st);
+ ASSERT_TRUE(st.IsNotImplemented());
+
+ // No override if a specific status code is given
+ PyErr_SetString(PyExc_TypeError, "some error");
+ st = CheckPyError(StatusCode::SerializationError);
+ ASSERT_TRUE(st.IsSerializationError());
+ ASSERT_EQ(st.message(), "some error");
+ ASSERT_FALSE(PyErr_Occurred());
+}
+
class DecimalTest : public ::testing::Test {
public:
DecimalTest() : lock_(), decimal_constructor_() {
@@ -221,7 +267,7 @@ TEST(BuiltinConversionTest, TestMixedTypeFails) {
ASSERT_EQ(PyList_SetItem(list, 1, integer), 0);
ASSERT_EQ(PyList_SetItem(list, 2, doub), 0);
- ASSERT_RAISES(UnknownError, ConvertPySequence(list, pool, &arr));
+ ASSERT_RAISES(TypeError, ConvertPySequence(list, pool, &arr));
}
TEST_F(DecimalTest, FromPythonDecimalRescaleNotTruncateable) {
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index f21e567..a18d183 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -316,7 +316,7 @@ def test_sequence_utf8_to_unicode():
# test a non-utf8 unicode string
val = (u'maƱana').encode('utf-16-le')
- with pytest.raises(pa.ArrowException):
+ with pytest.raises(pa.ArrowInvalid):
pa.array([val], type=pa.string())
@@ -496,7 +496,7 @@ def test_sequence_mixed_nesting_levels():
def test_sequence_mixed_types_fails():
data = ['a', 1, 2.0]
- with pytest.raises(pa.ArrowException):
+ with pytest.raises(pa.ArrowTypeError):
pa.array(data)
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 6970975..c43c29d 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -291,10 +291,10 @@ class TestConvertMetadata(object):
batch = pa.RecordBatch.from_arrays([arr], ['foo'])
table = pa.Table.from_batches([batch, batch, batch])
- with pytest.raises(pa.ArrowException):
+ with pytest.raises(pa.ArrowInvalid):
arr.to_pandas()
- with pytest.raises(pa.ArrowException):
+ with pytest.raises(pa.ArrowInvalid):
table.to_pandas()
def test_unicode_with_unicode_column_and_index(self):
@@ -1238,7 +1238,7 @@ class TestConvertStringLikeTypes(object):
# cannot be converted to utf-8
def test_array_of_bytes_to_strings_bad_data(self):
with pytest.raises(
- pa.lib.ArrowException,
+ pa.lib.ArrowInvalid,
message="Unknown error: 'utf-8' codec can't decode byte 0x80 "
"in position 0: invalid start byte"):
pa.array(np.array([b'\x80\x81'], dtype=object), pa.string())
@@ -1321,12 +1321,12 @@ class TestConvertDecimalTypes(object):
def test_decimal_fails_with_truncation(self):
data1 = [decimal.Decimal('1.234')]
type1 = pa.decimal128(10, 2)
- with pytest.raises(pa.ArrowException):
+ with pytest.raises(pa.ArrowInvalid):
pa.array(data1, type=type1)
data2 = [decimal.Decimal('1.2345')]
type2 = pa.decimal128(10, 3)
- with pytest.raises(pa.ArrowException):
+ with pytest.raises(pa.ArrowInvalid):
pa.array(data2, type=type2)
def test_decimal_with_different_precisions(self):
@@ -1737,33 +1737,29 @@ class TestZeroCopyConversion(object):
tm.assert_series_equal(pd.Series(result), pd.Series(values),
check_names=False)
+ def check_zero_copy_failure(self, arr):
+ with pytest.raises(pa.ArrowInvalid):
+ arr.to_pandas(zero_copy_only=True)
+
def test_zero_copy_failure_on_object_types(self):
- with pytest.raises(pa.ArrowException):
- pa.array(['A', 'B', 'C']).to_pandas(zero_copy_only=True)
+ self.check_zero_copy_failure(pa.array(['A', 'B', 'C']))
def test_zero_copy_failure_with_int_when_nulls(self):
- with pytest.raises(pa.ArrowException):
- pa.array([0, 1, None]).to_pandas(zero_copy_only=True)
+ self.check_zero_copy_failure(pa.array([0, 1, None]))
def test_zero_copy_failure_with_float_when_nulls(self):
- with pytest.raises(pa.ArrowException):
- pa.array([0.0, 1.0, None]).to_pandas(zero_copy_only=True)
+ self.check_zero_copy_failure(pa.array([0.0, 1.0, None]))
def test_zero_copy_failure_on_bool_types(self):
- with pytest.raises(pa.ArrowException):
- pa.array([True, False]).to_pandas(zero_copy_only=True)
+ self.check_zero_copy_failure(pa.array([True, False]))
def test_zero_copy_failure_on_list_types(self):
- arr = np.array([[1, 2], [8, 9]], dtype=object)
-
- with pytest.raises(pa.ArrowException):
- pa.array(arr).to_pandas(zero_copy_only=True)
+ arr = pa.array([[1, 2], [8, 9]], type=pa.list_(pa.int64()))
+ self.check_zero_copy_failure(arr)
def test_zero_copy_failure_on_timestamp_types(self):
arr = np.array(['2007-07-13'], dtype='datetime64[ns]')
-
- with pytest.raises(pa.ArrowException):
- pa.array(arr).to_pandas(zero_copy_only=True)
+ self.check_zero_copy_failure(pa.array(arr))
class TestConvertMisc(object):
@@ -1843,11 +1839,11 @@ class TestConvertMisc(object):
def test_mixed_types_fails(self):
data = pd.DataFrame({'a': ['a', 1, 2.0]})
- with pytest.raises(pa.ArrowException):
+ with pytest.raises(pa.ArrowTypeError):
pa.Table.from_pandas(data)
data = pd.DataFrame({'a': [1, True]})
- with pytest.raises(pa.ArrowException):
+ with pytest.raises(pa.ArrowTypeError):
pa.Table.from_pandas(data)
def test_strided_data_import(self):
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index ba32330..f288611 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -592,7 +592,7 @@ def test_serialize_to_components_invalid_cases():
'data': [buf]
}
- with pytest.raises(pa.ArrowException):
+ with pytest.raises(pa.ArrowInvalid):
pa.deserialize_components(components)
components = {
@@ -601,7 +601,7 @@ def test_serialize_to_components_invalid_cases():
'data': [buf, buf]
}
- with pytest.raises(pa.ArrowException):
+ with pytest.raises(pa.ArrowInvalid):
pa.deserialize_components(components)
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 5303cb2..57be9b2 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -234,7 +234,7 @@ def test_recordbatchlist_schema_equals():
batch1 = pa.RecordBatch.from_pandas(data1)
batch2 = pa.RecordBatch.from_pandas(data2)
- with pytest.raises(pa.ArrowException):
+ with pytest.raises(pa.ArrowInvalid):
pa.Table.from_batches([batch1, batch2])
--
To stop receiving notification emails like this one, please contact
uwe@apache.org.