You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/04/04 17:18:22 UTC
arrow git commit: ARROW-765: [Python] Add more natural Exception type
hierarchy for thirdparty users
Repository: arrow
Updated Branches:
refs/heads/master ec6188efc -> 2aed7845f
ARROW-765: [Python] Add more natural Exception type hierarchy for thirdparty users
I also took the liberty of changing a number of error types in libarrow_python (from Status::TypeError to Status::Invalid).
Author: Wes McKinney <we...@twosigma.com>
Closes #489 from wesm/ARROW-765 and squashes the following commits:
74c43df [Wes McKinney] Make a nicer Exception hierarchy, with more intuitive bases for thirdparty users
2a58a1b [Wes McKinney] Add a nicer exception hierarchy. Unknown errors return as ValueError
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/2aed7845
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/2aed7845
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/2aed7845
Branch: refs/heads/master
Commit: 2aed7845fbc9e3d91ab9d16965ee9f6f3abc668b
Parents: ec6188e
Author: Wes McKinney <we...@twosigma.com>
Authored: Tue Apr 4 13:18:16 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue Apr 4 13:18:16 2017 -0400
----------------------------------------------------------------------
cpp/src/arrow/python/builtin_convert.cc | 8 +--
cpp/src/arrow/python/pandas_convert.cc | 6 +-
cpp/src/arrow/status.h | 2 +-
python/pyarrow/__init__.py | 8 ++-
python/pyarrow/error.pyx | 43 ++++++++++++-
python/pyarrow/includes/common.pxd | 4 +-
python/pyarrow/tests/test_convert_builtin.py | 78 +++++++++++------------
python/pyarrow/tests/test_convert_pandas.py | 4 +-
python/pyarrow/tests/test_feather.py | 2 +-
9 files changed, 101 insertions(+), 54 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/cpp/src/arrow/python/builtin_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index 6a13fdc..25b32ee 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -394,7 +394,7 @@ class BytesConverter : public TypedConverter<BinaryBuilder> {
} else if (PyBytes_Check(item)) {
bytes_obj = item;
} else {
- return Status::TypeError(
+ return Status::Invalid(
"Value that cannot be converted to bytes was encountered");
}
// No error checking
@@ -429,7 +429,7 @@ class FixedWidthBytesConverter : public TypedConverter<FixedSizeBinaryBuilder> {
} else if (PyBytes_Check(item)) {
bytes_obj = item;
} else {
- return Status::TypeError(
+ return Status::Invalid(
"Value that cannot be converted to bytes was encountered");
}
// No error checking
@@ -458,7 +458,7 @@ class UTF8Converter : public TypedConverter<StringBuilder> {
RETURN_NOT_OK(typed_builder_->AppendNull());
continue;
} else if (!PyUnicode_Check(item)) {
- return Status::TypeError("Non-unicode value encountered");
+ return Status::Invalid("Non-unicode value encountered");
}
tmp.reset(PyUnicode_AsUTF8String(item));
RETURN_IF_PYERROR();
@@ -585,7 +585,7 @@ Status CheckPythonBytesAreFixedLength(PyObject* obj, Py_ssize_t expected_length)
std::stringstream ss;
ss << "Found byte string of length " << length << ", expected length is "
<< expected_length;
- return Status::TypeError(ss.str());
+ return Status::Invalid(ss.str());
}
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/cpp/src/arrow/python/pandas_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc
index 9577892..48d3489 100644
--- a/cpp/src/arrow/python/pandas_convert.cc
+++ b/cpp/src/arrow/python/pandas_convert.cc
@@ -161,7 +161,7 @@ static Status AppendObjectStrings(
obj = PyUnicode_AsUTF8String(obj);
if (obj == NULL) {
PyErr_Clear();
- return Status::TypeError("failed converting unicode to UTF8");
+ return Status::Invalid("failed converting unicode to UTF8");
}
const int32_t length = static_cast<int32_t>(PyBytes_GET_SIZE(obj));
Status s = builder->Append(PyBytes_AS_STRING(obj), length);
@@ -200,7 +200,7 @@ static Status AppendObjectFixedWidthBytes(PyArrayObject* arr, PyArrayObject* mas
obj = PyUnicode_AsUTF8String(obj);
if (obj == NULL) {
PyErr_Clear();
- return Status::TypeError("failed converting unicode to UTF8");
+ return Status::Invalid("failed converting unicode to UTF8");
}
RETURN_NOT_OK(CheckPythonBytesAreFixedLength(obj, byte_width));
@@ -482,7 +482,7 @@ Status InvalidConversion(PyObject* obj, const std::string& expected_type_name) {
std::stringstream ss;
ss << "Python object of type " << cpp_type_name << " is not None and is not a "
<< expected_type_name << " object";
- return Status::TypeError(ss.str());
+ return Status::Invalid(ss.str());
}
Status PandasConverter::ConvertDates() {
http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/cpp/src/arrow/status.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h
index 05f5b74..dd65b75 100644
--- a/cpp/src/arrow/status.h
+++ b/cpp/src/arrow/status.h
@@ -134,7 +134,7 @@ class ARROW_EXPORT Status {
bool IsKeyError() const { return code() == StatusCode::KeyError; }
bool IsInvalid() const { return code() == StatusCode::Invalid; }
bool IsIOError() const { return code() == StatusCode::IOError; }
-
+ bool IsTypeError() const { return code() == StatusCode::TypeError; }
bool IsUnknownError() const { return code() == StatusCode::UnknownError; }
bool IsNotImplemented() const { return code() == StatusCode::NotImplemented; }
http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 6860f98..8c52074 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -38,7 +38,13 @@ from pyarrow.array import (Array, Tensor, from_pylist,
ListArray, StringArray,
DictionaryArray)
-from pyarrow.error import ArrowException
+from pyarrow.error import (ArrowException,
+ ArrowKeyError,
+ ArrowInvalid,
+ ArrowIOError,
+ ArrowMemoryError,
+ ArrowNotImplementedError,
+ ArrowTypeError)
from pyarrow.filesystem import Filesystem, HdfsClient, LocalFilesystem
from pyarrow.io import (HdfsFile, NativeFile, PythonFileInterface,
http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/error.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/error.pyx b/python/pyarrow/error.pyx
index b8a82b3..259aeb0 100644
--- a/python/pyarrow/error.pyx
+++ b/python/pyarrow/error.pyx
@@ -19,13 +19,52 @@ from pyarrow.includes.libarrow cimport CStatus
from pyarrow.includes.common cimport c_string
from pyarrow.compat import frombytes
+
class ArrowException(Exception):
pass
+
+class ArrowInvalid(ValueError, ArrowException):
+ pass
+
+
+class ArrowMemoryError(MemoryError, ArrowException):
+ pass
+
+
+class ArrowIOError(IOError, ArrowException):
+ pass
+
+
+class ArrowKeyError(KeyError, ArrowException):
+ pass
+
+
+class ArrowTypeError(TypeError, ArrowException):
+ pass
+
+
+class ArrowNotImplementedError(NotImplementedError, ArrowException):
+ pass
+
+
cdef int check_status(const CStatus& status) nogil except -1:
if status.ok():
return 0
- cdef c_string c_message = status.ToString()
with gil:
- raise ArrowException(frombytes(c_message))
+ message = frombytes(status.ToString())
+ if status.IsInvalid():
+ raise ArrowInvalid(message)
+ elif status.IsIOError():
+ raise ArrowIOError(message)
+ elif status.IsOutOfMemory():
+ raise ArrowMemoryError(message)
+ elif status.IsKeyError():
+ raise ArrowKeyError(message)
+ elif status.IsNotImplemented():
+ raise ArrowNotImplementedError(message)
+ elif status.IsTypeError():
+ raise ArrowTypeError(message)
+ else:
+ raise ArrowException(message)
http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/includes/common.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd
index f689bdc..ab38ff3 100644
--- a/python/pyarrow/includes/common.pxd
+++ b/python/pyarrow/includes/common.pxd
@@ -43,10 +43,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
c_string ToString()
c_bool ok()
+ c_bool IsIOError()
c_bool IsOutOfMemory()
+ c_bool IsInvalid()
c_bool IsKeyError()
c_bool IsNotImplemented()
- c_bool IsInvalid()
+ c_bool IsTypeError()
cdef inline object PyObject_to_object(PyObject* o):
http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 15fca56..e2b03d8 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -17,7 +17,7 @@
# under the License.
from pyarrow.compat import unittest, u # noqa
-import pyarrow
+import pyarrow as pa
import datetime
@@ -26,32 +26,32 @@ class TestConvertList(unittest.TestCase):
def test_boolean(self):
expected = [True, None, False, None]
- arr = pyarrow.from_pylist(expected)
+ arr = pa.from_pylist(expected)
assert len(arr) == 4
assert arr.null_count == 2
- assert arr.type == pyarrow.bool_()
+ assert arr.type == pa.bool_()
assert arr.to_pylist() == expected
def test_empty_list(self):
- arr = pyarrow.from_pylist([])
+ arr = pa.from_pylist([])
assert len(arr) == 0
assert arr.null_count == 0
- assert arr.type == pyarrow.null()
+ assert arr.type == pa.null()
assert arr.to_pylist() == []
def test_all_none(self):
- arr = pyarrow.from_pylist([None, None])
+ arr = pa.from_pylist([None, None])
assert len(arr) == 2
assert arr.null_count == 2
- assert arr.type == pyarrow.null()
+ assert arr.type == pa.null()
assert arr.to_pylist() == [None, None]
def test_integer(self):
expected = [1, None, 3, None]
- arr = pyarrow.from_pylist(expected)
+ arr = pa.from_pylist(expected)
assert len(arr) == 4
assert arr.null_count == 2
- assert arr.type == pyarrow.int64()
+ assert arr.type == pa.int64()
assert arr.to_pylist() == expected
def test_garbage_collection(self):
@@ -60,25 +60,25 @@ class TestConvertList(unittest.TestCase):
# Force the cyclic garbage collector to run
gc.collect()
- bytes_before = pyarrow.total_allocated_bytes()
- pyarrow.from_pylist([1, None, 3, None])
+ bytes_before = pa.total_allocated_bytes()
+ pa.from_pylist([1, None, 3, None])
gc.collect()
- assert pyarrow.total_allocated_bytes() == bytes_before
+ assert pa.total_allocated_bytes() == bytes_before
def test_double(self):
data = [1.5, 1, None, 2.5, None, None]
- arr = pyarrow.from_pylist(data)
+ arr = pa.from_pylist(data)
assert len(arr) == 6
assert arr.null_count == 3
- assert arr.type == pyarrow.float64()
+ assert arr.type == pa.float64()
assert arr.to_pylist() == data
def test_unicode(self):
data = [u'foo', u'bar', None, u'mañana']
- arr = pyarrow.from_pylist(data)
+ arr = pa.from_pylist(data)
assert len(arr) == 4
assert arr.null_count == 1
- assert arr.type == pyarrow.string()
+ assert arr.type == pa.string()
assert arr.to_pylist() == data
def test_bytes(self):
@@ -86,31 +86,31 @@ class TestConvertList(unittest.TestCase):
data = [b'foo',
u1.decode('utf-8'), # unicode gets encoded,
None]
- arr = pyarrow.from_pylist(data)
+ arr = pa.from_pylist(data)
assert len(arr) == 3
assert arr.null_count == 1
- assert arr.type == pyarrow.binary()
+ assert arr.type == pa.binary()
assert arr.to_pylist() == [b'foo', u1, None]
def test_fixed_size_bytes(self):
data = [b'foof', None, b'barb', b'2346']
- arr = pyarrow.from_pylist(data, type=pyarrow.binary(4))
+ arr = pa.from_pylist(data, type=pa.binary(4))
assert len(arr) == 4
assert arr.null_count == 1
- assert arr.type == pyarrow.binary(4)
+ assert arr.type == pa.binary(4)
assert arr.to_pylist() == data
def test_fixed_size_bytes_does_not_accept_varying_lengths(self):
data = [b'foo', None, b'barb', b'2346']
- with self.assertRaises(pyarrow.error.ArrowException):
- pyarrow.from_pylist(data, type=pyarrow.binary(4))
+ with self.assertRaises(pa.ArrowInvalid):
+ pa.from_pylist(data, type=pa.binary(4))
def test_date(self):
data = [datetime.date(2000, 1, 1), None, datetime.date(1970, 1, 1),
datetime.date(2040, 2, 26)]
- arr = pyarrow.from_pylist(data)
+ arr = pa.from_pylist(data)
assert len(arr) == 4
- assert arr.type == pyarrow.date64()
+ assert arr.type == pa.date64()
assert arr.null_count == 1
assert arr[0].as_py() == datetime.date(2000, 1, 1)
assert arr[1].as_py() is None
@@ -124,9 +124,9 @@ class TestConvertList(unittest.TestCase):
datetime.datetime(2006, 1, 13, 12, 34, 56, 432539),
datetime.datetime(2010, 8, 13, 5, 46, 57, 437699)
]
- arr = pyarrow.from_pylist(data)
+ arr = pa.from_pylist(data)
assert len(arr) == 4
- assert arr.type == pyarrow.timestamp('us')
+ assert arr.type == pa.timestamp('us')
assert arr.null_count == 1
assert arr[0].as_py() == datetime.datetime(2007, 7, 13, 1,
23, 34, 123456)
@@ -137,28 +137,28 @@ class TestConvertList(unittest.TestCase):
46, 57, 437699)
def test_mixed_nesting_levels(self):
- pyarrow.from_pylist([1, 2, None])
- pyarrow.from_pylist([[1], [2], None])
- pyarrow.from_pylist([[1], [2], [None]])
+ pa.from_pylist([1, 2, None])
+ pa.from_pylist([[1], [2], None])
+ pa.from_pylist([[1], [2], [None]])
- with self.assertRaises(pyarrow.ArrowException):
- pyarrow.from_pylist([1, 2, [1]])
+ with self.assertRaises(pa.ArrowInvalid):
+ pa.from_pylist([1, 2, [1]])
- with self.assertRaises(pyarrow.ArrowException):
- pyarrow.from_pylist([1, 2, []])
+ with self.assertRaises(pa.ArrowInvalid):
+ pa.from_pylist([1, 2, []])
- with self.assertRaises(pyarrow.ArrowException):
- pyarrow.from_pylist([[1], [2], [None, [1]]])
+ with self.assertRaises(pa.ArrowInvalid):
+ pa.from_pylist([[1], [2], [None, [1]]])
def test_list_of_int(self):
data = [[1, 2, 3], [], None, [1, 2]]
- arr = pyarrow.from_pylist(data)
+ arr = pa.from_pylist(data)
assert len(arr) == 4
assert arr.null_count == 1
- assert arr.type == pyarrow.list_(pyarrow.int64())
+ assert arr.type == pa.list_(pa.int64())
assert arr.to_pylist() == data
def test_mixed_types_fails(self):
data = ['a', 1, 2.0]
- with self.assertRaises(pyarrow.error.ArrowException):
- pyarrow.from_pylist(data)
+ with self.assertRaises(pa.ArrowException):
+ pa.from_pylist(data)
http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 56830a8..87c9c03 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -266,7 +266,7 @@ class TestPandasConversion(unittest.TestCase):
values = [b'foo', None, b'ba', None, None, b'hey']
df = pd.DataFrame({'strings': values})
schema = A.Schema.from_fields([A.field('strings', A.binary(3))])
- with self.assertRaises(A.error.ArrowException):
+ with self.assertRaises(A.ArrowInvalid):
A.Table.from_pandas(df, schema=schema)
def test_timestamps_notimezone_no_nulls(self):
@@ -409,7 +409,7 @@ class TestPandasConversion(unittest.TestCase):
def test_mixed_types_fails(self):
data = pd.DataFrame({'a': ['a', 1, 2.0]})
- with self.assertRaises(A.error.ArrowException):
+ with self.assertRaises(A.ArrowException):
A.Table.from_pandas(data)
def test_strided_data_import(self):
http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/tests/test_feather.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index c7b4f1e..cba9464 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -45,7 +45,7 @@ class TestFeatherReader(unittest.TestCase):
pass
def test_file_not_exist(self):
- with self.assertRaises(pa.ArrowException):
+ with self.assertRaises(pa.ArrowIOError):
FeatherReader('test_invalid_file')
def _get_null_counts(self, path, columns=None):