You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/04/04 17:18:22 UTC

arrow git commit: ARROW-765: [Python] Add more natural Exception type hierarchy for thirdparty users

Repository: arrow
Updated Branches:
  refs/heads/master ec6188efc -> 2aed7845f


ARROW-765: [Python] Add more natural Exception type hierarchy for thirdparty users

I also took the liberty of changing a number of error types in libarrow_python

Author: Wes McKinney <we...@twosigma.com>

Closes #489 from wesm/ARROW-765 and squashes the following commits:

74c43df [Wes McKinney] Make a nicer Exception hierachy, with more intuitive bases for thirdparty users
2a58a1b [Wes McKinney] Add a nicer exception hierarchy. Unknown errors return as ValueError


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/2aed7845
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/2aed7845
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/2aed7845

Branch: refs/heads/master
Commit: 2aed7845fbc9e3d91ab9d16965ee9f6f3abc668b
Parents: ec6188e
Author: Wes McKinney <we...@twosigma.com>
Authored: Tue Apr 4 13:18:16 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue Apr 4 13:18:16 2017 -0400

----------------------------------------------------------------------
 cpp/src/arrow/python/builtin_convert.cc      |  8 +--
 cpp/src/arrow/python/pandas_convert.cc       |  6 +-
 cpp/src/arrow/status.h                       |  2 +-
 python/pyarrow/__init__.py                   |  8 ++-
 python/pyarrow/error.pyx                     | 43 ++++++++++++-
 python/pyarrow/includes/common.pxd           |  4 +-
 python/pyarrow/tests/test_convert_builtin.py | 78 +++++++++++------------
 python/pyarrow/tests/test_convert_pandas.py  |  4 +-
 python/pyarrow/tests/test_feather.py         |  2 +-
 9 files changed, 101 insertions(+), 54 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/cpp/src/arrow/python/builtin_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index 6a13fdc..25b32ee 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -394,7 +394,7 @@ class BytesConverter : public TypedConverter<BinaryBuilder> {
       } else if (PyBytes_Check(item)) {
         bytes_obj = item;
       } else {
-        return Status::TypeError(
+        return Status::Invalid(
             "Value that cannot be converted to bytes was encountered");
       }
       // No error checking
@@ -429,7 +429,7 @@ class FixedWidthBytesConverter : public TypedConverter<FixedSizeBinaryBuilder> {
       } else if (PyBytes_Check(item)) {
         bytes_obj = item;
       } else {
-        return Status::TypeError(
+        return Status::Invalid(
             "Value that cannot be converted to bytes was encountered");
       }
       // No error checking
@@ -458,7 +458,7 @@ class UTF8Converter : public TypedConverter<StringBuilder> {
         RETURN_NOT_OK(typed_builder_->AppendNull());
         continue;
       } else if (!PyUnicode_Check(item)) {
-        return Status::TypeError("Non-unicode value encountered");
+        return Status::Invalid("Non-unicode value encountered");
       }
       tmp.reset(PyUnicode_AsUTF8String(item));
       RETURN_IF_PYERROR();
@@ -585,7 +585,7 @@ Status CheckPythonBytesAreFixedLength(PyObject* obj, Py_ssize_t expected_length)
     std::stringstream ss;
     ss << "Found byte string of length " << length << ", expected length is "
        << expected_length;
-    return Status::TypeError(ss.str());
+    return Status::Invalid(ss.str());
   }
   return Status::OK();
 }

http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/cpp/src/arrow/python/pandas_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc
index 9577892..48d3489 100644
--- a/cpp/src/arrow/python/pandas_convert.cc
+++ b/cpp/src/arrow/python/pandas_convert.cc
@@ -161,7 +161,7 @@ static Status AppendObjectStrings(
       obj = PyUnicode_AsUTF8String(obj);
       if (obj == NULL) {
         PyErr_Clear();
-        return Status::TypeError("failed converting unicode to UTF8");
+        return Status::Invalid("failed converting unicode to UTF8");
       }
       const int32_t length = static_cast<int32_t>(PyBytes_GET_SIZE(obj));
       Status s = builder->Append(PyBytes_AS_STRING(obj), length);
@@ -200,7 +200,7 @@ static Status AppendObjectFixedWidthBytes(PyArrayObject* arr, PyArrayObject* mas
       obj = PyUnicode_AsUTF8String(obj);
       if (obj == NULL) {
         PyErr_Clear();
-        return Status::TypeError("failed converting unicode to UTF8");
+        return Status::Invalid("failed converting unicode to UTF8");
       }
 
       RETURN_NOT_OK(CheckPythonBytesAreFixedLength(obj, byte_width));
@@ -482,7 +482,7 @@ Status InvalidConversion(PyObject* obj, const std::string& expected_type_name) {
   std::stringstream ss;
   ss << "Python object of type " << cpp_type_name << " is not None and is not a "
      << expected_type_name << " object";
-  return Status::TypeError(ss.str());
+  return Status::Invalid(ss.str());
 }
 
 Status PandasConverter::ConvertDates() {

http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/cpp/src/arrow/status.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h
index 05f5b74..dd65b75 100644
--- a/cpp/src/arrow/status.h
+++ b/cpp/src/arrow/status.h
@@ -134,7 +134,7 @@ class ARROW_EXPORT Status {
   bool IsKeyError() const { return code() == StatusCode::KeyError; }
   bool IsInvalid() const { return code() == StatusCode::Invalid; }
   bool IsIOError() const { return code() == StatusCode::IOError; }
-
+  bool IsTypeError() const { return code() == StatusCode::TypeError; }
   bool IsUnknownError() const { return code() == StatusCode::UnknownError; }
   bool IsNotImplemented() const { return code() == StatusCode::NotImplemented; }
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 6860f98..8c52074 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -38,7 +38,13 @@ from pyarrow.array import (Array, Tensor, from_pylist,
                            ListArray, StringArray,
                            DictionaryArray)
 
-from pyarrow.error import ArrowException
+from pyarrow.error import (ArrowException,
+                           ArrowKeyError,
+                           ArrowInvalid,
+                           ArrowIOError,
+                           ArrowMemoryError,
+                           ArrowNotImplementedError,
+                           ArrowTypeError)
 
 from pyarrow.filesystem import Filesystem, HdfsClient, LocalFilesystem
 from pyarrow.io import (HdfsFile, NativeFile, PythonFileInterface,

http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/error.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/error.pyx b/python/pyarrow/error.pyx
index b8a82b3..259aeb0 100644
--- a/python/pyarrow/error.pyx
+++ b/python/pyarrow/error.pyx
@@ -19,13 +19,52 @@ from pyarrow.includes.libarrow cimport CStatus
 from pyarrow.includes.common cimport c_string
 from pyarrow.compat import frombytes
 
+
 class ArrowException(Exception):
     pass
 
+
+class ArrowInvalid(ValueError, ArrowException):
+    pass
+
+
+class ArrowMemoryError(MemoryError, ArrowException):
+    pass
+
+
+class ArrowIOError(IOError, ArrowException):
+    pass
+
+
+class ArrowKeyError(KeyError, ArrowException):
+    pass
+
+
+class ArrowTypeError(TypeError, ArrowException):
+    pass
+
+
+class ArrowNotImplementedError(NotImplementedError, ArrowException):
+    pass
+
+
 cdef int check_status(const CStatus& status) nogil except -1:
     if status.ok():
         return 0
 
-    cdef c_string c_message = status.ToString()
     with gil:
-        raise ArrowException(frombytes(c_message))
+        message = frombytes(status.ToString())
+        if status.IsInvalid():
+            raise ArrowInvalid(message)
+        elif status.IsIOError():
+            raise ArrowIOError(message)
+        elif status.IsOutOfMemory():
+            raise ArrowMemoryError(message)
+        elif status.IsKeyError():
+            raise ArrowKeyError(message)
+        elif status.IsNotImplemented():
+            raise ArrowNotImplementedError(message)
+        elif status.IsTypeError():
+            raise ArrowTypeError(message)
+        else:
+            raise ArrowException(message)

http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/includes/common.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd
index f689bdc..ab38ff3 100644
--- a/python/pyarrow/includes/common.pxd
+++ b/python/pyarrow/includes/common.pxd
@@ -43,10 +43,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         c_string ToString()
 
         c_bool ok()
+        c_bool IsIOError()
         c_bool IsOutOfMemory()
+        c_bool IsInvalid()
         c_bool IsKeyError()
         c_bool IsNotImplemented()
-        c_bool IsInvalid()
+        c_bool IsTypeError()
 
 
 cdef inline object PyObject_to_object(PyObject* o):

http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 15fca56..e2b03d8 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -17,7 +17,7 @@
 # under the License.
 
 from pyarrow.compat import unittest, u  # noqa
-import pyarrow
+import pyarrow as pa
 
 import datetime
 
@@ -26,32 +26,32 @@ class TestConvertList(unittest.TestCase):
 
     def test_boolean(self):
         expected = [True, None, False, None]
-        arr = pyarrow.from_pylist(expected)
+        arr = pa.from_pylist(expected)
         assert len(arr) == 4
         assert arr.null_count == 2
-        assert arr.type == pyarrow.bool_()
+        assert arr.type == pa.bool_()
         assert arr.to_pylist() == expected
 
     def test_empty_list(self):
-        arr = pyarrow.from_pylist([])
+        arr = pa.from_pylist([])
         assert len(arr) == 0
         assert arr.null_count == 0
-        assert arr.type == pyarrow.null()
+        assert arr.type == pa.null()
         assert arr.to_pylist() == []
 
     def test_all_none(self):
-        arr = pyarrow.from_pylist([None, None])
+        arr = pa.from_pylist([None, None])
         assert len(arr) == 2
         assert arr.null_count == 2
-        assert arr.type == pyarrow.null()
+        assert arr.type == pa.null()
         assert arr.to_pylist() == [None, None]
 
     def test_integer(self):
         expected = [1, None, 3, None]
-        arr = pyarrow.from_pylist(expected)
+        arr = pa.from_pylist(expected)
         assert len(arr) == 4
         assert arr.null_count == 2
-        assert arr.type == pyarrow.int64()
+        assert arr.type == pa.int64()
         assert arr.to_pylist() == expected
 
     def test_garbage_collection(self):
@@ -60,25 +60,25 @@ class TestConvertList(unittest.TestCase):
         # Force the cyclic garbage collector to run
         gc.collect()
 
-        bytes_before = pyarrow.total_allocated_bytes()
-        pyarrow.from_pylist([1, None, 3, None])
+        bytes_before = pa.total_allocated_bytes()
+        pa.from_pylist([1, None, 3, None])
         gc.collect()
-        assert pyarrow.total_allocated_bytes() == bytes_before
+        assert pa.total_allocated_bytes() == bytes_before
 
     def test_double(self):
         data = [1.5, 1, None, 2.5, None, None]
-        arr = pyarrow.from_pylist(data)
+        arr = pa.from_pylist(data)
         assert len(arr) == 6
         assert arr.null_count == 3
-        assert arr.type == pyarrow.float64()
+        assert arr.type == pa.float64()
         assert arr.to_pylist() == data
 
     def test_unicode(self):
         data = [u'foo', u'bar', None, u'ma�ana']
-        arr = pyarrow.from_pylist(data)
+        arr = pa.from_pylist(data)
         assert len(arr) == 4
         assert arr.null_count == 1
-        assert arr.type == pyarrow.string()
+        assert arr.type == pa.string()
         assert arr.to_pylist() == data
 
     def test_bytes(self):
@@ -86,31 +86,31 @@ class TestConvertList(unittest.TestCase):
         data = [b'foo',
                 u1.decode('utf-8'),  # unicode gets encoded,
                 None]
-        arr = pyarrow.from_pylist(data)
+        arr = pa.from_pylist(data)
         assert len(arr) == 3
         assert arr.null_count == 1
-        assert arr.type == pyarrow.binary()
+        assert arr.type == pa.binary()
         assert arr.to_pylist() == [b'foo', u1, None]
 
     def test_fixed_size_bytes(self):
         data = [b'foof', None, b'barb', b'2346']
-        arr = pyarrow.from_pylist(data, type=pyarrow.binary(4))
+        arr = pa.from_pylist(data, type=pa.binary(4))
         assert len(arr) == 4
         assert arr.null_count == 1
-        assert arr.type == pyarrow.binary(4)
+        assert arr.type == pa.binary(4)
         assert arr.to_pylist() == data
 
     def test_fixed_size_bytes_does_not_accept_varying_lengths(self):
         data = [b'foo', None, b'barb', b'2346']
-        with self.assertRaises(pyarrow.error.ArrowException):
-            pyarrow.from_pylist(data, type=pyarrow.binary(4))
+        with self.assertRaises(pa.ArrowInvalid):
+            pa.from_pylist(data, type=pa.binary(4))
 
     def test_date(self):
         data = [datetime.date(2000, 1, 1), None, datetime.date(1970, 1, 1),
                 datetime.date(2040, 2, 26)]
-        arr = pyarrow.from_pylist(data)
+        arr = pa.from_pylist(data)
         assert len(arr) == 4
-        assert arr.type == pyarrow.date64()
+        assert arr.type == pa.date64()
         assert arr.null_count == 1
         assert arr[0].as_py() == datetime.date(2000, 1, 1)
         assert arr[1].as_py() is None
@@ -124,9 +124,9 @@ class TestConvertList(unittest.TestCase):
             datetime.datetime(2006, 1, 13, 12, 34, 56, 432539),
             datetime.datetime(2010, 8, 13, 5, 46, 57, 437699)
         ]
-        arr = pyarrow.from_pylist(data)
+        arr = pa.from_pylist(data)
         assert len(arr) == 4
-        assert arr.type == pyarrow.timestamp('us')
+        assert arr.type == pa.timestamp('us')
         assert arr.null_count == 1
         assert arr[0].as_py() == datetime.datetime(2007, 7, 13, 1,
                                                    23, 34, 123456)
@@ -137,28 +137,28 @@ class TestConvertList(unittest.TestCase):
                                                    46, 57, 437699)
 
     def test_mixed_nesting_levels(self):
-        pyarrow.from_pylist([1, 2, None])
-        pyarrow.from_pylist([[1], [2], None])
-        pyarrow.from_pylist([[1], [2], [None]])
+        pa.from_pylist([1, 2, None])
+        pa.from_pylist([[1], [2], None])
+        pa.from_pylist([[1], [2], [None]])
 
-        with self.assertRaises(pyarrow.ArrowException):
-            pyarrow.from_pylist([1, 2, [1]])
+        with self.assertRaises(pa.ArrowInvalid):
+            pa.from_pylist([1, 2, [1]])
 
-        with self.assertRaises(pyarrow.ArrowException):
-            pyarrow.from_pylist([1, 2, []])
+        with self.assertRaises(pa.ArrowInvalid):
+            pa.from_pylist([1, 2, []])
 
-        with self.assertRaises(pyarrow.ArrowException):
-            pyarrow.from_pylist([[1], [2], [None, [1]]])
+        with self.assertRaises(pa.ArrowInvalid):
+            pa.from_pylist([[1], [2], [None, [1]]])
 
     def test_list_of_int(self):
         data = [[1, 2, 3], [], None, [1, 2]]
-        arr = pyarrow.from_pylist(data)
+        arr = pa.from_pylist(data)
         assert len(arr) == 4
         assert arr.null_count == 1
-        assert arr.type == pyarrow.list_(pyarrow.int64())
+        assert arr.type == pa.list_(pa.int64())
         assert arr.to_pylist() == data
 
     def test_mixed_types_fails(self):
         data = ['a', 1, 2.0]
-        with self.assertRaises(pyarrow.error.ArrowException):
-            pyarrow.from_pylist(data)
+        with self.assertRaises(pa.ArrowException):
+            pa.from_pylist(data)

http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 56830a8..87c9c03 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -266,7 +266,7 @@ class TestPandasConversion(unittest.TestCase):
         values = [b'foo', None, b'ba', None, None, b'hey']
         df = pd.DataFrame({'strings': values})
         schema = A.Schema.from_fields([A.field('strings', A.binary(3))])
-        with self.assertRaises(A.error.ArrowException):
+        with self.assertRaises(A.ArrowInvalid):
             A.Table.from_pandas(df, schema=schema)
 
     def test_timestamps_notimezone_no_nulls(self):
@@ -409,7 +409,7 @@ class TestPandasConversion(unittest.TestCase):
 
     def test_mixed_types_fails(self):
         data = pd.DataFrame({'a': ['a', 1, 2.0]})
-        with self.assertRaises(A.error.ArrowException):
+        with self.assertRaises(A.ArrowException):
             A.Table.from_pandas(data)
 
     def test_strided_data_import(self):

http://git-wip-us.apache.org/repos/asf/arrow/blob/2aed7845/python/pyarrow/tests/test_feather.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index c7b4f1e..cba9464 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -45,7 +45,7 @@ class TestFeatherReader(unittest.TestCase):
                 pass
 
     def test_file_not_exist(self):
-        with self.assertRaises(pa.ArrowException):
+        with self.assertRaises(pa.ArrowIOError):
             FeatherReader('test_invalid_file')
 
     def _get_null_counts(self, path, columns=None):