You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/02/07 16:13:10 UTC
arrow git commit: ARROW-535: [Python] Add type mapping for NPY_LONGLONG
Repository: arrow
Updated Branches:
refs/heads/master f268e927a -> 4c3481ea5
ARROW-535: [Python] Add type mapping for NPY_LONGLONG
Based on https://github.com/wesm/feather/pull/107
Author: Uwe L. Korn <uw...@xhochy.com>
Closes #323 from xhochy/ARROW-535 and squashes the following commits:
72221fa [Uwe L. Korn] Address review comments
5d3c046 [Uwe L. Korn] ARROW-535: [Python] Add type mapping for NPY_LONGLONG
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/4c3481ea
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/4c3481ea
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/4c3481ea
Branch: refs/heads/master
Commit: 4c3481ea5438d52878f390b0f562f6113f111a8f
Parents: f268e92
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Tue Feb 7 11:13:00 2017 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue Feb 7 11:13:00 2017 -0500
----------------------------------------------------------------------
python/pyarrow/tests/test_convert_pandas.py | 6 +++--
python/src/pyarrow/adapters/pandas.cc | 29 ++++++++++++++++++++++--
2 files changed, 31 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/4c3481ea/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index f04fbe5..960653d 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -127,13 +127,14 @@ class TestPandasConversion(unittest.TestCase):
tm.assert_frame_equal(result, ex_frame)
def test_integer_no_nulls(self):
- data = {}
+ data = OrderedDict()
fields = []
numpy_dtypes = [('i1', A.int8()), ('i2', A.int16()),
('i4', A.int32()), ('i8', A.int64()),
('u1', A.uint8()), ('u2', A.uint16()),
- ('u4', A.uint32()), ('u8', A.uint64())]
+ ('u4', A.uint32()), ('u8', A.uint64()),
+ ('longlong', A.int64()), ('ulonglong', A.uint64())]
num_values = 100
for dtype, arrow_dtype in numpy_dtypes:
@@ -148,6 +149,7 @@ class TestPandasConversion(unittest.TestCase):
schema = A.Schema.from_fields(fields)
self._check_pandas_roundtrip(df, expected_schema=schema)
+
def test_integer_with_nulls(self):
# pandas requires upcast to float dtype
http://git-wip-us.apache.org/repos/asf/arrow/blob/4c3481ea/python/src/pyarrow/adapters/pandas.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc
index 345dc90..b4e0d2f 100644
--- a/python/src/pyarrow/adapters/pandas.cc
+++ b/python/src/pyarrow/adapters/pandas.cc
@@ -91,11 +91,17 @@ NPY_INT_DECL(INT8, Int8, int8_t);
NPY_INT_DECL(INT16, Int16, int16_t);
NPY_INT_DECL(INT32, Int32, int32_t);
NPY_INT_DECL(INT64, Int64, int64_t);
+
NPY_INT_DECL(UINT8, UInt8, uint8_t);
NPY_INT_DECL(UINT16, UInt16, uint16_t);
NPY_INT_DECL(UINT32, UInt32, uint32_t);
NPY_INT_DECL(UINT64, UInt64, uint64_t);
+#if NPY_INT64 != NPY_LONGLONG
+NPY_INT_DECL(LONGLONG, Int64, int64_t);
+NPY_INT_DECL(ULONGLONG, UInt64, uint64_t);
+#endif
+
template <>
struct npy_traits<NPY_FLOAT32> {
typedef float value_type;
@@ -1706,16 +1712,35 @@ Status PandasToArrow(arrow::MemoryPool* pool, PyObject* ao, PyObject* mo,
return Status::Invalid("only handle 1-dimensional arrays");
}
- switch (PyArray_DESCR(arr)->type_num) {
+ int type_num = PyArray_DESCR(arr)->type_num;
+
+#if (NPY_INT64 == NPY_LONGLONG) && (NPY_SIZEOF_LONGLONG == 8)
+ // Both LONGLONG and INT64 can be observed in the wild, which is buggy. We set
+ // U/LONGLONG to U/INT64 so things work properly.
+ if (type_num == NPY_LONGLONG) {
+ type_num = NPY_INT64;
+ }
+ if (type_num == NPY_ULONGLONG) {
+ type_num = NPY_UINT64;
+ }
+#endif
+
+ switch (type_num) {
TO_ARROW_CASE(BOOL);
TO_ARROW_CASE(INT8);
TO_ARROW_CASE(INT16);
TO_ARROW_CASE(INT32);
TO_ARROW_CASE(INT64);
+#if (NPY_INT64 != NPY_LONGLONG)
+ TO_ARROW_CASE(LONGLONG);
+#endif
TO_ARROW_CASE(UINT8);
TO_ARROW_CASE(UINT16);
TO_ARROW_CASE(UINT32);
TO_ARROW_CASE(UINT64);
+#if (NPY_UINT64 != NPY_ULONGLONG)
+ TO_ARROW_CASE(ULONGLONG);
+#endif
TO_ARROW_CASE(FLOAT32);
TO_ARROW_CASE(FLOAT64);
TO_ARROW_CASE(DATETIME);
@@ -1726,7 +1751,7 @@ Status PandasToArrow(arrow::MemoryPool* pool, PyObject* ao, PyObject* mo,
} break;
default:
std::stringstream ss;
- ss << "unsupported type " << PyArray_DESCR(arr)->type_num << std::endl;
+ ss << "Unsupported numpy type " << PyArray_DESCR(arr)->type_num << std::endl;
return Status::NotImplemented(ss.str());
}
return Status::OK();