You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/02/07 16:13:10 UTC

arrow git commit: ARROW-535: [Python] Add type mapping for NPY_LONGLONG

Repository: arrow
Updated Branches:
  refs/heads/master f268e927a -> 4c3481ea5


ARROW-535: [Python] Add type mapping for NPY_LONGLONG

Based on https://github.com/wesm/feather/pull/107

Author: Uwe L. Korn <uw...@xhochy.com>

Closes #323 from xhochy/ARROW-535 and squashes the following commits:

72221fa [Uwe L. Korn] Address review comments
5d3c046 [Uwe L. Korn] ARROW-535: [Python] Add type mapping for NPY_LONGLONG


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/4c3481ea
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/4c3481ea
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/4c3481ea

Branch: refs/heads/master
Commit: 4c3481ea5438d52878f390b0f562f6113f111a8f
Parents: f268e92
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Tue Feb 7 11:13:00 2017 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue Feb 7 11:13:00 2017 -0500

----------------------------------------------------------------------
 python/pyarrow/tests/test_convert_pandas.py |  6 +++--
 python/src/pyarrow/adapters/pandas.cc       | 29 ++++++++++++++++++++++--
 2 files changed, 31 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/4c3481ea/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index f04fbe5..960653d 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -127,13 +127,14 @@ class TestPandasConversion(unittest.TestCase):
         tm.assert_frame_equal(result, ex_frame)
 
     def test_integer_no_nulls(self):
-        data = {}
+        data = OrderedDict()
         fields = []
 
         numpy_dtypes = [('i1', A.int8()), ('i2', A.int16()),
                         ('i4', A.int32()), ('i8', A.int64()),
                         ('u1', A.uint8()), ('u2', A.uint16()),
-                        ('u4', A.uint32()), ('u8', A.uint64())]
+                        ('u4', A.uint32()), ('u8', A.uint64()),
+                        ('longlong', A.int64()), ('ulonglong', A.uint64())]
         num_values = 100
 
         for dtype, arrow_dtype in numpy_dtypes:
@@ -148,6 +149,7 @@ class TestPandasConversion(unittest.TestCase):
         schema = A.Schema.from_fields(fields)
         self._check_pandas_roundtrip(df, expected_schema=schema)
 
+
     def test_integer_with_nulls(self):
         # pandas requires upcast to float dtype
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/4c3481ea/python/src/pyarrow/adapters/pandas.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc
index 345dc90..b4e0d2f 100644
--- a/python/src/pyarrow/adapters/pandas.cc
+++ b/python/src/pyarrow/adapters/pandas.cc
@@ -91,11 +91,17 @@ NPY_INT_DECL(INT8, Int8, int8_t);
 NPY_INT_DECL(INT16, Int16, int16_t);
 NPY_INT_DECL(INT32, Int32, int32_t);
 NPY_INT_DECL(INT64, Int64, int64_t);
+
 NPY_INT_DECL(UINT8, UInt8, uint8_t);
 NPY_INT_DECL(UINT16, UInt16, uint16_t);
 NPY_INT_DECL(UINT32, UInt32, uint32_t);
 NPY_INT_DECL(UINT64, UInt64, uint64_t);
 
+#if NPY_INT64 != NPY_LONGLONG
+NPY_INT_DECL(LONGLONG, Int64, int64_t);
+NPY_INT_DECL(ULONGLONG, UInt64, uint64_t);
+#endif
+
 template <>
 struct npy_traits<NPY_FLOAT32> {
   typedef float value_type;
@@ -1706,16 +1712,35 @@ Status PandasToArrow(arrow::MemoryPool* pool, PyObject* ao, PyObject* mo,
     return Status::Invalid("only handle 1-dimensional arrays");
   }
 
-  switch (PyArray_DESCR(arr)->type_num) {
+  int type_num = PyArray_DESCR(arr)->type_num;
+
+#if (NPY_INT64 == NPY_LONGLONG) && (NPY_SIZEOF_LONGLONG == 8)
+  // Arrays of 8-byte integers can arrive tagged as either U/LONGLONG or U/INT64.
+  // Remap U/LONGLONG to U/INT64 so the switch below handles both the same way.
+  if (type_num == NPY_LONGLONG) {
+    type_num = NPY_INT64;
+  }
+  if (type_num == NPY_ULONGLONG) {
+    type_num = NPY_UINT64;
+  }
+#endif
+
+  switch (type_num) {
     TO_ARROW_CASE(BOOL);
     TO_ARROW_CASE(INT8);
     TO_ARROW_CASE(INT16);
     TO_ARROW_CASE(INT32);
     TO_ARROW_CASE(INT64);
+#if (NPY_INT64 != NPY_LONGLONG)
+	TO_ARROW_CASE(LONGLONG);
+#endif
     TO_ARROW_CASE(UINT8);
     TO_ARROW_CASE(UINT16);
     TO_ARROW_CASE(UINT32);
     TO_ARROW_CASE(UINT64);
+#if (NPY_UINT64 != NPY_ULONGLONG)
+	TO_ARROW_CASE(ULONGLONG);
+#endif
     TO_ARROW_CASE(FLOAT32);
     TO_ARROW_CASE(FLOAT64);
     TO_ARROW_CASE(DATETIME);
@@ -1726,7 +1751,7 @@ Status PandasToArrow(arrow::MemoryPool* pool, PyObject* ao, PyObject* mo,
     } break;
     default:
       std::stringstream ss;
-      ss << "unsupported type " << PyArray_DESCR(arr)->type_num << std::endl;
+      ss << "Unsupported numpy type " << PyArray_DESCR(arr)->type_num << std::endl;
       return Status::NotImplemented(ss.str());
   }
   return Status::OK();