You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/11/16 21:18:57 UTC

arrow git commit: ARROW-371: Handle pandas-nullable types correctly

Repository: arrow
Updated Branches:
  refs/heads/master 48f9780a8 -> 78288b5fc


ARROW-371: Handle pandas-nullable types correctly

Author: Uwe L. Korn <uw...@xhochy.com>

Closes #205 from xhochy/ARROW-371 and squashes the following commits:

1f73e8b [Uwe L. Korn] ARROW-371: Handle pandas-nullable types correctly


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/78288b5f
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/78288b5f
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/78288b5f

Branch: refs/heads/master
Commit: 78288b5fca8ff527257e487d45c7e68f7dbd8cd2
Parents: 48f9780
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Wed Nov 16 16:18:50 2016 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Nov 16 16:18:50 2016 -0500

----------------------------------------------------------------------
 python/pyarrow/tests/test_convert_pandas.py | 22 +++++++++++-
 python/src/pyarrow/adapters/pandas.cc       | 46 ++++++++++++------------
 2 files changed, 44 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/78288b5f/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 5530299..b527ca7 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -165,7 +165,7 @@ class TestPandasConversion(unittest.TestCase):
         expected = pd.DataFrame({'strings': values * repeats})
         self._check_pandas_roundtrip(df, expected)
 
-    def test_timestamps_notimezone(self):
+    def test_timestamps_notimezone_no_nulls(self):
         df = pd.DataFrame({
             'datetime64': np.array([
                 '2007-07-13T01:23:34.123',
@@ -184,6 +184,26 @@ class TestPandasConversion(unittest.TestCase):
             })
         self._check_pandas_roundtrip(df, timestamps_to_ms=False)
 
+    def test_timestamps_notimezone_nulls(self):
+        df = pd.DataFrame({
+            'datetime64': np.array([
+                '2007-07-13T01:23:34.123',
+                None,
+                '2010-08-13T05:46:57.437'],
+                dtype='datetime64[ms]')
+            })
+        df.info()
+        self._check_pandas_roundtrip(df, timestamps_to_ms=True)
+
+        df = pd.DataFrame({
+            'datetime64': np.array([
+                '2007-07-13T01:23:34.123456789',
+                None,
+                '2010-08-13T05:46:57.437699912'],
+                dtype='datetime64[ns]')
+            })
+        self._check_pandas_roundtrip(df, timestamps_to_ms=False)
+
     # def test_category(self):
     #     repeats = 1000
     #     values = [b'foo', None, u'bar', 'qux', np.nan]

http://git-wip-us.apache.org/repos/asf/arrow/blob/78288b5f/python/src/pyarrow/adapters/pandas.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc
index 6a3966b..1f5b700 100644
--- a/python/src/pyarrow/adapters/pandas.cc
+++ b/python/src/pyarrow/adapters/pandas.cc
@@ -489,20 +489,20 @@ struct arrow_traits<arrow::Type::BOOL> {
   static constexpr int npy_type = NPY_BOOL;
   static constexpr bool supports_nulls = false;
   static constexpr bool is_boolean = true;
-  static constexpr bool is_integer = false;
-  static constexpr bool is_floating = false;
+  static constexpr bool is_pandas_numeric_not_nullable = false;
+  static constexpr bool is_pandas_numeric_nullable = false;
 };
 
-#define INT_DECL(TYPE)                                      \
-  template <>                                               \
-  struct arrow_traits<arrow::Type::TYPE> {              \
-    static constexpr int npy_type = NPY_##TYPE;             \
-    static constexpr bool supports_nulls = false;           \
-    static constexpr double na_value = NAN;                 \
-    static constexpr bool is_boolean = false;               \
-    static constexpr bool is_integer = true;                \
-    static constexpr bool is_floating = false;              \
-    typedef typename npy_traits<NPY_##TYPE>::value_type T;  \
+#define INT_DECL(TYPE)                                           \
+  template <>                                                    \
+  struct arrow_traits<arrow::Type::TYPE> {                       \
+    static constexpr int npy_type = NPY_##TYPE;                  \
+    static constexpr bool supports_nulls = false;                \
+    static constexpr double na_value = NAN;                      \
+    static constexpr bool is_boolean = false;                    \
+    static constexpr bool is_pandas_numeric_not_nullable = true; \
+    static constexpr bool is_pandas_numeric_nullable = false;    \
+    typedef typename npy_traits<NPY_##TYPE>::value_type T;       \
   };
 
 INT_DECL(INT8);
@@ -520,8 +520,8 @@ struct arrow_traits<arrow::Type::FLOAT> {
   static constexpr bool supports_nulls = true;
   static constexpr float na_value = NAN;
   static constexpr bool is_boolean = false;
-  static constexpr bool is_integer = false;
-  static constexpr bool is_floating = true;
+  static constexpr bool is_pandas_numeric_not_nullable = false;
+  static constexpr bool is_pandas_numeric_nullable = true;
   typedef typename npy_traits<NPY_FLOAT32>::value_type T;
 };
 
@@ -531,8 +531,8 @@ struct arrow_traits<arrow::Type::DOUBLE> {
   static constexpr bool supports_nulls = true;
   static constexpr double na_value = NAN;
   static constexpr bool is_boolean = false;
-  static constexpr bool is_integer = false;
-  static constexpr bool is_floating = true;
+  static constexpr bool is_pandas_numeric_not_nullable = false;
+  static constexpr bool is_pandas_numeric_nullable = true;
   typedef typename npy_traits<NPY_FLOAT64>::value_type T;
 };
 
@@ -542,8 +542,8 @@ struct arrow_traits<arrow::Type::TIMESTAMP> {
   static constexpr bool supports_nulls = true;
   static constexpr int64_t na_value = std::numeric_limits<int64_t>::min();
   static constexpr bool is_boolean = false;
-  static constexpr bool is_integer = true;
-  static constexpr bool is_floating = false;
+  static constexpr bool is_pandas_numeric_not_nullable = false;
+  static constexpr bool is_pandas_numeric_nullable = true;
   typedef typename npy_traits<NPY_DATETIME>::value_type T;
 };
 
@@ -552,8 +552,8 @@ struct arrow_traits<arrow::Type::STRING> {
   static constexpr int npy_type = NPY_OBJECT;
   static constexpr bool supports_nulls = true;
   static constexpr bool is_boolean = false;
-  static constexpr bool is_integer = false;
-  static constexpr bool is_floating = false;
+  static constexpr bool is_pandas_numeric_not_nullable = false;
+  static constexpr bool is_pandas_numeric_nullable = false;
 };
 
 
@@ -655,7 +655,7 @@ class ArrowDeserializer {
 
   template <int T2>
   inline typename std::enable_if<
-    arrow_traits<T2>::is_floating, Status>::type
+    arrow_traits<T2>::is_pandas_numeric_nullable, Status>::type
   ConvertValues(const std::shared_ptr<Array>& arr) {
     typedef typename arrow_traits<T2>::T T;
 
@@ -668,7 +668,7 @@ class ArrowDeserializer {
 
       T* out_values = reinterpret_cast<T*>(PyArray_DATA(out_));
       for (int64_t i = 0; i < arr->length(); ++i) {
-        out_values[i] = arr->IsNull(i) ? NAN : in_values[i];
+        out_values[i] = arr->IsNull(i) ? arrow_traits<T2>::na_value : in_values[i];
       }
     } else {
       // Zero-Copy. We can pass the data pointer directly to NumPy.
@@ -683,7 +683,7 @@ class ArrowDeserializer {
   // Integer specialization
   template <int T2>
   inline typename std::enable_if<
-    arrow_traits<T2>::is_integer, Status>::type
+    arrow_traits<T2>::is_pandas_numeric_not_nullable, Status>::type
   ConvertValues(const std::shared_ptr<Array>& arr) {
     typedef typename arrow_traits<T2>::T T;