You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/11/16 21:18:57 UTC
arrow git commit: ARROW-371: Handle pandas-nullable types correctly
Repository: arrow
Updated Branches:
refs/heads/master 48f9780a8 -> 78288b5fc
ARROW-371: Handle pandas-nullable types correctly
Author: Uwe L. Korn <uw...@xhochy.com>
Closes #205 from xhochy/ARROW-371 and squashes the following commits:
1f73e8b [Uwe L. Korn] ARROW-371: Handle pandas-nullable types correctly
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/78288b5f
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/78288b5f
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/78288b5f
Branch: refs/heads/master
Commit: 78288b5fca8ff527257e487d45c7e68f7dbd8cd2
Parents: 48f9780
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Wed Nov 16 16:18:50 2016 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Nov 16 16:18:50 2016 -0500
----------------------------------------------------------------------
python/pyarrow/tests/test_convert_pandas.py | 22 +++++++++++-
python/src/pyarrow/adapters/pandas.cc | 46 ++++++++++++------------
2 files changed, 44 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/78288b5f/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 5530299..b527ca7 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -165,7 +165,7 @@ class TestPandasConversion(unittest.TestCase):
         expected = pd.DataFrame({'strings': values * repeats})
         self._check_pandas_roundtrip(df, expected)
 
-    def test_timestamps_notimezone(self):
+    def test_timestamps_notimezone_no_nulls(self):
         df = pd.DataFrame({
             'datetime64': np.array([
                 '2007-07-13T01:23:34.123',
@@ -184,6 +184,26 @@ class TestPandasConversion(unittest.TestCase):
             })
         self._check_pandas_roundtrip(df, timestamps_to_ms=False)
 
+    def test_timestamps_notimezone_nulls(self):
+        df = pd.DataFrame({
+            'datetime64': np.array([
+                '2007-07-13T01:23:34.123',
+                None,
+                '2010-08-13T05:46:57.437'],
+                dtype='datetime64[ms]')
+            })
+        df.info()
+        self._check_pandas_roundtrip(df, timestamps_to_ms=True)
+
+        df = pd.DataFrame({
+            'datetime64': np.array([
+                '2007-07-13T01:23:34.123456789',
+                None,
+                '2010-08-13T05:46:57.437699912'],
+                dtype='datetime64[ns]')
+            })
+        self._check_pandas_roundtrip(df, timestamps_to_ms=False)
+
     # def test_category(self):
     #     repeats = 1000
     #     values = [b'foo', None, u'bar', 'qux', np.nan]
http://git-wip-us.apache.org/repos/asf/arrow/blob/78288b5f/python/src/pyarrow/adapters/pandas.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc
index 6a3966b..1f5b700 100644
--- a/python/src/pyarrow/adapters/pandas.cc
+++ b/python/src/pyarrow/adapters/pandas.cc
@@ -489,20 +489,20 @@ struct arrow_traits<arrow::Type::BOOL> {
static constexpr int npy_type = NPY_BOOL;
static constexpr bool supports_nulls = false;
static constexpr bool is_boolean = true;
- static constexpr bool is_integer = false;
- static constexpr bool is_floating = false;
+ static constexpr bool is_pandas_numeric_not_nullable = false;
+ static constexpr bool is_pandas_numeric_nullable = false;
};
-#define INT_DECL(TYPE) \
- template <> \
- struct arrow_traits<arrow::Type::TYPE> { \
- static constexpr int npy_type = NPY_##TYPE; \
- static constexpr bool supports_nulls = false; \
- static constexpr double na_value = NAN; \
- static constexpr bool is_boolean = false; \
- static constexpr bool is_integer = true; \
- static constexpr bool is_floating = false; \
- typedef typename npy_traits<NPY_##TYPE>::value_type T; \
+#define INT_DECL(TYPE) \
+ template <> \
+ struct arrow_traits<arrow::Type::TYPE> { \
+ static constexpr int npy_type = NPY_##TYPE; \
+ static constexpr bool supports_nulls = false; \
+ static constexpr double na_value = NAN; \
+ static constexpr bool is_boolean = false; \
+ static constexpr bool is_pandas_numeric_not_nullable = true; \
+ static constexpr bool is_pandas_numeric_nullable = false; \
+ typedef typename npy_traits<NPY_##TYPE>::value_type T; \
};
INT_DECL(INT8);
@@ -520,8 +520,8 @@ struct arrow_traits<arrow::Type::FLOAT> {
static constexpr bool supports_nulls = true;
static constexpr float na_value = NAN;
static constexpr bool is_boolean = false;
- static constexpr bool is_integer = false;
- static constexpr bool is_floating = true;
+ static constexpr bool is_pandas_numeric_not_nullable = false;
+ static constexpr bool is_pandas_numeric_nullable = true;
typedef typename npy_traits<NPY_FLOAT32>::value_type T;
};
@@ -531,8 +531,8 @@ struct arrow_traits<arrow::Type::DOUBLE> {
static constexpr bool supports_nulls = true;
static constexpr double na_value = NAN;
static constexpr bool is_boolean = false;
- static constexpr bool is_integer = false;
- static constexpr bool is_floating = true;
+ static constexpr bool is_pandas_numeric_not_nullable = false;
+ static constexpr bool is_pandas_numeric_nullable = true;
typedef typename npy_traits<NPY_FLOAT64>::value_type T;
};
@@ -542,8 +542,8 @@ struct arrow_traits<arrow::Type::TIMESTAMP> {
static constexpr bool supports_nulls = true;
static constexpr int64_t na_value = std::numeric_limits<int64_t>::min();
static constexpr bool is_boolean = false;
- static constexpr bool is_integer = true;
- static constexpr bool is_floating = false;
+ static constexpr bool is_pandas_numeric_not_nullable = false;
+ static constexpr bool is_pandas_numeric_nullable = true;
typedef typename npy_traits<NPY_DATETIME>::value_type T;
};
@@ -552,8 +552,8 @@ struct arrow_traits<arrow::Type::STRING> {
static constexpr int npy_type = NPY_OBJECT;
static constexpr bool supports_nulls = true;
static constexpr bool is_boolean = false;
- static constexpr bool is_integer = false;
- static constexpr bool is_floating = false;
+ static constexpr bool is_pandas_numeric_not_nullable = false;
+ static constexpr bool is_pandas_numeric_nullable = false;
};
@@ -655,7 +655,7 @@ class ArrowDeserializer {
template <int T2>
inline typename std::enable_if<
- arrow_traits<T2>::is_floating, Status>::type
+ arrow_traits<T2>::is_pandas_numeric_nullable, Status>::type
ConvertValues(const std::shared_ptr<Array>& arr) {
typedef typename arrow_traits<T2>::T T;
@@ -668,7 +668,7 @@ class ArrowDeserializer {
T* out_values = reinterpret_cast<T*>(PyArray_DATA(out_));
for (int64_t i = 0; i < arr->length(); ++i) {
- out_values[i] = arr->IsNull(i) ? NAN : in_values[i];
+ out_values[i] = arr->IsNull(i) ? arrow_traits<T2>::na_value : in_values[i];
}
} else {
// Zero-Copy. We can pass the data pointer directly to NumPy.
@@ -683,7 +683,7 @@ class ArrowDeserializer {
// Integer specialization
template <int T2>
inline typename std::enable_if<
- arrow_traits<T2>::is_integer, Status>::type
+ arrow_traits<T2>::is_pandas_numeric_not_nullable, Status>::type
ConvertValues(const std::shared_ptr<Array>& arr) {
typedef typename arrow_traits<T2>::T T;