You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/01/15 17:42:43 UTC
[arrow] branch master updated: ARROW-4258: [Python] Safe cast fails from numpy float64 array with nans to integer
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 18c0e82 ARROW-4258: [Python] Safe cast fails from numpy float64 array with nans to integer
18c0e82 is described below
commit 18c0e8241b5af5b98d7238e64753e44e43a598a8
Author: Krisztián Szűcs <sz...@gmail.com>
AuthorDate: Tue Jan 15 11:42:36 2019 -0600
ARROW-4258: [Python] Safe cast fails from numpy float64 array with nans to integer
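I'll write more tests to cover all three occurrences of the fixed CastBuffer call (the generic, Date32 and Date64 specializations of NumPyConverter::ConvertData)...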
Author: Krisztián Szűcs <sz...@gmail.com>
Closes #3395 from kszucs/ARROW-4258 and squashes the following commits:
85129b41f <Krisztián Szűcs> skip test_plasma_tf_op
6452792f2 <Krisztián Szűcs> lint
76300d1d0 <Krisztián Szűcs> fix
---
cpp/src/arrow/python/numpy_to_arrow.cc | 12 ++++++------
python/pyarrow/tests/test_convert_pandas.py | 9 +++++++++
python/pyarrow/tests/test_plasma_tf_op.py | 1 +
3 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc
index aada6bf..a944b80 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -443,8 +443,8 @@ inline Status NumPyConverter::ConvertData(std::shared_ptr<Buffer>* data) {
RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast<PyObject*>(dtype_), &input_type));
if (!input_type->Equals(*type_)) {
- RETURN_NOT_OK(CastBuffer(input_type, *data, length_, nullptr, 0, type_, cast_options_,
- pool_, data));
+ RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, type_,
+ cast_options_, pool_, data));
}
return Status::OK();
@@ -477,8 +477,8 @@ inline Status NumPyConverter::ConvertData<Date32Type>(std::shared_ptr<Buffer>* d
} else {
RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast<PyObject*>(dtype_), &input_type));
if (!input_type->Equals(*type_)) {
- RETURN_NOT_OK(CastBuffer(input_type, *data, length_, nullptr, 0, type_,
- cast_options_, pool_, data));
+ RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_,
+ type_, cast_options_, pool_, data));
}
}
@@ -518,8 +518,8 @@ inline Status NumPyConverter::ConvertData<Date64Type>(std::shared_ptr<Buffer>* d
} else {
RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast<PyObject*>(dtype_), &input_type));
if (!input_type->Equals(*type_)) {
- RETURN_NOT_OK(CastBuffer(input_type, *data, length_, nullptr, 0, type_,
- cast_options_, pool_, data));
+ RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_,
+ type_, cast_options_, pool_, data));
}
}
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 264b51c..9bee905 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -2224,6 +2224,15 @@ class TestConvertMisc(object):
assert table.column('B').type == pa.int32()
+def test_safe_cast_from_float_with_nans_to_int():
+ # TODO(kszucs): write tests for creating Date32 and Date64 arrays, see
+ # ARROW-4258 and https://github.com/apache/arrow/pull/3395
+ values = pd.Series([1, 2, None, 4])
+ arr = pa.Array.from_pandas(values, type=pa.int32(), safe=True)
+ expected = pa.array([1, 2, None, 4], type=pa.int32())
+ assert arr.equals(expected)
+
+
def _fully_loaded_dataframe_example():
index = pd.MultiIndex.from_arrays([
pd.date_range('2000-01-01', periods=5).repeat(2),
diff --git a/python/pyarrow/tests/test_plasma_tf_op.py b/python/pyarrow/tests/test_plasma_tf_op.py
index e239055..53ecae2 100644
--- a/python/pyarrow/tests/test_plasma_tf_op.py
+++ b/python/pyarrow/tests/test_plasma_tf_op.py
@@ -82,6 +82,7 @@ def run_tensorflow_test_with_dtype(tf, plasma, plasma_store_name,
@pytest.mark.plasma
@pytest.mark.tensorflow
+@pytest.mark.skip(reason='Until ARROW-4259 is resolved')
def test_plasma_tf_op(use_gpu=False):
import pyarrow.plasma as plasma
import tensorflow as tf