You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/01/15 17:42:43 UTC

[arrow] branch master updated: ARROW-4258: [Python] Safe cast fails from numpy float64 array with nans to integer

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 18c0e82  ARROW-4258: [Python] Safe cast fails from numpy float64 array with nans to integer
18c0e82 is described below

commit 18c0e8241b5af5b98d7238e64753e44e43a598a8
Author: Krisztián Szűcs <sz...@gmail.com>
AuthorDate: Tue Jan 15 11:42:36 2019 -0600

    ARROW-4258: [Python] Safe cast fails from numpy float64 array with nans to integer
    
    I'll write more tests to cover all three occurrences (the CastBuffer calls in the generic, Date32Type and Date64Type ConvertData paths).
    
    Author: Krisztián Szűcs <sz...@gmail.com>
    
    Closes #3395 from kszucs/ARROW-4258 and squashes the following commits:
    
    85129b41f <Krisztián Szűcs> skip test_plasma_tf_op
    6452792f2 <Krisztián Szűcs> lint
    76300d1d0 <Krisztián Szűcs> fix
---
 cpp/src/arrow/python/numpy_to_arrow.cc      | 12 ++++++------
 python/pyarrow/tests/test_convert_pandas.py |  9 +++++++++
 python/pyarrow/tests/test_plasma_tf_op.py   |  1 +
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc
index aada6bf..a944b80 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -443,8 +443,8 @@ inline Status NumPyConverter::ConvertData(std::shared_ptr<Buffer>* data) {
   RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast<PyObject*>(dtype_), &input_type));
 
   if (!input_type->Equals(*type_)) {
-    RETURN_NOT_OK(CastBuffer(input_type, *data, length_, nullptr, 0, type_, cast_options_,
-                             pool_, data));
+    RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, type_,
+                             cast_options_, pool_, data));
   }
 
   return Status::OK();
@@ -477,8 +477,8 @@ inline Status NumPyConverter::ConvertData<Date32Type>(std::shared_ptr<Buffer>* d
   } else {
     RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast<PyObject*>(dtype_), &input_type));
     if (!input_type->Equals(*type_)) {
-      RETURN_NOT_OK(CastBuffer(input_type, *data, length_, nullptr, 0, type_,
-                               cast_options_, pool_, data));
+      RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_,
+                               type_, cast_options_, pool_, data));
     }
   }
 
@@ -518,8 +518,8 @@ inline Status NumPyConverter::ConvertData<Date64Type>(std::shared_ptr<Buffer>* d
   } else {
     RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast<PyObject*>(dtype_), &input_type));
     if (!input_type->Equals(*type_)) {
-      RETURN_NOT_OK(CastBuffer(input_type, *data, length_, nullptr, 0, type_,
-                               cast_options_, pool_, data));
+      RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_,
+                               type_, cast_options_, pool_, data));
     }
   }
 
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 264b51c..9bee905 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -2224,6 +2224,15 @@ class TestConvertMisc(object):
         assert table.column('B').type == pa.int32()
 
 
+def test_safe_cast_from_float_with_nans_to_int():
+    # TODO(kszucs): write tests for creating Date32 and Date64 arrays, see
+    #               ARROW-4258 and https://github.com/apache/arrow/pull/3395
+    values = pd.Series([1, 2, None, 4])
+    arr = pa.Array.from_pandas(values, type=pa.int32(), safe=True)
+    expected = pa.array([1, 2, None, 4], type=pa.int32())
+    assert arr.equals(expected)
+
+
 def _fully_loaded_dataframe_example():
     index = pd.MultiIndex.from_arrays([
         pd.date_range('2000-01-01', periods=5).repeat(2),
diff --git a/python/pyarrow/tests/test_plasma_tf_op.py b/python/pyarrow/tests/test_plasma_tf_op.py
index e239055..53ecae2 100644
--- a/python/pyarrow/tests/test_plasma_tf_op.py
+++ b/python/pyarrow/tests/test_plasma_tf_op.py
@@ -82,6 +82,7 @@ def run_tensorflow_test_with_dtype(tf, plasma, plasma_store_name,
 
 @pytest.mark.plasma
 @pytest.mark.tensorflow
+@pytest.mark.skip(reason='Until ARROW-4259 is resolved')
 def test_plasma_tf_op(use_gpu=False):
     import pyarrow.plasma as plasma
     import tensorflow as tf