You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/01/26 15:28:14 UTC

[arrow] branch master updated: ARROW-2007: [Python] Implement float32 conversions, use NumPy dtype when possible for inner arrays

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new f680dac  ARROW-2007: [Python] Implement float32 conversions, use NumPy dtype when possible for inner arrays
f680dac is described below

commit f680dac68ef5bc911499ae0b62e14c46046816a1
Author: Wes McKinney <we...@twosigma.com>
AuthorDate: Fri Jan 26 10:28:08 2018 -0500

    ARROW-2007: [Python] Implement float32 conversions, use NumPy dtype when possible for inner arrays
    
    Author: Wes McKinney <we...@twosigma.com>
    
    Closes #1509 from wesm/ARROW-2007 and squashes the following commits:
    
    cd12626d [Wes McKinney] Pin thrift-cpp in Appveyor
    326c82e1 [Wes McKinney] Pin Thrift 0.10.0 in toolchain
    e334f4e2 [Wes McKinney] Add explicit type check
    db046597 [Wes McKinney] Implement float32 conversions, use NumPy dtype when possible for inner arrays rather than dispatching to the generic sequence routine
---
 ci/msvc-build.bat                       |  2 +-
 ci/travis_before_script_cpp.sh          |  2 +-
 cpp/src/arrow/python/builtin_convert.cc | 11 +++++++++++
 cpp/src/arrow/python/numpy_to_arrow.cc  | 13 ++++++++++++-
 python/pyarrow/tests/test_array.py      | 17 +++++++++++++++++
 5 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/ci/msvc-build.bat b/ci/msvc-build.bat
index 62ebcf3..94eb16a 100644
--- a/ci/msvc-build.bat
+++ b/ci/msvc-build.bat
@@ -81,7 +81,7 @@ conda info -a
 
 conda create -n arrow -q -y python=%PYTHON% ^
       six pytest setuptools numpy pandas cython ^
-      thrift-cpp
+      thrift-cpp=0.10.0
 
 if "%JOB%" == "Toolchain" (
 
diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh
index fd2c164..2f164c4 100755
--- a/ci/travis_before_script_cpp.sh
+++ b/ci/travis_before_script_cpp.sh
@@ -47,7 +47,7 @@ if [ "$ARROW_TRAVIS_USE_TOOLCHAIN" == "1" ]; then
         zlib \
         cmake \
         curl \
-        thrift-cpp \
+        thrift-cpp=0.10.0 \
         ninja
 
   # HACK(wesm): We started experiencing OpenSSL failures when Miniconda was
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index 0879b3f..71f2fde 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -586,6 +586,15 @@ class TimestampConverter
   TimeUnit::type unit_;
 };
 
+class Float32Converter : public TypedConverterVisitor<FloatBuilder, Float32Converter> {
+ public:
+  Status AppendItem(const OwnedRef& item) {
+    float val = static_cast<float>(PyFloat_AsDouble(item.obj()));
+    RETURN_IF_PYERROR();
+    return typed_builder_->Append(val);
+  }
+};
+
 class DoubleConverter : public TypedConverterVisitor<DoubleBuilder, DoubleConverter> {
  public:
   Status AppendItem(const OwnedRef& item) {
@@ -740,6 +749,8 @@ std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type
     case Type::TIMESTAMP:
       return std::make_shared<TimestampConverter>(
           static_cast<const TimestampType&>(*type).unit());
+    case Type::FLOAT:
+      return std::make_shared<Float32Converter>();
     case Type::DOUBLE:
       return std::make_shared<DoubleConverter>();
     case Type::BINARY:
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc
index c5c02e3..b5a75ae 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -1008,10 +1008,21 @@ Status NumPyConverter::ConvertObjectsInfer() {
       return ConvertTimes();
     } else if (PyObject_IsInstance(const_cast<PyObject*>(obj), Decimal.obj())) {
       return ConvertDecimals();
-    } else if (PyList_Check(obj) || PyArray_Check(obj)) {
+    } else if (PyList_Check(obj)) {
       std::shared_ptr<DataType> inferred_type;
       RETURN_NOT_OK(InferArrowType(obj, &inferred_type));
       return ConvertLists(inferred_type);
+    } else if (PyArray_Check(obj)) {
+      std::shared_ptr<DataType> inferred_type;
+      PyArray_Descr* dtype = PyArray_DESCR(reinterpret_cast<PyArrayObject*>(obj));
+
+      if (dtype->type_num == NPY_OBJECT) {
+        RETURN_NOT_OK(InferArrowType(obj, &inferred_type));
+      } else {
+        RETURN_NOT_OK(
+            NumPyDtypeToArrow(reinterpret_cast<PyObject*>(dtype), &inferred_type));
+      }
+      return ConvertLists(inferred_type);
     } else {
       const std::string supported_types =
           "string, bool, float, int, date, time, decimal, list, array";
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 2d99111..1d5d300 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -513,6 +513,23 @@ def test_array_from_numpy_datetimeD():
     assert result.equals(expected)
 
 
+def test_array_from_py_float32():
+    data = [[1.2, 3.4], [9.0, 42.0]]
+
+    t = pa.float32()
+
+    arr1 = pa.array(data[0], type=t)
+    arr2 = pa.array(data, type=pa.list_(t))
+
+    expected1 = np.array(data[0], dtype=np.float32)
+    expected2 = pd.Series([np.array(data[0], dtype=np.float32),
+                           np.array(data[1], dtype=np.float32)])
+
+    assert arr1.type == t
+    assert arr1.equals(pa.array(expected1))
+    assert arr2.equals(pa.array(expected2))
+
+
 def test_array_from_numpy_ascii():
     arr = np.array(['abcde', 'abc', ''], dtype='|S5')
 

-- 
To stop receiving notification emails like this one, please contact
wesm@apache.org.