You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/01/26 15:28:14 UTC
[arrow] branch master updated: ARROW-2007: [Python] Implement float32 conversions, use NumPy dtype when possible for inner arrays
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new f680dac ARROW-2007: [Python] Implement float32 conversions, use NumPy dtype when possible for inner arrays
f680dac is described below
commit f680dac68ef5bc911499ae0b62e14c46046816a1
Author: Wes McKinney <we...@twosigma.com>
AuthorDate: Fri Jan 26 10:28:08 2018 -0500
ARROW-2007: [Python] Implement float32 conversions, use NumPy dtype when possible for inner arrays
Author: Wes McKinney <we...@twosigma.com>
Closes #1509 from wesm/ARROW-2007 and squashes the following commits:
cd12626d [Wes McKinney] Pin thrift-cpp in Appveyor
326c82e1 [Wes McKinney] Pin Thrift 0.10.0 in toolchain
e334f4e2 [Wes McKinney] Add explicit type check
db046597 [Wes McKinney] Implement float32 conversions, use NumPy dtype when possible for inner arrays rather than dispatching to the generic sequence routine
---
ci/msvc-build.bat | 2 +-
ci/travis_before_script_cpp.sh | 2 +-
cpp/src/arrow/python/builtin_convert.cc | 11 +++++++++++
cpp/src/arrow/python/numpy_to_arrow.cc | 13 ++++++++++++-
python/pyarrow/tests/test_array.py | 17 +++++++++++++++++
5 files changed, 42 insertions(+), 3 deletions(-)
diff --git a/ci/msvc-build.bat b/ci/msvc-build.bat
index 62ebcf3..94eb16a 100644
--- a/ci/msvc-build.bat
+++ b/ci/msvc-build.bat
@@ -81,7 +81,7 @@ conda info -a
conda create -n arrow -q -y python=%PYTHON% ^
six pytest setuptools numpy pandas cython ^
- thrift-cpp
+ thrift-cpp=0.10.0
if "%JOB%" == "Toolchain" (
diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh
index fd2c164..2f164c4 100755
--- a/ci/travis_before_script_cpp.sh
+++ b/ci/travis_before_script_cpp.sh
@@ -47,7 +47,7 @@ if [ "$ARROW_TRAVIS_USE_TOOLCHAIN" == "1" ]; then
zlib \
cmake \
curl \
- thrift-cpp \
+ thrift-cpp=0.10.0 \
ninja
# HACK(wesm): We started experiencing OpenSSL failures when Miniconda was
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index 0879b3f..71f2fde 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -586,6 +586,15 @@ class TimestampConverter
TimeUnit::type unit_;
};
+class Float32Converter : public TypedConverterVisitor<FloatBuilder, Float32Converter> {
+ public:
+ Status AppendItem(const OwnedRef& item) {
+ float val = static_cast<float>(PyFloat_AsDouble(item.obj()));
+ RETURN_IF_PYERROR();
+ return typed_builder_->Append(val);
+ }
+};
+
class DoubleConverter : public TypedConverterVisitor<DoubleBuilder, DoubleConverter> {
public:
Status AppendItem(const OwnedRef& item) {
@@ -740,6 +749,8 @@ std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type
case Type::TIMESTAMP:
return std::make_shared<TimestampConverter>(
static_cast<const TimestampType&>(*type).unit());
+ case Type::FLOAT:
+ return std::make_shared<Float32Converter>();
case Type::DOUBLE:
return std::make_shared<DoubleConverter>();
case Type::BINARY:
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc
index c5c02e3..b5a75ae 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -1008,10 +1008,21 @@ Status NumPyConverter::ConvertObjectsInfer() {
return ConvertTimes();
} else if (PyObject_IsInstance(const_cast<PyObject*>(obj), Decimal.obj())) {
return ConvertDecimals();
- } else if (PyList_Check(obj) || PyArray_Check(obj)) {
+ } else if (PyList_Check(obj)) {
std::shared_ptr<DataType> inferred_type;
RETURN_NOT_OK(InferArrowType(obj, &inferred_type));
return ConvertLists(inferred_type);
+ } else if (PyArray_Check(obj)) {
+ std::shared_ptr<DataType> inferred_type;
+ PyArray_Descr* dtype = PyArray_DESCR(reinterpret_cast<PyArrayObject*>(obj));
+
+ if (dtype->type_num == NPY_OBJECT) {
+ RETURN_NOT_OK(InferArrowType(obj, &inferred_type));
+ } else {
+ RETURN_NOT_OK(
+ NumPyDtypeToArrow(reinterpret_cast<PyObject*>(dtype), &inferred_type));
+ }
+ return ConvertLists(inferred_type);
} else {
const std::string supported_types =
"string, bool, float, int, date, time, decimal, list, array";
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 2d99111..1d5d300 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -513,6 +513,23 @@ def test_array_from_numpy_datetimeD():
assert result.equals(expected)
+def test_array_from_py_float32():
+ data = [[1.2, 3.4], [9.0, 42.0]]
+
+ t = pa.float32()
+
+ arr1 = pa.array(data[0], type=t)
+ arr2 = pa.array(data, type=pa.list_(t))
+
+ expected1 = np.array(data[0], dtype=np.float32)
+ expected2 = pd.Series([np.array(data[0], dtype=np.float32),
+ np.array(data[1], dtype=np.float32)])
+
+ assert arr1.type == t
+ assert arr1.equals(pa.array(expected1))
+ assert arr2.equals(pa.array(expected2))
+
+
def test_array_from_numpy_ascii():
arr = np.array(['abcde', 'abc', ''], dtype='|S5')
--
To stop receiving notification emails like this one, please contact
wesm@apache.org.