You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/04/02 15:59:27 UTC
arrow git commit: ARROW-718: [Python] Implement pyarrow.Tensor
container, zero-copy NumPy roundtrips
Repository: arrow
Updated Branches:
refs/heads/master 7fec7d30c -> e333576a0
ARROW-718: [Python] Implement pyarrow.Tensor container, zero-copy NumPy roundtrips
Author: Wes McKinney <we...@twosigma.com>
Closes #477 from wesm/ARROW-718 and squashes the following commits:
2c23427 [Wes McKinney] Restore clang-format-3.9 formatting
eb21a17 [Wes McKinney] Finish basic tensor zero-copy roundtrips, simple repr. flake8 fixes
4cf6d2b [Wes McKinney] Draft tensor conversion to/from numpy
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/e333576a
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/e333576a
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/e333576a
Branch: refs/heads/master
Commit: e333576a0d215e97cc4e2a218ddc56ee1242986d
Parents: 7fec7d3
Author: Wes McKinney <we...@twosigma.com>
Authored: Sun Apr 2 11:59:18 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Sun Apr 2 11:59:18 2017 -0400
----------------------------------------------------------------------
cpp/src/arrow/CMakeLists.txt | 1 +
cpp/src/arrow/api.h | 1 +
cpp/src/arrow/python/CMakeLists.txt | 10 +
cpp/src/arrow/python/api.h | 1 +
cpp/src/arrow/python/common.h | 17 --
cpp/src/arrow/python/helpers.cc | 1 +
cpp/src/arrow/python/numpy_convert.cc | 267 ++++++++++++++++++++++
cpp/src/arrow/python/numpy_convert.h | 69 ++++++
cpp/src/arrow/python/pandas_convert.cc | 88 +------
cpp/src/arrow/python/pandas_convert.h | 3 -
cpp/src/arrow/python/python-test.cc | 2 +-
cpp/src/arrow/tensor.cc | 14 +-
cpp/src/arrow/tensor.h | 7 +
python/pyarrow/__init__.py | 6 +-
python/pyarrow/array.pxd | 13 +-
python/pyarrow/array.pyx | 97 +++++++-
python/pyarrow/includes/libarrow.pxd | 17 ++
python/pyarrow/includes/pyarrow.pxd | 12 +-
python/pyarrow/io.pyx | 2 +-
python/pyarrow/schema.pyx | 14 +-
python/pyarrow/table.pyx | 2 +-
python/pyarrow/tests/pandas_examples.py | 5 +-
python/pyarrow/tests/test_convert_builtin.py | 2 +-
python/pyarrow/tests/test_convert_pandas.py | 15 +-
python/pyarrow/tests/test_feather.py | 2 -
python/pyarrow/tests/test_jemalloc.py | 17 +-
26 files changed, 541 insertions(+), 144 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/cpp/src/arrow/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index bd33bf5..8eaa76a 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -28,6 +28,7 @@ install(FILES
pretty_print.h
status.h
table.h
+ tensor.h
type.h
type_fwd.h
type_traits.h
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/cpp/src/arrow/api.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h
index 50a0951..aa0da75 100644
--- a/cpp/src/arrow/api.h
+++ b/cpp/src/arrow/api.h
@@ -29,6 +29,7 @@
#include "arrow/pretty_print.h"
#include "arrow/status.h"
#include "arrow/table.h"
+#include "arrow/tensor.h"
#include "arrow/type.h"
#include "arrow/visitor.h"
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/cpp/src/arrow/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt
index faaad89..a8b4cc7 100644
--- a/cpp/src/arrow/python/CMakeLists.txt
+++ b/cpp/src/arrow/python/CMakeLists.txt
@@ -55,6 +55,7 @@ set(ARROW_PYTHON_SRCS
config.cc
helpers.cc
io.cc
+ numpy_convert.cc
pandas_convert.cc
)
@@ -71,6 +72,14 @@ ADD_ARROW_LIB(arrow_python
STATIC_LINK_LIBS ${ARROW_IO_SHARED_PRIVATE_LINK_LIBS}
)
+if ("${COMPILER_FAMILY}" STREQUAL "clang")
+ # Clang, be quiet. Python C API has lots of macros
+ set_property(SOURCE ${ARROW_PYTHON_SRCS}
+ APPEND_STRING
+ PROPERTY
+ COMPILE_FLAGS -Wno-parentheses-equality)
+endif()
+
install(FILES
api.h
builtin_convert.h
@@ -79,6 +88,7 @@ install(FILES
do_import_numpy.h
helpers.h
io.h
+ numpy_convert.h
numpy_interop.h
pandas_convert.h
type_traits.h
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/cpp/src/arrow/python/api.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/api.h b/cpp/src/arrow/python/api.h
index f4f1c0c..895d1f4 100644
--- a/cpp/src/arrow/python/api.h
+++ b/cpp/src/arrow/python/api.h
@@ -22,6 +22,7 @@
#include "arrow/python/common.h"
#include "arrow/python/helpers.h"
#include "arrow/python/io.h"
+#include "arrow/python/numpy_convert.h"
#include "arrow/python/pandas_convert.h"
#endif // ARROW_PYTHON_API_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/cpp/src/arrow/python/common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
index f1be471..32bfa78 100644
--- a/cpp/src/arrow/python/common.h
+++ b/cpp/src/arrow/python/common.h
@@ -103,23 +103,6 @@ struct PyObjectStringify {
ARROW_EXPORT void set_default_memory_pool(MemoryPool* pool);
ARROW_EXPORT MemoryPool* get_memory_pool();
-class ARROW_EXPORT NumPyBuffer : public Buffer {
- public:
- explicit NumPyBuffer(PyArrayObject* arr) : Buffer(nullptr, 0) {
- arr_ = arr;
- Py_INCREF(arr);
-
- data_ = reinterpret_cast<const uint8_t*>(PyArray_DATA(arr_));
- size_ = PyArray_SIZE(arr_) * PyArray_DESCR(arr_)->elsize;
- capacity_ = size_;
- }
-
- virtual ~NumPyBuffer() { Py_XDECREF(arr_); }
-
- private:
- PyArrayObject* arr_;
-};
-
class ARROW_EXPORT PyBuffer : public Buffer {
public:
/// Note that the GIL must be held when calling the PyBuffer constructor.
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/cpp/src/arrow/python/helpers.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index add2d9a..be5f412 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -42,6 +42,7 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
GET_PRIMITIVE_TYPE(DATE32, date32);
GET_PRIMITIVE_TYPE(DATE64, date64);
GET_PRIMITIVE_TYPE(BOOL, boolean);
+ GET_PRIMITIVE_TYPE(HALF_FLOAT, float16);
GET_PRIMITIVE_TYPE(FLOAT, float32);
GET_PRIMITIVE_TYPE(DOUBLE, float64);
GET_PRIMITIVE_TYPE(BINARY, binary);
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/cpp/src/arrow/python/numpy_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc
new file mode 100644
index 0000000..3697819
--- /dev/null
+++ b/cpp/src/arrow/python/numpy_convert.cc
@@ -0,0 +1,267 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <Python.h>
+
+#include "arrow/python/numpy_convert.h"
+#include "arrow/python/numpy_interop.h"
+
+#include <cstdint>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/tensor.h"
+#include "arrow/type.h"
+
+#include "arrow/python/common.h"
+#include "arrow/python/type_traits.h"
+
+namespace arrow {
+namespace py {
+
+// Return true when `array` is an ndarray flagged as C-contiguous or
+// Fortran-contiguous. Any object that is not an ndarray yields false.
+bool is_contiguous(PyObject* array) {
+  if (!PyArray_Check(array)) { return false; }
+
+  const int flags = PyArray_FLAGS(reinterpret_cast<PyArrayObject*>(array));
+  return (flags & (NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS)) != 0;
+}
+
+// Normalize NumPy type numbers for 64-bit integers.
+//
+// Both the LONGLONG and the INT64 type numbers can be observed in the wild.
+// On builds where the guard below holds, U/LONGLONG is reported as U/INT64 so
+// that downstream switch statements only need to handle one spelling. Every
+// other type number is returned unchanged.
+int cast_npy_type_compat(int type_num) {
+#if (NPY_INT64 == NPY_LONGLONG) && (NPY_SIZEOF_LONGLONG == 8)
+  if (type_num == NPY_LONGLONG) { return NPY_INT64; }
+  if (type_num == NPY_ULONGLONG) { return NPY_UINT64; }
+#endif
+
+  return type_num;
+}
+
+// Wrap the Python object `ao` as an Arrow buffer. A reference to `ao` is held
+// until the buffer is destroyed. The data pointer, size and capacity are only
+// populated when `ao` is an ndarray; otherwise the buffer stays empty.
+NumPyBuffer::NumPyBuffer(PyObject* ao) : Buffer(nullptr, 0), arr_(ao) {
+  Py_INCREF(arr_);
+
+  if (!PyArray_Check(arr_)) { return; }
+
+  PyArrayObject* array = reinterpret_cast<PyArrayObject*>(arr_);
+  data_ = reinterpret_cast<const uint8_t*>(PyArray_DATA(array));
+  // Element count times the per-element size from the dtype descriptor
+  size_ = PyArray_SIZE(array) * PyArray_DESCR(array)->elsize;
+  capacity_ = size_;
+
+  // A writeable ndarray is exposed as a mutable buffer
+  if ((PyArray_FLAGS(array) & NPY_ARRAY_WRITEABLE) != 0) { is_mutable_ = true; }
+}
+
+// Release the reference taken in the constructor
+NumPyBuffer::~NumPyBuffer() {
+  Py_XDECREF(arr_);
+}
+
+// Emit one `case` of a switch over NumPy type numbers: for NPY_<NPY_NAME>,
+// store the result of calling FACTORY() into `*out` and leave the switch.
+// Expects a variable named `out` to be in scope at the point of use.
+#define TO_ARROW_TYPE_CASE(NPY_NAME, FACTORY) \
+  case NPY_##NPY_NAME:                        \
+    *out = FACTORY();                         \
+    break;
+
+// Map a NumPy dtype to the Arrow data type used for tensor values.
+//
+// \param dtype a NumPy dtype object (reinterpreted as PyArray_Descr*)
+// \param[out] out receives the matching Arrow type
+// \return Status::OK on success, Status::NotImplemented for any dtype that is
+//   not listed below
+//
+// Note that NPY_BOOL is mapped to uint8 here, unlike NumPyDtypeToArrow which
+// maps it to boolean.
+Status GetTensorType(PyObject* dtype, std::shared_ptr<DataType>* out) {
+  PyArray_Descr* descr = reinterpret_cast<PyArray_Descr*>(dtype);
+  int type_num = cast_npy_type_compat(descr->type_num);
+
+  switch (type_num) {
+    TO_ARROW_TYPE_CASE(BOOL, uint8);
+    TO_ARROW_TYPE_CASE(INT8, int8);
+    TO_ARROW_TYPE_CASE(INT16, int16);
+    TO_ARROW_TYPE_CASE(INT32, int32);
+    TO_ARROW_TYPE_CASE(INT64, int64);
+#if (NPY_INT64 != NPY_LONGLONG)
+    TO_ARROW_TYPE_CASE(LONGLONG, int64);
+#endif
+    TO_ARROW_TYPE_CASE(UINT8, uint8);
+    TO_ARROW_TYPE_CASE(UINT16, uint16);
+    TO_ARROW_TYPE_CASE(UINT32, uint32);
+    TO_ARROW_TYPE_CASE(UINT64, uint64);
+#if (NPY_UINT64 != NPY_ULONGLONG)
+    // Mirrors the LONGLONG case above. The previous spelling invoked a
+    // non-existent TO_ARROW_CASE macro and would not compile on platforms
+    // where this branch is enabled.
+    TO_ARROW_TYPE_CASE(ULONGLONG, uint64);
+#endif
+    TO_ARROW_TYPE_CASE(FLOAT16, float16);
+    TO_ARROW_TYPE_CASE(FLOAT32, float32);
+    TO_ARROW_TYPE_CASE(FLOAT64, float64);
+    default: {
+      std::stringstream ss;
+      ss << "Unsupported numpy type " << descr->type_num << std::endl;
+      return Status::NotImplemented(ss.str());
+    }
+  }
+  return Status::OK();
+}
+
+// Look up the NumPy type number that corresponds to an Arrow data type.
+//
+// \param type the Arrow type to translate
+// \param[out] type_num receives the NPY_* type number on success
+// \return Status::OK on success, Status::NotImplemented when the Arrow type
+//   is not one of the integer or floating point types listed below
+Status GetNumPyType(const DataType& type, int* type_num) {
+#define ARROW_TO_NPY_CASE(ARROW_ID, NPY_ID) \
+  case Type::ARROW_ID:                      \
+    *type_num = NPY_##NPY_ID;               \
+    return Status::OK();
+
+  switch (type.type) {
+    ARROW_TO_NPY_CASE(UINT8, UINT8);
+    ARROW_TO_NPY_CASE(INT8, INT8);
+    ARROW_TO_NPY_CASE(UINT16, UINT16);
+    ARROW_TO_NPY_CASE(INT16, INT16);
+    ARROW_TO_NPY_CASE(UINT32, UINT32);
+    ARROW_TO_NPY_CASE(INT32, INT32);
+    ARROW_TO_NPY_CASE(UINT64, UINT64);
+    ARROW_TO_NPY_CASE(INT64, INT64);
+    ARROW_TO_NPY_CASE(HALF_FLOAT, FLOAT16);
+    ARROW_TO_NPY_CASE(FLOAT, FLOAT32);
+    ARROW_TO_NPY_CASE(DOUBLE, FLOAT64);
+    default:
+      break;
+  }
+#undef ARROW_TO_NPY_CASE
+
+  // Only reached when no case above matched
+  std::stringstream ss;
+  ss << "Unsupported tensor type: " << type.ToString() << std::endl;
+  return Status::NotImplemented(ss.str());
+}
+
+// Map a NumPy dtype to the Arrow data type used for array conversion.
+//
+// \param dtype a NumPy dtype object (reinterpreted as PyArray_Descr*)
+// \param[out] out receives the matching Arrow type
+// \return Status::OK on success, Status::NotImplemented for an unsupported
+//   dtype or an unsupported datetime64 unit
+//
+// Booleans map to the Arrow boolean type, and datetime64 maps to a timestamp
+// whose unit is taken from the dtype metadata (s, ms, us or ns).
+Status NumPyDtypeToArrow(PyObject* dtype, std::shared_ptr<DataType>* out) {
+  PyArray_Descr* descr = reinterpret_cast<PyArray_Descr*>(dtype);
+
+  int type_num = cast_npy_type_compat(descr->type_num);
+
+  switch (type_num) {
+    TO_ARROW_TYPE_CASE(BOOL, boolean);
+    TO_ARROW_TYPE_CASE(INT8, int8);
+    TO_ARROW_TYPE_CASE(INT16, int16);
+    TO_ARROW_TYPE_CASE(INT32, int32);
+    TO_ARROW_TYPE_CASE(INT64, int64);
+#if (NPY_INT64 != NPY_LONGLONG)
+    TO_ARROW_TYPE_CASE(LONGLONG, int64);
+#endif
+    TO_ARROW_TYPE_CASE(UINT8, uint8);
+    TO_ARROW_TYPE_CASE(UINT16, uint16);
+    TO_ARROW_TYPE_CASE(UINT32, uint32);
+    TO_ARROW_TYPE_CASE(UINT64, uint64);
+#if (NPY_UINT64 != NPY_ULONGLONG)
+    // Mirrors the LONGLONG case above. The previous spelling invoked a
+    // non-existent TO_ARROW_CASE macro and would not compile on platforms
+    // where this branch is enabled.
+    TO_ARROW_TYPE_CASE(ULONGLONG, uint64);
+#endif
+    TO_ARROW_TYPE_CASE(FLOAT32, float32);
+    TO_ARROW_TYPE_CASE(FLOAT64, float64);
+    case NPY_DATETIME: {
+      // The datetime unit lives in the dtype's C-level metadata
+      auto date_dtype =
+          reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
+      TimeUnit unit;
+      switch (date_dtype->meta.base) {
+        case NPY_FR_s:
+          unit = TimeUnit::SECOND;
+          break;
+        case NPY_FR_ms:
+          unit = TimeUnit::MILLI;
+          break;
+        case NPY_FR_us:
+          unit = TimeUnit::MICRO;
+          break;
+        case NPY_FR_ns:
+          unit = TimeUnit::NANO;
+          break;
+        default:
+          return Status::NotImplemented("Unsupported datetime64 time unit");
+      }
+      *out = timestamp(unit);
+    } break;
+    default: {
+      std::stringstream ss;
+      ss << "Unsupported numpy type " << descr->type_num << std::endl;
+      return Status::NotImplemented(ss.str());
+    }
+  }
+
+  return Status::OK();
+}
+
+#undef TO_ARROW_TYPE_CASE
+
+// Build an arrow::Tensor that references the memory of a NumPy ndarray.
+//
+// \param pool memory pool (not used by this implementation)
+// \param ao the Python object to convert; must be an ndarray
+// \param[out] out receives the resulting tensor
+// \return TypeError if `ao` is not an ndarray, Invalid if any stride is
+//   negative, or the error from GetTensorType / MakeTensor
+Status NdarrayToTensor(MemoryPool* pool, PyObject* ao, std::shared_ptr<Tensor>* out) {
+  if (!PyArray_Check(ao)) { return Status::TypeError("Did not pass ndarray object"); }
+
+  PyArrayObject* ndarray = reinterpret_cast<PyArrayObject*>(ao);
+
+  // TODO(wesm): What do we want to do with non-contiguous memory and negative strides?
+
+  const int ndim = PyArray_NDIM(ndarray);
+
+  // The buffer keeps a reference to the ndarray for as long as it lives
+  std::shared_ptr<Buffer> data = std::make_shared<NumPyBuffer>(ao);
+
+  const npy_intp* np_strides = PyArray_STRIDES(ndarray);
+  const npy_intp* np_shape = PyArray_SHAPE(ndarray);
+
+  std::vector<int64_t> shape;
+  std::vector<int64_t> strides;
+  shape.reserve(ndim);
+  strides.reserve(ndim);
+  for (int dim = 0; dim < ndim; ++dim) {
+    if (np_strides[dim] < 0) {
+      return Status::Invalid("Negative ndarray strides not supported");
+    }
+    shape.push_back(np_shape[dim]);
+    strides.push_back(np_strides[dim]);
+  }
+
+  std::shared_ptr<DataType> type;
+  PyObject* descr = reinterpret_cast<PyObject*>(PyArray_DESCR(ndarray));
+  RETURN_NOT_OK(GetTensorType(descr, &type));
+  return MakeTensor(type, data, shape, strides, {}, out);
+}
+
+// Create a NumPy ndarray that views the tensor's memory without copying.
+//
+// \param tensor the tensor whose data pointer, shape and strides are exposed
+// \param base Python object that keeps the tensor memory alive; unless it is
+//   null or Py_None it is installed as the ndarray's base object. The caller
+//   keeps its own reference: this function takes an additional one.
+// \param[out] out receives the new ndarray
+// \return the error from GetNumPyType for unsupported tensor types, or
+//   whatever RETURN_IF_PYERROR produces when a Python error is pending
+Status TensorToNdarray(const Tensor& tensor, PyObject* base, PyObject** out) {
+  int type_num;
+  RETURN_NOT_OK(GetNumPyType(*tensor.type(), &type_num));
+  PyArray_Descr* dtype = PyArray_DescrNewFromType(type_num);
+  RETURN_IF_PYERROR();
+
+  std::vector<npy_intp> npy_shape(tensor.ndim());
+  std::vector<npy_intp> npy_strides(tensor.ndim());
+
+  for (int i = 0; i < tensor.ndim(); ++i) {
+    npy_shape[i] = tensor.shape()[i];
+    npy_strides[i] = tensor.strides()[i];
+  }
+
+  const void* immutable_data = nullptr;
+  if (tensor.data()) { immutable_data = tensor.data()->data(); }
+
+  // Remove const =(
+  void* mutable_data = const_cast<void*>(immutable_data);
+
+  int array_flags = 0;
+  if (tensor.is_row_major()) { array_flags |= NPY_ARRAY_C_CONTIGUOUS; }
+  if (tensor.is_column_major()) { array_flags |= NPY_ARRAY_F_CONTIGUOUS; }
+  if (tensor.is_mutable()) { array_flags |= NPY_ARRAY_WRITEABLE; }
+
+  PyObject* result = PyArray_NewFromDescr(&PyArray_Type, dtype, tensor.ndim(),
+      npy_shape.data(), npy_strides.data(), mutable_data, array_flags, nullptr);
+  RETURN_IF_PYERROR();
+
+  if (base != nullptr && base != Py_None) {
+    // PyArray_SetBaseObject steals a reference to `base`. The caller only
+    // lends us a borrowed pointer, so take a reference first; otherwise the
+    // owner would be released one time too many when the ndarray is freed.
+    Py_INCREF(base);
+    PyArray_SetBaseObject(reinterpret_cast<PyArrayObject*>(result), base);
+  }
+  *out = result;
+  return Status::OK();
+}
+
+} // namespace py
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/cpp/src/arrow/python/numpy_convert.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/numpy_convert.h b/cpp/src/arrow/python/numpy_convert.h
new file mode 100644
index 0000000..685a626
--- /dev/null
+++ b/cpp/src/arrow/python/numpy_convert.h
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Functions for converting between NumPy data structures (ndarrays, dtypes)
+// and Arrow data structures (buffers, data types, tensors)
+
+#ifndef ARROW_PYTHON_NUMPY_CONVERT_H
+#define ARROW_PYTHON_NUMPY_CONVERT_H
+
+#include <Python.h>
+
+#include <memory>
+#include <string>
+
+#include "arrow/buffer.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+struct DataType;
+class MemoryPool;
+class Status;
+class Tensor;
+
+namespace py {
+
+// An arrow::Buffer backed by the memory of a Python object, intended for
+// NumPy ndarrays. The wrapped object is stored in `arr_`.
+class ARROW_EXPORT NumPyBuffer : public Buffer {
+ public:
+  // \param arr the Python object to wrap
+  explicit NumPyBuffer(PyObject* arr);
+  virtual ~NumPyBuffer();
+
+ private:
+  // The wrapped Python object
+  PyObject* arr_;
+};
+
+// Handle misbehaved types like LONGLONG and ULONGLONG
+int cast_npy_type_compat(int type_num);
+
+bool is_contiguous(PyObject* array);
+
+ARROW_EXPORT
+Status NumPyDtypeToArrow(PyObject* dtype, std::shared_ptr<DataType>* out);
+
+Status GetTensorType(PyObject* dtype, std::shared_ptr<DataType>* out);
+Status GetNumPyType(const DataType& type, int* type_num);
+
+ARROW_EXPORT Status NdarrayToTensor(
+ MemoryPool* pool, PyObject* ao, std::shared_ptr<Tensor>* out);
+
+ARROW_EXPORT Status TensorToNdarray(const Tensor& tensor, PyObject* base, PyObject** out);
+
+} // namespace py
+} // namespace arrow
+
+#endif // ARROW_PYTHON_NUMPY_CONVERT_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/cpp/src/arrow/python/pandas_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc
index ddfec1b..01019e5 100644
--- a/cpp/src/arrow/python/pandas_convert.cc
+++ b/cpp/src/arrow/python/pandas_convert.cc
@@ -36,11 +36,6 @@
#include "arrow/array.h"
#include "arrow/loader.h"
-#include "arrow/python/builtin_convert.h"
-#include "arrow/python/common.h"
-#include "arrow/python/config.h"
-#include "arrow/python/type_traits.h"
-#include "arrow/python/util/datetime.h"
#include "arrow/status.h"
#include "arrow/table.h"
#include "arrow/type_fwd.h"
@@ -49,24 +44,19 @@
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
+#include "arrow/python/builtin_convert.h"
+#include "arrow/python/common.h"
+#include "arrow/python/config.h"
+#include "arrow/python/numpy_convert.h"
+#include "arrow/python/type_traits.h"
+#include "arrow/python/util/datetime.h"
+
namespace arrow {
namespace py {
// ----------------------------------------------------------------------
// Utility code
-int cast_npy_type_compat(int type_num) {
-// Both LONGLONG and INT64 can be observed in the wild, which is buggy. We set
-// U/LONGLONG to U/INT64 so things work properly.
-
-#if (NPY_INT64 == NPY_LONGLONG) && (NPY_SIZEOF_LONGLONG == 8)
- if (type_num == NPY_LONGLONG) { type_num = NPY_INT64; }
- if (type_num == NPY_ULONGLONG) { type_num = NPY_UINT64; }
-#endif
-
- return type_num;
-}
-
static inline bool PyObject_is_null(const PyObject* obj) {
return obj == Py_None || obj == numpy_nan;
}
@@ -395,7 +385,7 @@ inline Status PandasConverter::ConvertData(std::shared_ptr<Buffer>* data) {
return Status::NotImplemented("NumPy type casts not yet implemented");
}
- *data = std::make_shared<NumPyBuffer>(arr_);
+ *data = std::make_shared<NumPyBuffer>(reinterpret_cast<PyObject*>(arr_));
return Status::OK();
}
@@ -730,68 +720,6 @@ Status PandasObjectsToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo,
return converter.ConvertObjects(out);
}
-Status PandasDtypeToArrow(PyObject* dtype, std::shared_ptr<DataType>* out) {
- PyArray_Descr* descr = reinterpret_cast<PyArray_Descr*>(dtype);
-
- int type_num = cast_npy_type_compat(descr->type_num);
-
-#define TO_ARROW_TYPE_CASE(NPY_NAME, FACTORY) \
- case NPY_##NPY_NAME: \
- *out = FACTORY(); \
- break;
-
- switch (type_num) {
- TO_ARROW_TYPE_CASE(BOOL, boolean);
- TO_ARROW_TYPE_CASE(INT8, int8);
- TO_ARROW_TYPE_CASE(INT16, int16);
- TO_ARROW_TYPE_CASE(INT32, int32);
- TO_ARROW_TYPE_CASE(INT64, int64);
-#if (NPY_INT64 != NPY_LONGLONG)
- TO_ARROW_TYPE_CASE(LONGLONG, int64);
-#endif
- TO_ARROW_TYPE_CASE(UINT8, uint8);
- TO_ARROW_TYPE_CASE(UINT16, uint16);
- TO_ARROW_TYPE_CASE(UINT32, uint32);
- TO_ARROW_TYPE_CASE(UINT64, uint64);
-#if (NPY_UINT64 != NPY_ULONGLONG)
- TO_ARROW_CASE(ULONGLONG);
-#endif
- TO_ARROW_TYPE_CASE(FLOAT32, float32);
- TO_ARROW_TYPE_CASE(FLOAT64, float64);
- case NPY_DATETIME: {
- auto date_dtype =
- reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
- TimeUnit unit;
- switch (date_dtype->meta.base) {
- case NPY_FR_s:
- unit = TimeUnit::SECOND;
- break;
- case NPY_FR_ms:
- unit = TimeUnit::MILLI;
- break;
- case NPY_FR_us:
- unit = TimeUnit::MICRO;
- break;
- case NPY_FR_ns:
- unit = TimeUnit::NANO;
- break;
- default:
- return Status::NotImplemented("Unsupported datetime64 time unit");
- }
- *out = timestamp(unit);
- } break;
- default: {
- std::stringstream ss;
- ss << "Unsupported numpy type " << descr->type_num << std::endl;
- return Status::NotImplemented(ss.str());
- }
- }
-
-#undef TO_ARROW_TYPE_CASE
-
- return Status::OK();
-}
-
// ----------------------------------------------------------------------
// pandas 0.x DataFrame conversion internals
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/cpp/src/arrow/python/pandas_convert.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas_convert.h b/cpp/src/arrow/python/pandas_convert.h
index 105c159..8fd3107 100644
--- a/cpp/src/arrow/python/pandas_convert.h
+++ b/cpp/src/arrow/python/pandas_convert.h
@@ -62,9 +62,6 @@ Status ConvertTableToPandas(
const std::shared_ptr<Table>& table, int nthreads, PyObject** out);
ARROW_EXPORT
-Status PandasDtypeToArrow(PyObject* dtype, std::shared_ptr<DataType>* out);
-
-ARROW_EXPORT
Status PandasToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo,
const std::shared_ptr<DataType>& type, std::shared_ptr<Array>* out);
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/cpp/src/arrow/python/python-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc
index 01e30f5..f269ebf 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -26,9 +26,9 @@
#include "arrow/table.h"
#include "arrow/test-util.h"
+#include "arrow/python/builtin_convert.h"
#include "arrow/python/common.h"
#include "arrow/python/pandas_convert.h"
-#include "arrow/python/builtin_convert.h"
namespace arrow {
namespace py {
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/cpp/src/arrow/tensor.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc
index 9a8de51..8bbb97b 100644
--- a/cpp/src/arrow/tensor.cc
+++ b/cpp/src/arrow/tensor.cc
@@ -90,13 +90,21 @@ int64_t Tensor::size() const {
}
bool Tensor::is_contiguous() const {
- std::vector<int64_t> c_strides;
- std::vector<int64_t> f_strides;
+ return is_row_major() || is_column_major();
+}
+bool Tensor::is_row_major() const {
+ std::vector<int64_t> c_strides;
const auto& fw_type = static_cast<const FixedWidthType&>(*type_);
ComputeRowMajorStrides(fw_type, shape_, &c_strides);
+ return strides_ == c_strides;
+}
+
+bool Tensor::is_column_major() const {
+ std::vector<int64_t> f_strides;
+ const auto& fw_type = static_cast<const FixedWidthType&>(*type_);
ComputeColumnMajorStrides(fw_type, shape_, &f_strides);
- return strides_ == c_strides || strides_ == f_strides;
+ return strides_ == f_strides;
}
bool Tensor::Equals(const Tensor& other) const {
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/cpp/src/arrow/tensor.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h
index eeb5c3e..12015f1 100644
--- a/cpp/src/arrow/tensor.h
+++ b/cpp/src/arrow/tensor.h
@@ -89,8 +89,15 @@ class ARROW_EXPORT Tensor {
/// Return true if the underlying data buffer is mutable
bool is_mutable() const { return data_->is_mutable(); }
+ /// Either row major or column major
bool is_contiguous() const;
+ /// AKA "C order"
+ bool is_row_major() const;
+
+ /// AKA "Fortran order"
+ bool is_column_major() const;
+
Type::type type_enum() const { return type_->type; }
bool Equals(const Tensor& other) const;
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 3df2a1d..5215028 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -28,8 +28,7 @@ except DistributionNotFound:
import pyarrow.config
from pyarrow.config import cpu_count, set_cpu_count
-from pyarrow.array import (Array,
- from_pandas_series, from_pylist,
+from pyarrow.array import (Array, Tensor, from_pylist,
NumericArray, IntegerArray, FloatingPointArray,
BooleanArray,
Int8Array, UInt8Array,
@@ -63,7 +62,8 @@ from pyarrow.schema import (null, bool_,
int8, int16, int32, int64,
uint8, uint16, uint32, uint64,
timestamp, date32, date64,
- float_, double, binary, string,
+ float16, float32, float64,
+ binary, string,
list_, struct, dictionary, field,
DataType, FixedSizeBinaryType,
Field, Schema, schema)
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/python/pyarrow/array.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxd b/python/pyarrow/array.pxd
index 0b5f33d..4267563 100644
--- a/python/pyarrow/array.pxd
+++ b/python/pyarrow/array.pxd
@@ -16,7 +16,7 @@
# under the License.
from pyarrow.includes.common cimport shared_ptr, int64_t
-from pyarrow.includes.libarrow cimport CArray
+from pyarrow.includes.libarrow cimport CArray, CTensor
from pyarrow.scalar import NA
@@ -41,6 +41,17 @@ cdef class Array:
cdef getitem(self, int64_t i)
+cdef class Tensor:
+ cdef:
+ shared_ptr[CTensor] sp_tensor
+ CTensor* tp
+
+ cdef readonly:
+ DataType type
+
+ cdef init(self, const shared_ptr[CTensor]& sp_tensor)
+
+
cdef object box_array(const shared_ptr[CArray]& sp_array)
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/python/pyarrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
index b9799f1..398e4cb 100644
--- a/python/pyarrow/array.pyx
+++ b/python/pyarrow/array.pyx
@@ -81,7 +81,7 @@ cdef class Array:
self.type = box_data_type(self.sp_array.get().type())
@staticmethod
- def from_pandas(obj, mask=None, DataType type=None,
+ def from_numpy(obj, mask=None, DataType type=None,
timestamps_to_ms=False,
MemoryPool memory_pool=None):
"""
@@ -116,7 +116,7 @@ cdef class Array:
>>> import pandas as pd
>>> import pyarrow as pa
- >>> pa.Array.from_pandas(pd.Series([1, 2]))
+ >>> pa.Array.from_numpy(pd.Series([1, 2]))
<pyarrow.array.Int64Array object at 0x7f674e4c0e10>
[
1,
@@ -124,7 +124,7 @@ cdef class Array:
]
>>> import numpy as np
- >>> pa.Array.from_pandas(pd.Series([1, 2]), np.array([0, 1],
+ >>> pa.Array.from_numpy(pd.Series([1, 2]), np.array([0, 1],
... dtype=bool))
<pyarrow.array.Int64Array object at 0x7f9019e11208>
[
@@ -166,7 +166,7 @@ cdef class Array:
values, obj.dtype, type, timestamps_to_ms=timestamps_to_ms)
if type is None:
- check_status(pyarrow.PandasDtypeToArrow(values.dtype, &c_type))
+ check_status(pyarrow.NumPyDtypeToArrow(values.dtype, &c_type))
else:
c_type = type.sp_type
@@ -316,6 +316,77 @@ cdef class Array:
return [x.as_py() for x in self]
+cdef class Tensor:
+    """
+    Python wrapper around a C++ arrow::Tensor held through a shared_ptr.
+
+    Use Tensor.from_numpy to create an instance from a NumPy ndarray and
+    to_numpy to get an ndarray back.
+    """
+
+    cdef init(self, const shared_ptr[CTensor]& sp_tensor):
+        # Keep the shared_ptr (ownership) plus a raw pointer for convenient
+        # member access, and box the tensor's data type once up front.
+        self.sp_tensor = sp_tensor
+        self.tp = sp_tensor.get()
+        self.type = box_data_type(self.tp.type())
+
+    def __repr__(self):
+        return """<pyarrow.Tensor>
+type: {0}
+shape: {1}
+strides: {2}""".format(self.type, self.shape, self.strides)
+
+    @staticmethod
+    def from_numpy(obj):
+        """
+        Create a Tensor from a numpy.ndarray using the default memory pool.
+
+        Any error status returned by the C++ conversion is raised through
+        check_status.
+        """
+        cdef shared_ptr[CTensor] ctensor
+        check_status(pyarrow.NdarrayToTensor(default_memory_pool(),
+                                             obj, &ctensor))
+        return box_tensor(ctensor)
+
+    def to_numpy(self):
+        """
+        Convert arrow::Tensor to numpy.ndarray with zero copy
+        """
+        cdef:
+            PyObject* out
+
+        # self is handed over as the `base` argument of the C++ conversion
+        check_status(pyarrow.TensorToNdarray(deref(self.tp), <PyObject*> self,
+                                             &out))
+        return PyObject_to_object(out)
+
+    property is_mutable:
+        "Whether the underlying data buffer is mutable"
+
+        def __get__(self):
+            return self.tp.is_mutable()
+
+    property is_contiguous:
+        "Whether the tensor is either row major or column major"
+
+        def __get__(self):
+            return self.tp.is_contiguous()
+
+    property ndim:
+        "Value of arrow::Tensor::ndim()"
+
+        def __get__(self):
+            return self.tp.ndim()
+
+    property size:
+        "Value of arrow::Tensor::size()"
+
+        def __get__(self):
+            return self.tp.size()
+
+    property shape:
+        "The tensor's shape vector copied into a Python list"
+
+        def __get__(self):
+            cdef size_t i
+            py_shape = []
+            for i in range(self.tp.shape().size()):
+                py_shape.append(self.tp.shape()[i])
+            return py_shape
+
+    property strides:
+        "The tensor's strides vector copied into a Python list"
+
+        def __get__(self):
+            cdef size_t i
+            py_strides = []
+            for i in range(self.tp.strides().size()):
+                py_strides.append(self.tp.strides()[i])
+            return py_strides
+
+
+
cdef wrap_array_output(PyObject* output):
cdef object obj = PyObject_to_object(output)
@@ -479,10 +550,10 @@ cdef class DictionaryArray(Array):
else:
mask = mask | (indices == -1)
- arrow_indices = Array.from_pandas(indices, mask=mask,
- memory_pool=memory_pool)
- arrow_dictionary = Array.from_pandas(dictionary,
- memory_pool=memory_pool)
+ arrow_indices = Array.from_numpy(indices, mask=mask,
+ memory_pool=memory_pool)
+ arrow_dictionary = Array.from_numpy(dictionary,
+ memory_pool=memory_pool)
if not isinstance(arrow_indices, IntegerArray):
raise ValueError('Indices must be integer type')
@@ -535,6 +606,15 @@ cdef object box_array(const shared_ptr[CArray]& sp_array):
return arr
+cdef object box_tensor(const shared_ptr[CTensor]& sp_tensor):
+    """
+    Wrap a shared_ptr to a C++ tensor in a new Python Tensor object.
+
+    Raises ValueError when the shared_ptr holds a NULL pointer.
+    """
+    cdef Tensor wrapper
+
+    if sp_tensor.get() == NULL:
+        raise ValueError('Tensor was NULL')
+
+    wrapper = Tensor()
+    wrapper.init(sp_tensor)
+    return wrapper
+
+
cdef object get_series_values(object obj):
import pandas as pd
@@ -549,4 +629,3 @@ cdef object get_series_values(object obj):
from_pylist = Array.from_list
-from_pandas_series = Array.from_pandas
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index f549884..8da063c 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -35,6 +35,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
Type_UINT64" arrow::Type::UINT64"
Type_INT64" arrow::Type::INT64"
+ Type_HALF_FLOAT" arrow::Type::HALF_FLOAT"
Type_FLOAT" arrow::Type::FLOAT"
Type_DOUBLE" arrow::Type::DOUBLE"
@@ -282,6 +283,22 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
CStatus RemoveColumn(int i, shared_ptr[CTable]* out)
+ cdef cppclass CTensor" arrow::Tensor":
+ shared_ptr[CDataType] type()
+ shared_ptr[CBuffer] data()
+
+ const vector[int64_t]& shape()
+ const vector[int64_t]& strides()
+ int64_t size()
+
+ int ndim()
+ const c_string& dim_name(int i)
+
+ c_bool is_mutable()
+ c_bool is_contiguous()
+ Type type_enum()
+ c_bool Equals(const CTensor& other)
+
CStatus ConcatenateTables(const vector[shared_ptr[CTable]]& tables,
shared_ptr[CTable]* result)
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/python/pyarrow/includes/pyarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd
index 8142c1c..9b64435 100644
--- a/python/pyarrow/includes/pyarrow.pxd
+++ b/python/pyarrow/includes/pyarrow.pxd
@@ -18,8 +18,8 @@
# distutils: language = c++
from pyarrow.includes.common cimport *
-from pyarrow.includes.libarrow cimport (CArray, CBuffer, CColumn,
- CTable, CDataType, CStatus, Type,
+from pyarrow.includes.libarrow cimport (CArray, CBuffer, CColumn, CDataType,
+ CTable, CTensor, CStatus, Type,
CMemoryPool, TimeUnit)
cimport pyarrow.includes.libarrow_io as arrow_io
@@ -34,7 +34,7 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
shared_ptr[CArray]* out,
const shared_ptr[CDataType]& type)
- CStatus PandasDtypeToArrow(object dtype, shared_ptr[CDataType]* type)
+ CStatus NumPyDtypeToArrow(object dtype, shared_ptr[CDataType]* type)
CStatus PandasToArrow(CMemoryPool* pool, object ao, object mo,
const shared_ptr[CDataType]& type,
@@ -44,6 +44,12 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
const shared_ptr[CDataType]& type,
shared_ptr[CArray]* out)
+ CStatus NdarrayToTensor(CMemoryPool* pool, object ao,
+ shared_ptr[CTensor]* out);
+
+ CStatus TensorToNdarray(const CTensor& tensor, PyObject* base,
+ PyObject** out)
+
CStatus ConvertArrayToPandas(const shared_ptr[CArray]& arr,
PyObject* py_ref, PyObject** out)
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/python/pyarrow/io.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/io.pyx b/python/pyarrow/io.pyx
index 0b27379..608b20d 100644
--- a/python/pyarrow/io.pyx
+++ b/python/pyarrow/io.pyx
@@ -1095,7 +1095,7 @@ cdef class FeatherWriter:
if isinstance(col, Array):
arr = col
else:
- arr = Array.from_pandas(col, mask=mask)
+ arr = Array.from_numpy(col, mask=mask)
cdef c_string c_name = tobytes(name)
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/python/pyarrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
index 06df644..253be45 100644
--- a/python/pyarrow/schema.pyx
+++ b/python/pyarrow/schema.pyx
@@ -241,7 +241,9 @@ cdef set PRIMITIVE_TYPES = set([
la.Type_UINT32, la.Type_INT32,
la.Type_UINT64, la.Type_INT64,
la.Type_TIMESTAMP, la.Type_DATE32,
- la.Type_DATE64, la.Type_FLOAT,
+ la.Type_DATE64,
+ la.Type_HALF_FLOAT,
+ la.Type_FLOAT,
la.Type_DOUBLE])
@@ -340,11 +342,15 @@ def date64():
return primitive_type(la.Type_DATE64)
-def float_():
+def float16():
+ return primitive_type(la.Type_HALF_FLOAT)
+
+
+def float32():
return primitive_type(la.Type_FLOAT)
-def double():
+def float64():
return primitive_type(la.Type_DOUBLE)
@@ -452,6 +458,6 @@ cdef Schema box_schema(const shared_ptr[CSchema]& type):
def type_from_numpy_dtype(object dtype):
cdef shared_ptr[CDataType] c_type
with nogil:
- check_status(pyarrow.PandasDtypeToArrow(dtype, &c_type))
+ check_status(pyarrow.NumPyDtypeToArrow(dtype, &c_type))
return box_data_type(c_type)
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/python/pyarrow/table.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/table.pyx b/python/pyarrow/table.pyx
index e6fddbd..94389a7 100644
--- a/python/pyarrow/table.pyx
+++ b/python/pyarrow/table.pyx
@@ -309,7 +309,7 @@ cdef _dataframe_to_arrays(df, timestamps_to_ms, Schema schema):
if schema is not None:
type = schema.field_by_name(name).type
- arr = Array.from_pandas(col, type=type,
+ arr = Array.from_numpy(col, type=type,
timestamps_to_ms=timestamps_to_ms)
names.append(name)
arrays.append(arr)
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/python/pyarrow/tests/pandas_examples.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/pandas_examples.py b/python/pyarrow/tests/pandas_examples.py
index c9343fc..e081c38 100644
--- a/python/pyarrow/tests/pandas_examples.py
+++ b/python/pyarrow/tests/pandas_examples.py
@@ -37,7 +37,7 @@ def dataframe_with_arrays():
('i4', pa.int32()), ('i8', pa.int64()),
('u1', pa.uint8()), ('u2', pa.uint16()),
('u4', pa.uint32()), ('u8', pa.uint64()),
- ('f4', pa.float_()), ('f8', pa.double())]
+ ('f4', pa.float32()), ('f8', pa.float64())]
arrays = OrderedDict()
fields = []
@@ -77,6 +77,7 @@ def dataframe_with_arrays():
return df, schema
+
def dataframe_with_lists():
"""
Dataframe with list columns of every possible primtive type.
@@ -97,7 +98,7 @@ def dataframe_with_lists():
None,
[0]
]
- fields.append(pa.field('double', pa.list_(pa.double())))
+ fields.append(pa.field('double', pa.list_(pa.float64())))
arrays['double'] = [
[0., 1., 2., 3., 4., 5., 6., 7., 8., 9.],
[0., 1., 2., 3., 4.],
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/python/pyarrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index bb6d2d1..15fca56 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -70,7 +70,7 @@ class TestConvertList(unittest.TestCase):
arr = pyarrow.from_pylist(data)
assert len(arr) == 6
assert arr.null_count == 3
- assert arr.type == pyarrow.double()
+ assert arr.type == pyarrow.float64()
assert arr.to_pylist() == data
def test_unicode(self):
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index c472ee6..0b3c02e 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -22,7 +22,6 @@ import datetime
import unittest
import numpy as np
-import numpy.testing as npt
import pandas as pd
import pandas.util.testing as tm
@@ -78,8 +77,8 @@ class TestPandasConversion(unittest.TestCase):
def _check_array_roundtrip(self, values, expected=None,
timestamps_to_ms=False, type=None):
- arr = A.Array.from_pandas(values, timestamps_to_ms=timestamps_to_ms,
- type=type)
+ arr = A.Array.from_numpy(values, timestamps_to_ms=timestamps_to_ms,
+ type=type)
result = arr.to_pandas()
assert arr.null_count == pd.isnull(values).sum()
@@ -90,7 +89,7 @@ class TestPandasConversion(unittest.TestCase):
def test_float_no_nulls(self):
data = {}
fields = []
- dtypes = [('f4', A.float_()), ('f8', A.double())]
+ dtypes = [('f4', A.float32()), ('f8', A.float64())]
num_values = 100
for numpy_dtype, arrow_dtype in dtypes:
@@ -106,7 +105,7 @@ class TestPandasConversion(unittest.TestCase):
num_values = 100
null_mask = np.random.randint(0, 10, size=num_values) < 3
- dtypes = [('f4', A.float_()), ('f8', A.double())]
+ dtypes = [('f4', A.float32()), ('f8', A.float64())]
names = ['f4', 'f8']
expected_cols = []
@@ -115,7 +114,7 @@ class TestPandasConversion(unittest.TestCase):
for name, arrow_dtype in dtypes:
values = np.random.randn(num_values).astype(name)
- arr = A.from_pandas_series(values, null_mask)
+ arr = A.Array.from_numpy(values, null_mask)
arrays.append(arr)
fields.append(A.Field.from_py(name, arrow_dtype))
values[null_mask] = np.nan
@@ -168,7 +167,7 @@ class TestPandasConversion(unittest.TestCase):
for name in int_dtypes:
values = np.random.randint(0, 100, size=num_values)
- arr = A.from_pandas_series(values, null_mask)
+ arr = A.Array.from_numpy(values, null_mask)
arrays.append(arr)
expected = values.astype('f8')
@@ -202,7 +201,7 @@ class TestPandasConversion(unittest.TestCase):
mask = np.random.randint(0, 10, size=num_values) < 3
values = np.random.randint(0, 10, size=num_values) < 5
- arr = A.from_pandas_series(values, mask)
+ arr = A.Array.from_numpy(values, mask)
expected = values.astype(object)
expected[mask] = None
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/python/pyarrow/tests/test_feather.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index dd6888f..525da34 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -15,8 +15,6 @@
import os
import unittest
-import pytest
-
from numpy.testing import assert_array_equal
import numpy as np
http://git-wip-us.apache.org/repos/asf/arrow/blob/e333576a/python/pyarrow/tests/test_jemalloc.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_jemalloc.py b/python/pyarrow/tests/test_jemalloc.py
index 8efd514..c6cc2cc 100644
--- a/python/pyarrow/tests/test_jemalloc.py
+++ b/python/pyarrow/tests/test_jemalloc.py
@@ -33,11 +33,15 @@ def test_different_memory_pool():
gc.collect()
bytes_before_default = pyarrow.total_allocated_bytes()
bytes_before_jemalloc = pyarrow.jemalloc.default_pool().bytes_allocated()
- array = pyarrow.from_pylist([1, None, 3, None],
- memory_pool=pyarrow.jemalloc.default_pool())
+
+ # it works
+ array = pyarrow.from_pylist([1, None, 3, None], # noqa
+ memory_pool=pyarrow.jemalloc.default_pool())
gc.collect()
assert pyarrow.total_allocated_bytes() == bytes_before_default
- assert pyarrow.jemalloc.default_pool().bytes_allocated() > bytes_before_jemalloc
+ assert (pyarrow.jemalloc.default_pool().bytes_allocated() >
+ bytes_before_jemalloc)
+
@jemalloc
def test_default_memory_pool():
@@ -47,10 +51,13 @@ def test_default_memory_pool():
old_memory_pool = pyarrow.memory.default_pool()
pyarrow.memory.set_default_pool(pyarrow.jemalloc.default_pool())
- array = pyarrow.from_pylist([1, None, 3, None])
+
+ array = pyarrow.from_pylist([1, None, 3, None]) # noqa
+
pyarrow.memory.set_default_pool(old_memory_pool)
gc.collect()
assert pyarrow.total_allocated_bytes() == bytes_before_default
- assert pyarrow.jemalloc.default_pool().bytes_allocated() > bytes_before_jemalloc
+ assert (pyarrow.jemalloc.default_pool().bytes_allocated() >
+ bytes_before_jemalloc)