You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/06/28 03:32:16 UTC
[arrow] branch master updated: ARROW-4788: [C++] Less verbose API
for constructing StructArray
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new fec7a0e ARROW-4788: [C++] Less verbose API for constructing StructArray
fec7a0e is described below
commit fec7a0eb06ff033ceede7aff75865fc1eceab7bb
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Thu Jun 27 22:32:08 2019 -0500
ARROW-4788: [C++] Less verbose API for constructing StructArray
Also our first API to return a `Result<>`.
Includes appropriate massaging in the test helpers so that ASSERT_OK and ASSERT_RAISES can handle `Result<>` instances.
Author: Antoine Pitrou <an...@python.org>
Closes #4707 from pitrou/ARROW-4788-struct-array-factory and squashes the following commits:
ae5435857 <Antoine Pitrou> ARROW-4788: Less verbose API for constructing StructArray
---
cpp/src/arrow/array-struct-test.cc | 57 +++++++++++++++++++++
cpp/src/arrow/array-view-test.cc | 1 +
cpp/src/arrow/array.cc | 28 ++++++++++
cpp/src/arrow/array.h | 12 +++++
cpp/src/arrow/compute/kernels/cast-test.cc | 6 +--
cpp/src/arrow/csv/converter-test.cc | 1 +
cpp/src/arrow/extension_type-test.cc | 1 +
cpp/src/arrow/flight/flight-test.cc | 2 +-
cpp/src/arrow/python/common.h | 17 +++++++
cpp/src/arrow/python/pyarrow.cc | 5 ++
cpp/src/arrow/python/pyarrow.h | 5 ++
cpp/src/arrow/python/pyarrow_api.h | 18 +++++--
cpp/src/arrow/python/pyarrow_lib.h | 13 +----
cpp/src/arrow/python/python-test.cc | 1 +
cpp/src/arrow/result.h | 8 +++
cpp/src/arrow/testing/gtest_util.h | 62 ++++++++++++++++-------
cpp/src/arrow/util/trie-test.cc | 2 +-
cpp/src/gandiva/precompiled/decimal_ops_test.cc | 2 +-
cpp/src/parquet/arrow/arrow-reader-writer-test.cc | 1 +
python/pyarrow/_flight.pyx | 12 +++--
python/pyarrow/array.pxi | 42 ++++++---------
python/pyarrow/error.pxi | 6 +++
python/pyarrow/includes/common.pxd | 8 +++
python/pyarrow/includes/libarrow.pxd | 12 ++++-
python/pyarrow/includes/libarrow_flight.pxd | 11 ++--
python/pyarrow/tests/test_array.py | 20 ++++++++
26 files changed, 279 insertions(+), 74 deletions(-)
diff --git a/cpp/src/arrow/array-struct-test.cc b/cpp/src/arrow/array-struct-test.cc
index a182202..a5fce6e 100644
--- a/cpp/src/arrow/array-struct-test.cc
+++ b/cpp/src/arrow/array-struct-test.cc
@@ -76,6 +76,63 @@ void ValidateBasicStructArray(const StructArray* result,
}
}
+TEST(StructArray, FromFieldNames) {
+ std::shared_ptr<Array> a, b, c, array, expected;
+ a = ArrayFromJSON(int32(), "[4, null]");
+ b = ArrayFromJSON(utf8(), R"([null, "foo"])");
+
+ auto res = StructArray::Make({a, b}, {"a", "b"});
+ ASSERT_OK(res);
+ array = *res;
+ expected = ArrayFromJSON(struct_({field("a", int32()), field("b", utf8())}),
+ R"([{"a": 4, "b": null}, {"a": null, "b": "foo"}])");
+ AssertArraysEqual(*array, *expected);
+
+ // With non-zero offsets
+ res = StructArray::Make({a, b}, {"a", "b"}, /*null_bitmap =*/nullptr, /*null_count =*/0,
+ /*offset =*/1);
+ ASSERT_OK(res);
+ array = *res;
+ expected = ArrayFromJSON(struct_({field("a", int32()), field("b", utf8())}),
+ R"([{"a": null, "b": "foo"}])");
+ AssertArraysEqual(*array, *expected);
+
+ res = StructArray::Make({a, b}, {"a", "b"}, /*null_bitmap =*/nullptr, /*null_count =*/0,
+ /*offset =*/2);
+ ASSERT_OK(res);
+ array = *res;
+ expected = ArrayFromJSON(struct_({field("a", int32()), field("b", utf8())}), R"([])");
+ AssertArraysEqual(*array, *expected);
+
+ // Offset greater than length
+ res = StructArray::Make({a, b}, {"a", "b"}, /*null_bitmap =*/nullptr, /*null_count =*/0,
+ /*offset =*/3);
+ ASSERT_RAISES(IndexError, res);
+
+ // With null bitmap
+ std::shared_ptr<Buffer> null_bitmap;
+ BitmapFromVector<bool>({false, true}, &null_bitmap);
+ res = StructArray::Make({a, b}, {"a", "b"}, null_bitmap);
+ ASSERT_OK(res);
+ array = *res;
+ expected = ArrayFromJSON(struct_({field("a", int32()), field("b", utf8())}),
+ R"([null, {"a": null, "b": "foo"}])");
+ AssertArraysEqual(*array, *expected);
+
+ // Mismatching array lengths
+ c = ArrayFromJSON(int64(), "[1, 2, 3]");
+ res = StructArray::Make({a, c}, {"a", "c"});
+ ASSERT_RAISES(Invalid, res);
+
+ // Mismatching number of fields
+ res = StructArray::Make({a, b}, {"a", "b", "c"});
+ ASSERT_RAISES(Invalid, res);
+
+ // Fail on 0 children (cannot infer array length)
+ res = StructArray::Make({}, {});
+ ASSERT_RAISES(Invalid, res);
+}
+
// ----------------------------------------------------------------------------------
// Struct test
class TestStructBuilder : public TestBuilder {
diff --git a/cpp/src/arrow/array-view-test.cc b/cpp/src/arrow/array-view-test.cc
index df26ad3..f466c1d 100644
--- a/cpp/src/arrow/array-view-test.cc
+++ b/cpp/src/arrow/array-view-test.cc
@@ -24,6 +24,7 @@
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/util.h"
#include "arrow/type.h"
+#include "arrow/util/logging.h"
namespace arrow {
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 9b66af2..0f63aba 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -497,6 +497,34 @@ StructArray::StructArray(const std::shared_ptr<DataType>& type, int64_t length,
boxed_fields_.resize(children.size());
}
+Result<std::shared_ptr<Array>> StructArray::Make(
+ const std::vector<std::shared_ptr<Array>>& children,
+ const std::vector<std::string>& field_names, std::shared_ptr<Buffer> null_bitmap,
+ int64_t null_count, int64_t offset) {
+ if (children.size() != field_names.size()) {
+ return Status::Invalid("Mismatching number of field names and child arrays");
+ }
+ int64_t length = 0;
+ if (children.size() == 0) {
+ return Status::Invalid("Can't infer struct array length with 0 child arrays");
+ }
+ length = children.front()->length();
+ for (const auto& child : children) {
+ if (length != child->length()) {
+ return Status::Invalid("Mismatching child array lengths");
+ }
+ }
+ if (offset > length) {
+ return Status::IndexError("Offset greater than length of child arrays");
+ }
+ std::vector<std::shared_ptr<Field>> fields(children.size());
+ for (size_t i = 0; i < children.size(); ++i) {
+ fields[i] = ::arrow::field(field_names[i], children[i]->type());
+ }
+ return std::make_shared<StructArray>(struct_(fields), length - offset, children,
+ null_bitmap, null_count, offset);
+}
+
const StructType* StructArray::struct_type() const {
return checked_cast<const StructType*>(data_->type.get());
}
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 1e163b7..2a1ce7a 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -28,6 +28,7 @@
#include "arrow/buffer.h"
#include "arrow/compare.h"
+#include "arrow/result.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit-util.h"
@@ -822,6 +823,16 @@ class ARROW_EXPORT StructArray : public Array {
std::shared_ptr<Buffer> null_bitmap = NULLPTR,
int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+ /// \brief Return a StructArray from child arrays and field names.
+ ///
+ /// The length and data type are automatically inferred from the arguments.
+ /// There should be at least one child array.
+ static Result<std::shared_ptr<Array>> Make(
+ const std::vector<std::shared_ptr<Array>>& children,
+ const std::vector<std::string>& field_names,
+ std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+ int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+
const StructType* struct_type() const;
// Return a shared pointer in case the requestor desires to share ownership
@@ -840,6 +851,7 @@ class ARROW_EXPORT StructArray : public Array {
private:
// For caching boxed child data
+ // XXX This is not handled in a thread-safe manner.
mutable std::vector<std::shared_ptr<Array>> boxed_fields_;
};
diff --git a/cpp/src/arrow/compute/kernels/cast-test.cc b/cpp/src/arrow/compute/kernels/cast-test.cc
index 778544c..510b9a4 100644
--- a/cpp/src/arrow/compute/kernels/cast-test.cc
+++ b/cpp/src/arrow/compute/kernels/cast-test.cc
@@ -99,10 +99,10 @@ class TestCast : public ComputeFixture, public TestBase {
const std::vector<I_TYPE>& in_values, const std::vector<bool>& is_valid,
const std::shared_ptr<DataType>& out_type,
const std::vector<O_TYPE>& out_values, const CastOptions& options) {
- DCHECK_EQ(in_values.size(), out_values.size());
+ ASSERT_EQ(in_values.size(), out_values.size());
std::shared_ptr<Array> input, expected;
if (is_valid.size() > 0) {
- DCHECK_EQ(is_valid.size(), out_values.size());
+ ASSERT_EQ(is_valid.size(), out_values.size());
ArrayFromVector<InType, I_TYPE>(in_type, is_valid, in_values, &input);
ArrayFromVector<OutType, O_TYPE>(out_type, is_valid, out_values, &expected);
} else {
@@ -123,7 +123,7 @@ class TestCast : public ComputeFixture, public TestBase {
const CastOptions& options = CastOptions()) {
std::shared_ptr<Array> input = ArrayFromJSON(in_type, in_json);
std::shared_ptr<Array> expected = ArrayFromJSON(out_type, expected_json);
- DCHECK_EQ(input->length(), expected->length());
+ ASSERT_EQ(input->length(), expected->length());
CheckPass(*input, *expected, out_type, options);
// Check a sliced variant
diff --git a/cpp/src/arrow/csv/converter-test.cc b/cpp/src/arrow/csv/converter-test.cc
index 531f40b..a5e4c03 100644
--- a/cpp/src/arrow/csv/converter-test.cc
+++ b/cpp/src/arrow/csv/converter-test.cc
@@ -31,6 +31,7 @@
#include "arrow/testing/gtest_util.h"
#include "arrow/type.h"
#include "arrow/util/decimal.h"
+#include "arrow/util/logging.h"
namespace arrow {
namespace csv {
diff --git a/cpp/src/arrow/extension_type-test.cc b/cpp/src/arrow/extension_type-test.cc
index 6b632a9..29c5356 100644
--- a/cpp/src/arrow/extension_type-test.cc
+++ b/cpp/src/arrow/extension_type-test.cc
@@ -43,6 +43,7 @@
#include "arrow/testing/util.h"
#include "arrow/type.h"
#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/logging.h"
namespace arrow {
diff --git a/cpp/src/arrow/flight/flight-test.cc b/cpp/src/arrow/flight/flight-test.cc
index c0f0c7f..d036b9a 100644
--- a/cpp/src/arrow/flight/flight-test.cc
+++ b/cpp/src/arrow/flight/flight-test.cc
@@ -197,7 +197,7 @@ class TestFlightClient : public ::testing::Test {
void CheckDoGet(const FlightDescriptor& descr, const BatchVector& expected_batches,
EndpointCheckFunc&& check_endpoints) {
auto num_batches = static_cast<int>(expected_batches.size());
- DCHECK_GE(num_batches, 2);
+ ASSERT_GE(num_batches, 2);
auto expected_schema = expected_batches[0]->schema();
std::unique_ptr<FlightInfo> info;
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
index a10e3bb..766b764 100644
--- a/cpp/src/arrow/python/common.h
+++ b/cpp/src/arrow/python/common.h
@@ -24,12 +24,15 @@
#include "arrow/python/config.h"
#include "arrow/buffer.h"
+#include "arrow/python/pyarrow.h"
#include "arrow/python/visibility.h"
#include "arrow/util/macros.h"
namespace arrow {
class MemoryPool;
+template <class T>
+class Result;
namespace py {
@@ -52,6 +55,20 @@ ARROW_PYTHON_EXPORT Status PassPyError();
#define PY_RETURN_IF_ERROR(CODE) ARROW_RETURN_NOT_OK(CheckPyError(CODE));
+// For Cython, as you can't define template C++ functions in Cython, only use them.
+// This function can set a Python exception. It assumes that T has a (cheap)
+// default constructor.
+template <class T>
+T GetResultValue(Result<T>& result) {
+ if (ARROW_PREDICT_TRUE(result.ok())) {
+ return *std::move(result);
+ } else {
+ int r = internal::check_status(result.status());
+ assert(r == -1); // should have errored out
+ return {};
+ }
+}
+
// A RAII-style helper that ensures the GIL is acquired inside a lexical block.
class ARROW_PYTHON_EXPORT PyAcquireGIL {
public:
diff --git a/cpp/src/arrow/python/pyarrow.cc b/cpp/src/arrow/python/pyarrow.cc
index d080cc0..1cedc54 100644
--- a/cpp/src/arrow/python/pyarrow.cc
+++ b/cpp/src/arrow/python/pyarrow.cc
@@ -168,5 +168,10 @@ PyObject* wrap_record_batch(const std::shared_ptr<RecordBatch>& batch) {
return ::pyarrow_wrap_batch(batch);
}
+namespace internal {
+
+int check_status(const Status& status) { return ::pyarrow_internal_check_status(status); }
+
+} // namespace internal
} // namespace py
} // namespace arrow
diff --git a/cpp/src/arrow/python/pyarrow.h b/cpp/src/arrow/python/pyarrow.h
index 5e42333..ff5bf8f 100644
--- a/cpp/src/arrow/python/pyarrow.h
+++ b/cpp/src/arrow/python/pyarrow.h
@@ -81,6 +81,11 @@ ARROW_PYTHON_EXPORT Status unwrap_record_batch(PyObject* batch,
ARROW_PYTHON_EXPORT PyObject* wrap_record_batch(
const std::shared_ptr<RecordBatch>& batch);
+namespace internal {
+
+ARROW_PYTHON_EXPORT int check_status(const Status& status);
+
+} // namespace internal
} // namespace py
} // namespace arrow
diff --git a/cpp/src/arrow/python/pyarrow_api.h b/cpp/src/arrow/python/pyarrow_api.h
index f6a2112..b76e961 100644
--- a/cpp/src/arrow/python/pyarrow_api.h
+++ b/cpp/src/arrow/python/pyarrow_api.h
@@ -16,16 +16,22 @@
// under the License.
// DO NOT EDIT THIS FILE. Update from pyarrow/lib_api.h after pyarrow build
+// This is used to be able to call back into Cython code from C++.
-/* Generated by Cython 0.29 */
+/* Generated by Cython 0.29.8 */
#ifndef __PYX_HAVE_API__pyarrow__lib
#define __PYX_HAVE_API__pyarrow__lib
+#ifdef __MINGW64__
+#define MS_WIN64
+#endif
#include "Python.h"
#include "pyarrow_lib.h"
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array)(std::shared_ptr< arrow::Array> const &) = 0;
#define pyarrow_wrap_array __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array)(std::shared_ptr< arrow::ChunkedArray> const &) = 0;
+#define pyarrow_wrap_chunked_array __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch)(std::shared_ptr< arrow::RecordBatch> const &) = 0;
#define pyarrow_wrap_batch __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch
static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer)(std::shared_ptr< arrow::Buffer> const &) = 0;
@@ -62,6 +68,8 @@ static std::shared_ptr< arrow::Table> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwra
#define pyarrow_unwrap_table __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table
static std::shared_ptr< arrow::Tensor> (*__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor)(PyObject *) = 0;
#define pyarrow_unwrap_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor
+static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status)(arrow::Status const &) = 0;
+#define pyarrow_internal_check_status __pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer)(PyObject *) = 0;
#define pyarrow_is_buffer __pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_data_type)(PyObject *) = 0;
@@ -72,8 +80,8 @@ static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_schema)(PyObject *) = 0;
#define pyarrow_is_schema __pyx_api_f_7pyarrow_3lib_pyarrow_is_schema
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_array)(PyObject *) = 0;
#define pyarrow_is_array __pyx_api_f_7pyarrow_3lib_pyarrow_is_array
-static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array)(std::shared_ptr< arrow::ChunkedArray> const &) = 0;
-#define pyarrow_wrap_chunked_array __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array
+static PyObject *(*__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar)(std::shared_ptr< arrow::Scalar> const &) = 0;
+#define pyarrow_wrap_scalar __pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor)(PyObject *) = 0;
#define pyarrow_is_tensor __pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor
static int (*__pyx_api_f_7pyarrow_3lib_pyarrow_is_column)(PyObject *) = 0;
@@ -149,6 +157,7 @@ static int import_pyarrow__lib(void) {
module = PyImport_ImportModule("pyarrow.lib");
if (!module) goto bad;
if (__Pyx_ImportFunction(module, "pyarrow_wrap_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_array, "PyObject *(std::shared_ptr< arrow::Array> const &)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "pyarrow_wrap_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array, "PyObject *(std::shared_ptr< arrow::ChunkedArray> const &)") < 0) goto bad;
if (__Pyx_ImportFunction(module, "pyarrow_wrap_batch", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_batch, "PyObject *(std::shared_ptr< arrow::RecordBatch> const &)") < 0) goto bad;
if (__Pyx_ImportFunction(module, "pyarrow_wrap_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_buffer, "PyObject *(std::shared_ptr< arrow::Buffer> const &)") < 0) goto bad;
if (__Pyx_ImportFunction(module, "pyarrow_wrap_column", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_column, "PyObject *(std::shared_ptr< arrow::Column> const &)") < 0) goto bad;
@@ -167,12 +176,13 @@ static int import_pyarrow__lib(void) {
if (__Pyx_ImportFunction(module, "pyarrow_unwrap_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_schema, "std::shared_ptr< arrow::Schema> (PyObject *)") < 0) goto bad;
if (__Pyx_ImportFunction(module, "pyarrow_unwrap_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_table, "std::shared_ptr< arrow::Table> (PyObject *)") < 0) goto bad;
if (__Pyx_ImportFunction(module, "pyarrow_unwrap_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_unwrap_tensor, "std::shared_ptr< arrow::Tensor> (PyObject *)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "pyarrow_internal_check_status", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_internal_check_status, "int (arrow::Status const &)") < 0) goto bad;
if (__Pyx_ImportFunction(module, "pyarrow_is_buffer", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_buffer, "int (PyObject *)") < 0) goto bad;
if (__Pyx_ImportFunction(module, "pyarrow_is_data_type", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_data_type, "int (PyObject *)") < 0) goto bad;
if (__Pyx_ImportFunction(module, "pyarrow_is_field", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_field, "int (PyObject *)") < 0) goto bad;
if (__Pyx_ImportFunction(module, "pyarrow_is_schema", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_schema, "int (PyObject *)") < 0) goto bad;
if (__Pyx_ImportFunction(module, "pyarrow_is_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_array, "int (PyObject *)") < 0) goto bad;
- if (__Pyx_ImportFunction(module, "pyarrow_wrap_chunked_array", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_chunked_array, "PyObject *(std::shared_ptr< arrow::ChunkedArray> const &)") < 0) goto bad;
+ if (__Pyx_ImportFunction(module, "pyarrow_wrap_scalar", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_wrap_scalar, "PyObject *(std::shared_ptr< arrow::Scalar> const &)") < 0) goto bad;
if (__Pyx_ImportFunction(module, "pyarrow_is_tensor", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_tensor, "int (PyObject *)") < 0) goto bad;
if (__Pyx_ImportFunction(module, "pyarrow_is_column", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_column, "int (PyObject *)") < 0) goto bad;
if (__Pyx_ImportFunction(module, "pyarrow_is_table", (void (**)(void))&__pyx_api_f_7pyarrow_3lib_pyarrow_is_table, "int (PyObject *)") < 0) goto bad;
diff --git a/cpp/src/arrow/python/pyarrow_lib.h b/cpp/src/arrow/python/pyarrow_lib.h
index 4a99a07..5f5fc4c 100644
--- a/cpp/src/arrow/python/pyarrow_lib.h
+++ b/cpp/src/arrow/python/pyarrow_lib.h
@@ -17,7 +17,7 @@
// DO NOT EDIT THIS FILE. Update from pyarrow/lib.h after pyarrow build
-/* Generated by Cython 0.29 */
+/* Generated by Cython 0.29.8 */
#ifndef __PYX_HAVE__pyarrow__lib
#define __PYX_HAVE__pyarrow__lib
@@ -38,6 +38,7 @@
#endif
__PYX_EXTERN_C PyObject *__pyx_f_7pyarrow_3lib_pyarrow_wrap_array(std::shared_ptr< arrow::Array> const &);
+__PYX_EXTERN_C PyObject *__pyx_f_7pyarrow_3lib_pyarrow_wrap_chunked_array(std::shared_ptr< arrow::ChunkedArray> const &);
__PYX_EXTERN_C PyObject *__pyx_f_7pyarrow_3lib_pyarrow_wrap_batch(std::shared_ptr< arrow::RecordBatch> const &);
__PYX_EXTERN_C PyObject *__pyx_f_7pyarrow_3lib_pyarrow_wrap_buffer(std::shared_ptr< arrow::Buffer> const &);
__PYX_EXTERN_C PyObject *__pyx_f_7pyarrow_3lib_pyarrow_wrap_column(std::shared_ptr< arrow::Column> const &);
@@ -56,16 +57,6 @@ __PYX_EXTERN_C std::shared_ptr< arrow::Field> __pyx_f_7pyarrow_3lib_pyarrow_unw
__PYX_EXTERN_C std::shared_ptr< arrow::Schema> __pyx_f_7pyarrow_3lib_pyarrow_unwrap_schema(PyObject *);
__PYX_EXTERN_C std::shared_ptr< arrow::Table> __pyx_f_7pyarrow_3lib_pyarrow_unwrap_table(PyObject *);
__PYX_EXTERN_C std::shared_ptr< arrow::Tensor> __pyx_f_7pyarrow_3lib_pyarrow_unwrap_tensor(PyObject *);
-__PYX_EXTERN_C int pyarrow_is_buffer(PyObject *);
-__PYX_EXTERN_C int pyarrow_is_data_type(PyObject *);
-__PYX_EXTERN_C int pyarrow_is_field(PyObject *);
-__PYX_EXTERN_C int pyarrow_is_schema(PyObject *);
-__PYX_EXTERN_C int pyarrow_is_array(PyObject *);
-__PYX_EXTERN_C PyObject *pyarrow_wrap_chunked_array(std::shared_ptr< arrow::ChunkedArray> const &);
-__PYX_EXTERN_C int pyarrow_is_tensor(PyObject *);
-__PYX_EXTERN_C int pyarrow_is_column(PyObject *);
-__PYX_EXTERN_C int pyarrow_is_table(PyObject *);
-__PYX_EXTERN_C int pyarrow_is_batch(PyObject *);
#endif /* !__PYX_HAVE_API__pyarrow__lib */
diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc
index a8282a5..5de613f 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -32,6 +32,7 @@
#include "arrow/python/helpers.h"
#include "arrow/python/python_to_arrow.h"
#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
namespace arrow {
diff --git a/cpp/src/arrow/result.h b/cpp/src/arrow/result.h
index f01513a..c1fbd0a 100644
--- a/cpp/src/arrow/result.h
+++ b/cpp/src/arrow/result.h
@@ -333,4 +333,12 @@ class Result {
#define ARROW_ASSIGN_OR_RAISE(lhs, rexpr) \
ARROW_ASSIGN_OR_RAISE_IMPL(ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), \
lhs, rexpr);
+
+namespace internal {
+
+// For Cython, because of https://github.com/cython/cython/issues/3018
+template <class T>
+using CResult = Result<T>;
+
+} // namespace internal
} // namespace arrow
diff --git a/cpp/src/arrow/testing/gtest_util.h b/cpp/src/arrow/testing/gtest_util.h
index c44bb17..914aeb0 100644
--- a/cpp/src/arrow/testing/gtest_util.h
+++ b/cpp/src/arrow/testing/gtest_util.h
@@ -34,13 +34,35 @@
#include "arrow/type_fwd.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit-util.h"
-#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
+namespace arrow {
+
+template <typename T>
+class Result;
+
+namespace internal {
+
+// Helpers for the ASSERT* macros below
+
+inline Status GenericToStatus(const Status& st) { return st; }
+
+template <typename T>
+inline Status GenericToStatus(const Result<T>& res) {
+ return res.status();
+}
+
+} // namespace internal
+} // namespace arrow
+
+// NOTE: failing must be inline in the macros below, to get correct file / line number
+// reporting on test failures.
+
#define ASSERT_RAISES(ENUM, expr) \
do { \
- ::arrow::Status _st = (expr); \
+ auto _res = (expr); \
+ ::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
if (!_st.Is##ENUM()) { \
FAIL() << "Expected '" ARROW_STRINGIFY(expr) "' to fail with " ARROW_STRINGIFY( \
ENUM) ", but got " \
@@ -50,7 +72,8 @@
#define ASSERT_RAISES_WITH_MESSAGE(ENUM, message, expr) \
do { \
- ::arrow::Status _st = (expr); \
+ auto _res = (expr); \
+ ::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
if (!_st.Is##ENUM()) { \
FAIL() << "Expected '" ARROW_STRINGIFY(expr) "' to fail with " ARROW_STRINGIFY( \
ENUM) ", but got " \
@@ -61,7 +84,8 @@
#define ASSERT_OK(expr) \
do { \
- ::arrow::Status _st = (expr); \
+ auto _res = (expr); \
+ ::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
if (!_st.ok()) { \
FAIL() << "'" ARROW_STRINGIFY(expr) "' failed with " << _st.ToString(); \
} \
@@ -69,18 +93,20 @@
#define ASSERT_OK_NO_THROW(expr) ASSERT_NO_THROW(ASSERT_OK(expr))
-#define ARROW_EXPECT_OK(expr) \
- do { \
- ::arrow::Status _st = (expr); \
- EXPECT_TRUE(_st.ok()); \
+#define ARROW_EXPECT_OK(expr) \
+ do { \
+ auto _res = (expr); \
+ ::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
+ EXPECT_TRUE(_st.ok()); \
} while (false)
-#define ABORT_NOT_OK(s) \
- do { \
- ::arrow::Status _st = (s); \
- if (ARROW_PREDICT_FALSE(!_st.ok())) { \
- _st.Abort(); \
- } \
+#define ABORT_NOT_OK(expr) \
+ do { \
+ auto _res = (expr); \
+ ::arrow::Status _st = ::arrow::internal::GenericToStatus(_res); \
+ if (ARROW_PREDICT_FALSE(!_st.ok())) { \
+ _st.Abort(); \
+ } \
} while (false);
namespace arrow {
@@ -170,7 +196,8 @@ template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
void ArrayFromVector(const std::shared_ptr<DataType>& type,
const std::vector<bool>& is_valid, const std::vector<C_TYPE>& values,
std::shared_ptr<Array>* out) {
- DCHECK_EQ(TYPE::type_id, type->id())
+ auto type_id = TYPE::type_id;
+ ASSERT_EQ(type_id, type->id())
<< "template parameter and concrete DataType instance don't agree";
std::unique_ptr<ArrayBuilder> builder_ptr;
@@ -191,7 +218,8 @@ void ArrayFromVector(const std::shared_ptr<DataType>& type,
template <typename TYPE, typename C_TYPE = typename TYPE::c_type>
void ArrayFromVector(const std::shared_ptr<DataType>& type,
const std::vector<C_TYPE>& values, std::shared_ptr<Array>* out) {
- DCHECK_EQ(TYPE::type_id, type->id())
+ auto type_id = TYPE::type_id;
+ ASSERT_EQ(type_id, type->id())
<< "template parameter and concrete DataType instance don't agree";
std::unique_ptr<ArrayBuilder> builder_ptr;
@@ -228,7 +256,7 @@ void ChunkedArrayFromVector(const std::shared_ptr<DataType>& type,
const std::vector<std::vector<C_TYPE>>& values,
std::shared_ptr<ChunkedArray>* out) {
ArrayVector chunks;
- DCHECK_EQ(is_valid.size(), values.size());
+ ASSERT_EQ(is_valid.size(), values.size());
for (size_t i = 0; i < values.size(); ++i) {
std::shared_ptr<Array> array;
ArrayFromVector<TYPE, C_TYPE>(type, is_valid[i], values[i], &array);
diff --git a/cpp/src/arrow/util/trie-test.cc b/cpp/src/arrow/util/trie-test.cc
index bcbc77e..80a8a72 100644
--- a/cpp/src/arrow/util/trie-test.cc
+++ b/cpp/src/arrow/util/trie-test.cc
@@ -267,7 +267,7 @@ TEST(Trie, CapacityError) {
s[2] = third;
auto st = builder.Append(reinterpret_cast<const char*>(s));
if (st.IsCapacityError()) {
- DCHECK_GE(first, 2);
+ ASSERT_GE(first, 2);
had_capacity_error = true;
break;
} else {
diff --git a/cpp/src/gandiva/precompiled/decimal_ops_test.cc b/cpp/src/gandiva/precompiled/decimal_ops_test.cc
index 6c8d762..b4fe2e6 100644
--- a/cpp/src/gandiva/precompiled/decimal_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/decimal_ops_test.cc
@@ -180,7 +180,7 @@ void TestDecimalSql::VerifyAllSign(DecimalTypeUtil::Op op, const DecimalScalar12
// both -ve
Verify(op, -left, -right, -expected_output, expected_overflow);
} else {
- DCHECK(op == DecimalTypeUtil::kOpMultiply || op == DecimalTypeUtil::kOpDivide);
+ ASSERT_TRUE(op == DecimalTypeUtil::kOpMultiply || op == DecimalTypeUtil::kOpDivide);
// right -ve
Verify(op, left, -right, -expected_output, expected_overflow);
diff --git a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
index 792f760..2cc8b0d 100644
--- a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -34,6 +34,7 @@
#include "arrow/testing/util.h"
#include "arrow/type_traits.h"
#include "arrow/util/decimal.h"
+#include "arrow/util/logging.h"
#include "parquet/api/reader.h"
#include "parquet/api/writer.h"
diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx
index 7fc4ed4..a0d08f8 100644
--- a/python/pyarrow/_flight.pyx
+++ b/python/pyarrow/_flight.pyx
@@ -127,7 +127,7 @@ class ActionType(_ActionType):
cdef class Result:
"""A result from executing an Action."""
cdef:
- unique_ptr[CResult] result
+ unique_ptr[CFlightResult] result
def __init__(self, buf):
"""Create a new result.
@@ -136,7 +136,7 @@ cdef class Result:
----------
buf : Buffer or bytes-like object
"""
- self.result.reset(new CResult())
+ self.result.reset(new CFlightResult())
self.result.get().body = pyarrow_unwrap_buffer(as_buffer(buf))
@property
@@ -1145,14 +1145,18 @@ cdef void _do_get(void* self, const CServerCallContext& context,
cdef void _do_action_result_next(void* self,
- unique_ptr[CResult]* result) except *:
+ unique_ptr[CFlightResult]* result) except *:
"""Callback for implementing Flight servers in Python."""
+ cdef:
+ CFlightResult* c_result
+
try:
action_result = next(<object> self)
if not isinstance(action_result, Result):
raise TypeError("Result of FlightServerBase.do_action must "
"return an iterator of Result objects")
- result.reset(new CResult(deref((<Result> action_result).result.get())))
+ c_result = (<Result> action_result).result.get()
+ result.reset(new CFlightResult(deref(c_result)))
except StopIteration:
result.reset(nullptr)
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 189e8a0..5ae178d 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1456,10 +1456,10 @@ cdef class StructArray(Array):
result : StructArray
"""
cdef:
- Array array
shared_ptr[CArray] c_array
vector[shared_ptr[CArray]] c_arrays
- shared_ptr[CArray] c_result
+ vector[c_string] c_names
+ CResult[shared_ptr[CArray]] c_result
ssize_t num_arrays
ssize_t length
ssize_t i
@@ -1469,30 +1469,20 @@ cdef class StructArray(Array):
raise ValueError('Names are currently required')
arrays = [asarray(x) for x in arrays]
-
- num_arrays = len(arrays)
- if num_arrays != len(names):
- raise ValueError("The number of names should be equal to the "
- "number of arrays")
-
- if num_arrays > 0:
- length = len(arrays[0])
- c_arrays.resize(num_arrays)
- for i in range(num_arrays):
- array = arrays[i]
- if len(array) != length:
- raise ValueError("All arrays must have the same length")
- c_arrays[i] = array.sp_array
- else:
- length = 0
-
- struct_type = struct([
- field(name, array.type) for name, array in zip(names, arrays)
- ])
-
- c_result.reset(new CStructArray(struct_type.sp_type, length, c_arrays))
-
- return pyarrow_wrap_array(c_result)
+ for arr in arrays:
+ c_arrays.push_back(pyarrow_unwrap_array(arr))
+ for name in names:
+ c_names.push_back(tobytes(name))
+
+ if c_arrays.size() == 0 and c_names.size() == 0:
+ # The C++ side doesn't allow this
+ return array([], struct([]))
+
+ # XXX Cannot pass "nullptr" for a shared_ptr<T> argument:
+ # https://github.com/cython/cython/issues/3020
+ c_result = CStructArray.Make(c_arrays, c_names,
+ shared_ptr[CBuffer](), -1, 0)
+ return pyarrow_wrap_array(GetResultValue(c_result))
cdef class ExtensionArray(Array):
diff --git a/python/pyarrow/error.pxi b/python/pyarrow/error.pxi
index 43b1933..7b5e8d4 100644
--- a/python/pyarrow/error.pxi
+++ b/python/pyarrow/error.pxi
@@ -108,3 +108,9 @@ cdef int check_status(const CStatus& status) nogil except -1:
else:
message = frombytes(status.ToString())
raise ArrowException(message)
+
+
+# This is an API function for C++ PyArrow
+cdef api int pyarrow_internal_check_status(const CStatus& status) \
+ nogil except -1:
+ return check_status(status)
diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd
index 452b796..4a06fc8 100644
--- a/python/pyarrow/includes/common.pxd
+++ b/python/pyarrow/includes/common.pxd
@@ -69,6 +69,14 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
c_bool IsPlasmaObjectNonexistent()
c_bool IsPlasmaStoreFull()
+cdef extern from "arrow/result.h" namespace "arrow::internal" nogil:
+ cdef cppclass CResult[T]:
+ c_bool ok()
+ CStatus status()
+ T operator*()
+
+cdef extern from "arrow/python/common.h" namespace "arrow::py":
+ T GetResultValue[T](CResult[T]) except *
cdef inline object PyObject_to_object(PyObject* o):
# Cast to "object" increments reference count
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 79389ca..8798834 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -453,9 +453,19 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
CStructArray(shared_ptr[CDataType] type, int64_t length,
vector[shared_ptr[CArray]] children,
shared_ptr[CBuffer] null_bitmap=nullptr,
- int64_t null_count=0,
+ int64_t null_count=-1,
int64_t offset=0)
+ # XXX Cython crashes if default argument values are declared here
+ # https://github.com/cython/cython/issues/2167
+ @staticmethod
+ CResult[shared_ptr[CArray]] Make(
+ vector[shared_ptr[CArray]] children,
+ vector[c_string] field_names,
+ shared_ptr[CBuffer] null_bitmap,
+ int64_t null_count,
+ int64_t offset)
+
shared_ptr[CArray] field(int pos)
shared_ptr[CArray] GetFieldByName(const c_string& name) const
diff --git a/python/pyarrow/includes/libarrow_flight.pxd b/python/pyarrow/includes/libarrow_flight.pxd
index 49a5153..0639574 100644
--- a/python/pyarrow/includes/libarrow_flight.pxd
+++ b/python/pyarrow/includes/libarrow_flight.pxd
@@ -46,13 +46,13 @@ cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
c_string type
shared_ptr[CBuffer] body
- cdef cppclass CResult" arrow::flight::Result":
- CResult()
- CResult(CResult)
+ cdef cppclass CFlightResult" arrow::flight::Result":
+ CFlightResult()
+ CFlightResult(CFlightResult)
shared_ptr[CBuffer] body
cdef cppclass CResultStream" arrow::flight::ResultStream":
- CStatus Next(unique_ptr[CResult]* result)
+ CStatus Next(unique_ptr[CFlightResult]* result)
cdef cppclass CDescriptorType \
" arrow::flight::FlightDescriptor::DescriptorType":
@@ -247,7 +247,7 @@ ctypedef void cb_do_action(object, const CServerCallContext&, const CAction&,
unique_ptr[CResultStream]*)
ctypedef void cb_list_actions(object, const CServerCallContext&,
vector[CActionType]*)
-ctypedef void cb_result_next(object, unique_ptr[CResult]*)
+ctypedef void cb_result_next(object, unique_ptr[CFlightResult]*)
ctypedef void cb_data_stream_next(object, CFlightPayload*)
ctypedef void cb_server_authenticate(object, CServerAuthSender*,
CServerAuthReader*)
@@ -316,6 +316,7 @@ cdef extern from "arrow/python/flight.h" namespace "arrow::py::flight" nogil:
int64_t total_bytes,
unique_ptr[CFlightInfo]* out)
+
cdef extern from "<utility>" namespace "std":
unique_ptr[CFlightDataStream] move(unique_ptr[CFlightDataStream]) nogil
unique_ptr[CServerAuthHandler] move(unique_ptr[CServerAuthHandler]) nogil
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 80625ba..9d66d96 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -387,6 +387,26 @@ def test_struct_from_buffers():
children=children[:1])
+def test_struct_from_arrays():
+ a = pa.array([4, 5, None])
+ b = pa.array(["bar", None, ""])
+ c = pa.array([[1, 2], None, [3, None]])
+
+ arr = pa.StructArray.from_arrays([a, b, c], ["a", "b", "c"])
+ assert arr.to_pylist() == [
+ {'a': 4, 'b': 'bar', 'c': [1, 2]},
+ {'a': 5, 'b': None, 'c': None},
+ {'a': None, 'b': '', 'c': [3, None]},
+ ]
+
+ with pytest.raises(ValueError):
+ pa.StructArray.from_arrays([a, b, c], ["a", "b"])
+
+ arr = pa.StructArray.from_arrays([], [])
+ assert arr.type == pa.struct([])
+ assert arr.to_pylist() == []
+
+
def test_dictionary_from_numpy():
indices = np.repeat([0, 1, 2], 2)
dictionary = np.array(['foo', 'bar', 'baz'], dtype=object)