You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/01/30 20:05:19 UTC
[arrow] branch master updated: ARROW-1705: [Python] allow building array from dicts
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 5c704bc ARROW-1705: [Python] allow building array from dicts
5c704bc is described below
commit 5c704bce42e3fa71ea4586368962d41173b3e17b
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Tue Jan 30 15:05:14 2018 -0500
ARROW-1705: [Python] allow building array from dicts
Accept passing a list of dicts to pa.array() if a struct type is given.
Based on PR #1513.
Author: Antoine Pitrou <an...@python.org>
Closes #1530 from pitrou/ARROW-1705-struct-array-from-dicts and squashes the following commits:
2b9133af [Antoine Pitrou] ARROW-1705: [Python] allow building array from dicts
---
cpp/src/arrow/python/builtin_convert.cc | 289 ++++++++++++++++++---------
python/pyarrow/tests/test_convert_builtin.py | 25 +++
2 files changed, 220 insertions(+), 94 deletions(-)
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index 63d3889..1b3c101 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -23,6 +23,8 @@
#include <limits>
#include <sstream>
#include <string>
+#include <utility>
+#include <vector>
#include "arrow/python/builtin_convert.h"
@@ -356,7 +358,11 @@ class SeqConverter {
return Status::OK();
}
- virtual Status AppendData(PyObject* seq, int64_t size) = 0;
+ // Append a single (non-sequence) Python datum to the underlying builder
+ virtual Status AppendSingle(PyObject* obj) = 0;
+
+ // Append the contents of a Python sequence to the underlying builder
+ virtual Status AppendMultiple(PyObject* seq, int64_t size) = 0;
virtual ~SeqConverter() = default;
@@ -377,47 +383,57 @@ class TypedConverter : public SeqConverter {
BuilderType* typed_builder_;
};
+// We use the CRTP trick here to devirtualize the AppendItem() and AppendNull()
+// method calls.
template <typename BuilderType, class Derived>
class TypedConverterVisitor : public TypedConverter<BuilderType> {
public:
- Status AppendData(PyObject* obj, int64_t size) override {
+ Status AppendSingle(PyObject* obj) override {
+ if (obj == Py_None) {
+ return static_cast<Derived*>(this)->AppendNull();
+ } else {
+ return static_cast<Derived*>(this)->AppendItem(obj);
+ }
+ }
+
+ Status AppendMultiple(PyObject* obj, int64_t size) override {
/// Ensure we've allocated enough space
RETURN_NOT_OK(this->typed_builder_->Reserve(size));
// Iterate over the items adding each one
if (PySequence_Check(obj)) {
for (int64_t i = 0; i < size; ++i) {
OwnedRef ref(PySequence_GetItem(obj, i));
- if (ref.obj() == Py_None) {
- RETURN_NOT_OK(this->typed_builder_->AppendNull());
- } else {
- RETURN_NOT_OK(static_cast<Derived*>(this)->AppendItem(ref));
- }
+ RETURN_NOT_OK(static_cast<Derived*>(this)->AppendSingle(ref.obj()));
}
} else {
return Status::TypeError("Object is not a sequence");
}
return Status::OK();
}
+
+ // Append a missing item (default implementation)
+ Status AppendNull() { return this->typed_builder_->AppendNull(); }
};
class NullConverter : public TypedConverterVisitor<NullBuilder, NullConverter> {
public:
- Status AppendItem(const OwnedRef& item) {
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
return Status::Invalid("NullConverter: passed non-None value");
}
};
class BoolConverter : public TypedConverterVisitor<BooleanBuilder, BoolConverter> {
public:
- Status AppendItem(const OwnedRef& item) {
- return typed_builder_->Append(item.obj() == Py_True);
- }
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) { return typed_builder_->Append(obj == Py_True); }
};
class Int8Converter : public TypedConverterVisitor<Int8Builder, Int8Converter> {
public:
- Status AppendItem(const OwnedRef& item) {
- const auto val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
+ const auto val = static_cast<int64_t>(PyLong_AsLongLong(obj));
if (ARROW_PREDICT_FALSE(val > std::numeric_limits<int8_t>::max() ||
val < std::numeric_limits<int8_t>::min())) {
@@ -432,8 +448,9 @@ class Int8Converter : public TypedConverterVisitor<Int8Builder, Int8Converter> {
class Int16Converter : public TypedConverterVisitor<Int16Builder, Int16Converter> {
public:
- Status AppendItem(const OwnedRef& item) {
- const auto val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
+ const auto val = static_cast<int64_t>(PyLong_AsLongLong(obj));
if (ARROW_PREDICT_FALSE(val > std::numeric_limits<int16_t>::max() ||
val < std::numeric_limits<int16_t>::min())) {
@@ -448,8 +465,9 @@ class Int16Converter : public TypedConverterVisitor<Int16Builder, Int16Converter
class Int32Converter : public TypedConverterVisitor<Int32Builder, Int32Converter> {
public:
- Status AppendItem(const OwnedRef& item) {
- const auto val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
+ const auto val = static_cast<int64_t>(PyLong_AsLongLong(obj));
if (ARROW_PREDICT_FALSE(val > std::numeric_limits<int32_t>::max() ||
val < std::numeric_limits<int32_t>::min())) {
@@ -464,8 +482,9 @@ class Int32Converter : public TypedConverterVisitor<Int32Builder, Int32Converter
class Int64Converter : public TypedConverterVisitor<Int64Builder, Int64Converter> {
public:
- Status AppendItem(const OwnedRef& item) {
- const auto val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
+ const auto val = static_cast<int64_t>(PyLong_AsLongLong(obj));
RETURN_IF_PYERROR();
return typed_builder_->Append(val);
}
@@ -473,8 +492,9 @@ class Int64Converter : public TypedConverterVisitor<Int64Builder, Int64Converter
class UInt8Converter : public TypedConverterVisitor<UInt8Builder, UInt8Converter> {
public:
- Status AppendItem(const OwnedRef& item) {
- const auto val = static_cast<uint64_t>(PyLong_AsLongLong(item.obj()));
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
+ const auto val = static_cast<uint64_t>(PyLong_AsLongLong(obj));
RETURN_IF_PYERROR();
if (ARROW_PREDICT_FALSE(val > std::numeric_limits<uint8_t>::max())) {
@@ -488,8 +508,9 @@ class UInt8Converter : public TypedConverterVisitor<UInt8Builder, UInt8Converter
class UInt16Converter : public TypedConverterVisitor<UInt16Builder, UInt16Converter> {
public:
- Status AppendItem(const OwnedRef& item) {
- const auto val = static_cast<uint64_t>(PyLong_AsLongLong(item.obj()));
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
+ const auto val = static_cast<uint64_t>(PyLong_AsLongLong(obj));
RETURN_IF_PYERROR();
if (ARROW_PREDICT_FALSE(val > std::numeric_limits<uint16_t>::max())) {
@@ -503,8 +524,9 @@ class UInt16Converter : public TypedConverterVisitor<UInt16Builder, UInt16Conver
class UInt32Converter : public TypedConverterVisitor<UInt32Builder, UInt32Converter> {
public:
- Status AppendItem(const OwnedRef& item) {
- const auto val = static_cast<uint64_t>(PyLong_AsLongLong(item.obj()));
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
+ const auto val = static_cast<uint64_t>(PyLong_AsLongLong(obj));
RETURN_IF_PYERROR();
if (ARROW_PREDICT_FALSE(val > std::numeric_limits<uint32_t>::max())) {
@@ -518,8 +540,9 @@ class UInt32Converter : public TypedConverterVisitor<UInt32Builder, UInt32Conver
class UInt64Converter : public TypedConverterVisitor<UInt64Builder, UInt64Converter> {
public:
- Status AppendItem(const OwnedRef& item) {
- const auto val = static_cast<int64_t>(PyLong_AsUnsignedLongLong(item.obj()));
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
+ const auto val = static_cast<int64_t>(PyLong_AsUnsignedLongLong(obj));
RETURN_IF_PYERROR();
return typed_builder_->Append(val);
}
@@ -527,13 +550,14 @@ class UInt64Converter : public TypedConverterVisitor<UInt64Builder, UInt64Conver
class Date32Converter : public TypedConverterVisitor<Date32Builder, Date32Converter> {
public:
- Status AppendItem(const OwnedRef& item) {
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
int32_t t;
- if (PyDate_Check(item.obj())) {
- auto pydate = reinterpret_cast<PyDateTime_Date*>(item.obj());
+ if (PyDate_Check(obj)) {
+ auto pydate = reinterpret_cast<PyDateTime_Date*>(obj);
t = static_cast<int32_t>(PyDate_to_s(pydate));
} else {
- const auto casted_val = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+ const auto casted_val = static_cast<int64_t>(PyLong_AsLongLong(obj));
RETURN_IF_PYERROR();
if (casted_val > std::numeric_limits<int32_t>::max()) {
return Status::Invalid("Integer as date32 larger than INT32_MAX");
@@ -546,13 +570,14 @@ class Date32Converter : public TypedConverterVisitor<Date32Builder, Date32Conver
class Date64Converter : public TypedConverterVisitor<Date64Builder, Date64Converter> {
public:
- Status AppendItem(const OwnedRef& item) {
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
int64_t t;
- if (PyDate_Check(item.obj())) {
- auto pydate = reinterpret_cast<PyDateTime_Date*>(item.obj());
+ if (PyDate_Check(obj)) {
+ auto pydate = reinterpret_cast<PyDateTime_Date*>(obj);
t = PyDate_to_ms(pydate);
} else {
- t = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+ t = static_cast<int64_t>(PyLong_AsLongLong(obj));
RETURN_IF_PYERROR();
}
return typed_builder_->Append(t);
@@ -564,10 +589,11 @@ class TimestampConverter
public:
explicit TimestampConverter(TimeUnit::type unit) : unit_(unit) {}
- Status AppendItem(const OwnedRef& item) {
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
int64_t t;
- if (PyDateTime_Check(item.obj())) {
- auto pydatetime = reinterpret_cast<PyDateTime_DateTime*>(item.obj());
+ if (PyDateTime_Check(obj)) {
+ auto pydatetime = reinterpret_cast<PyDateTime_DateTime*>(obj);
switch (unit_) {
case TimeUnit::SECOND:
@@ -585,10 +611,10 @@ class TimestampConverter
default:
return Status::UnknownError("Invalid time unit");
}
- } else if (PyArray_CheckAnyScalarExact(item.obj())) {
+ } else if (PyArray_CheckAnyScalarExact(obj)) {
// numpy.datetime64
std::shared_ptr<DataType> type;
- RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(item.obj()), &type));
+ RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(obj), &type));
if (type->id() != Type::TIMESTAMP) {
std::ostringstream ss;
ss << "Expected np.datetime64 but got: ";
@@ -601,10 +627,9 @@ class TimestampConverter
"Cannot convert NumPy datetime64 objects with differing unit");
}
- PyDatetimeScalarObject* obj = reinterpret_cast<PyDatetimeScalarObject*>(item.obj());
- t = obj->obval;
+ t = reinterpret_cast<PyDatetimeScalarObject*>(obj)->obval;
} else {
- t = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+ t = static_cast<int64_t>(PyLong_AsLongLong(obj));
RETURN_IF_PYERROR();
}
return typed_builder_->Append(t);
@@ -616,8 +641,9 @@ class TimestampConverter
class Float32Converter : public TypedConverterVisitor<FloatBuilder, Float32Converter> {
public:
- Status AppendItem(const OwnedRef& item) {
- float val = static_cast<float>(PyFloat_AsDouble(item.obj()));
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
+ float val = static_cast<float>(PyFloat_AsDouble(obj));
RETURN_IF_PYERROR();
return typed_builder_->Append(val);
}
@@ -625,8 +651,9 @@ class Float32Converter : public TypedConverterVisitor<FloatBuilder, Float32Conve
class DoubleConverter : public TypedConverterVisitor<DoubleBuilder, DoubleConverter> {
public:
- Status AppendItem(const OwnedRef& item) {
- double val = PyFloat_AsDouble(item.obj());
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
+ double val = PyFloat_AsDouble(obj);
RETURN_IF_PYERROR();
return typed_builder_->Append(val);
}
@@ -634,22 +661,23 @@ class DoubleConverter : public TypedConverterVisitor<DoubleBuilder, DoubleConver
class BytesConverter : public TypedConverterVisitor<BinaryBuilder, BytesConverter> {
public:
- Status AppendItem(const OwnedRef& item) {
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
PyObject* bytes_obj;
const char* bytes;
Py_ssize_t length;
OwnedRef tmp;
- if (PyUnicode_Check(item.obj())) {
- tmp.reset(PyUnicode_AsUTF8String(item.obj()));
+ if (PyUnicode_Check(obj)) {
+ tmp.reset(PyUnicode_AsUTF8String(obj));
RETURN_IF_PYERROR();
bytes_obj = tmp.obj();
- } else if (PyBytes_Check(item.obj())) {
- bytes_obj = item.obj();
+ } else if (PyBytes_Check(obj)) {
+ bytes_obj = obj;
} else {
std::stringstream ss;
ss << "Error converting to Binary type: ";
- RETURN_NOT_OK(InvalidConversion(item.obj(), "bytes", &ss));
+ RETURN_NOT_OK(InvalidConversion(obj, "bytes", &ss));
return Status::Invalid(ss.str());
}
// No error checking
@@ -662,22 +690,23 @@ class BytesConverter : public TypedConverterVisitor<BinaryBuilder, BytesConverte
class FixedWidthBytesConverter
: public TypedConverterVisitor<FixedSizeBinaryBuilder, FixedWidthBytesConverter> {
public:
- Status AppendItem(const OwnedRef& item) {
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
PyObject* bytes_obj;
OwnedRef tmp;
Py_ssize_t expected_length =
std::dynamic_pointer_cast<FixedSizeBinaryType>(typed_builder_->type())
->byte_width();
- if (PyUnicode_Check(item.obj())) {
- tmp.reset(PyUnicode_AsUTF8String(item.obj()));
+ if (PyUnicode_Check(obj)) {
+ tmp.reset(PyUnicode_AsUTF8String(obj));
RETURN_IF_PYERROR();
bytes_obj = tmp.obj();
- } else if (PyBytes_Check(item.obj())) {
- bytes_obj = item.obj();
+ } else if (PyBytes_Check(obj)) {
+ bytes_obj = obj;
} else {
std::stringstream ss;
ss << "Error converting to FixedSizeBinary type: ";
- RETURN_NOT_OK(InvalidConversion(item.obj(), "bytes", &ss));
+ RETURN_NOT_OK(InvalidConversion(obj, "bytes", &ss));
return Status::Invalid(ss.str());
}
// No error checking
@@ -689,13 +718,13 @@ class FixedWidthBytesConverter
class UTF8Converter : public TypedConverterVisitor<StringBuilder, UTF8Converter> {
public:
- Status AppendItem(const OwnedRef& item) {
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
PyObject* bytes_obj;
OwnedRef tmp;
const char* bytes;
Py_ssize_t length;
- PyObject* obj = item.obj();
if (PyBytes_Check(obj)) {
tmp.reset(
PyUnicode_FromStringAndSize(PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj)));
@@ -724,75 +753,114 @@ class ListConverter : public TypedConverterVisitor<ListBuilder, ListConverter> {
public:
Status Init(ArrayBuilder* builder) override;
- Status AppendItem(const OwnedRef& item) {
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
RETURN_NOT_OK(typed_builder_->Append());
- PyObject* item_obj = item.obj();
- const auto list_size = static_cast<int64_t>(PySequence_Size(item_obj));
- return value_converter_->AppendData(item_obj, list_size);
+ const auto list_size = static_cast<int64_t>(PySequence_Size(obj));
+ return value_converter_->AppendMultiple(obj, list_size);
}
protected:
- std::shared_ptr<SeqConverter> value_converter_;
+ std::unique_ptr<SeqConverter> value_converter_;
+};
+
+class StructConverter : public TypedConverterVisitor<StructBuilder, StructConverter> {
+ public:
+ Status Init(ArrayBuilder* builder) override;
+
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
+ RETURN_NOT_OK(typed_builder_->Append());
+ if (!PyDict_Check(obj)) {
+ return Status::TypeError("dict value expected for struct type");
+ }
+ // NOTE we're ignoring any extraneous dict items
+ for (int i = 0; i < num_fields_; i++) {
+ PyObject* nameobj = PyList_GET_ITEM(field_name_list_.obj(), i);
+ PyObject* valueobj = PyDict_GetItem(obj, nameobj); // borrowed
+ RETURN_IF_PYERROR();
+ RETURN_NOT_OK(value_converters_[i]->AppendSingle(valueobj ? valueobj : Py_None));
+ }
+
+ return Status::OK();
+ }
+
+ // Append a missing item
+ Status AppendNull() {
+ RETURN_NOT_OK(typed_builder_->AppendNull());
+ // Need to also insert a missing item on all child builders
+ // (compare with ListConverter)
+ for (int i = 0; i < num_fields_; i++) {
+ RETURN_NOT_OK(value_converters_[i]->AppendSingle(Py_None));
+ }
+ return Status::OK();
+ }
+
+ protected:
+ std::vector<std::unique_ptr<SeqConverter>> value_converters_;
+ OwnedRef field_name_list_;
+ int num_fields_;
};
class DecimalConverter
: public TypedConverterVisitor<arrow::Decimal128Builder, DecimalConverter> {
public:
- Status AppendItem(const OwnedRef& item) {
+ // Append a non-missing item
+ Status AppendItem(PyObject* obj) {
/// TODO(phillipc): Check for nan?
Decimal128 value;
const auto& type = static_cast<const DecimalType&>(*typed_builder_->type());
- RETURN_NOT_OK(internal::DecimalFromPythonDecimal(item.obj(), type, &value));
+ RETURN_NOT_OK(internal::DecimalFromPythonDecimal(obj, type, &value));
return typed_builder_->Append(value);
}
};
// Dynamic constructor for sequence converters
-std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type) {
+std::unique_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type) {
switch (type->id()) {
case Type::NA:
- return std::make_shared<NullConverter>();
+ return std::unique_ptr<SeqConverter>(new NullConverter);
case Type::BOOL:
- return std::make_shared<BoolConverter>();
+ return std::unique_ptr<SeqConverter>(new BoolConverter);
case Type::INT8:
- return std::make_shared<Int8Converter>();
+ return std::unique_ptr<SeqConverter>(new Int8Converter);
case Type::INT16:
- return std::make_shared<Int16Converter>();
+ return std::unique_ptr<SeqConverter>(new Int16Converter);
case Type::INT32:
- return std::make_shared<Int32Converter>();
+ return std::unique_ptr<SeqConverter>(new Int32Converter);
case Type::INT64:
- return std::make_shared<Int64Converter>();
+ return std::unique_ptr<SeqConverter>(new Int64Converter);
case Type::UINT8:
- return std::make_shared<UInt8Converter>();
+ return std::unique_ptr<SeqConverter>(new UInt8Converter);
case Type::UINT16:
- return std::make_shared<UInt16Converter>();
+ return std::unique_ptr<SeqConverter>(new UInt16Converter);
case Type::UINT32:
- return std::make_shared<UInt32Converter>();
+ return std::unique_ptr<SeqConverter>(new UInt32Converter);
case Type::UINT64:
- return std::make_shared<UInt64Converter>();
+ return std::unique_ptr<SeqConverter>(new UInt64Converter);
case Type::DATE32:
- return std::make_shared<Date32Converter>();
+ return std::unique_ptr<SeqConverter>(new Date32Converter);
case Type::DATE64:
- return std::make_shared<Date64Converter>();
+ return std::unique_ptr<SeqConverter>(new Date64Converter);
case Type::TIMESTAMP:
- return std::make_shared<TimestampConverter>(
- static_cast<const TimestampType&>(*type).unit());
+ return std::unique_ptr<SeqConverter>(
+ new TimestampConverter(static_cast<const TimestampType&>(*type).unit()));
case Type::FLOAT:
- return std::make_shared<Float32Converter>();
+ return std::unique_ptr<SeqConverter>(new Float32Converter);
case Type::DOUBLE:
- return std::make_shared<DoubleConverter>();
+ return std::unique_ptr<SeqConverter>(new DoubleConverter);
case Type::BINARY:
- return std::make_shared<BytesConverter>();
+ return std::unique_ptr<SeqConverter>(new BytesConverter);
case Type::FIXED_SIZE_BINARY:
- return std::make_shared<FixedWidthBytesConverter>();
+ return std::unique_ptr<SeqConverter>(new FixedWidthBytesConverter);
case Type::STRING:
- return std::make_shared<UTF8Converter>();
+ return std::unique_ptr<SeqConverter>(new UTF8Converter);
case Type::LIST:
- return std::make_shared<ListConverter>();
- case Type::DECIMAL: {
- return std::make_shared<DecimalConverter>();
- }
+ return std::unique_ptr<SeqConverter>(new ListConverter);
case Type::STRUCT:
+ return std::unique_ptr<SeqConverter>(new StructConverter);
+ case Type::DECIMAL:
+ return std::unique_ptr<SeqConverter>(new DecimalConverter);
default:
return nullptr;
}
@@ -811,17 +879,50 @@ Status ListConverter::Init(ArrayBuilder* builder) {
return value_converter_->Init(typed_builder_->value_builder());
}
+Status StructConverter::Init(ArrayBuilder* builder) {
+ builder_ = builder;
+ typed_builder_ = static_cast<StructBuilder*>(builder);
+ StructType* struct_type = static_cast<StructType*>(builder->type().get());
+
+ num_fields_ = typed_builder_->num_fields();
+ DCHECK_EQ(num_fields_, struct_type->num_children());
+
+ field_name_list_.reset(PyList_New(num_fields_));
+ RETURN_IF_PYERROR();
+
+ // Initialize the child converters and field names
+ for (int i = 0; i < num_fields_; i++) {
+ const std::string& field_name(struct_type->child(i)->name());
+ std::shared_ptr<DataType> field_type(struct_type->child(i)->type());
+
+ auto value_converter = GetConverter(field_type);
+ if (value_converter == nullptr) {
+ return Status::NotImplemented("value type not implemented");
+ }
+ RETURN_NOT_OK(value_converter->Init(typed_builder_->field_builder(i)));
+ value_converters_.push_back(std::move(value_converter));
+
+ // Store the field name as a PyObject, for dict matching
+ PyObject* nameobj =
+ PyUnicode_FromStringAndSize(field_name.c_str(), field_name.size());
+ RETURN_IF_PYERROR();
+ PyList_SET_ITEM(field_name_list_.obj(), i, nameobj);
+ }
+
+ return Status::OK();
+}
+
Status AppendPySequence(PyObject* obj, int64_t size,
const std::shared_ptr<DataType>& type, ArrayBuilder* builder) {
PyDateTime_IMPORT;
- std::shared_ptr<SeqConverter> converter = GetConverter(type);
+ auto converter = GetConverter(type);
if (converter == nullptr) {
std::stringstream ss;
ss << "No type converter implemented for " << type->ToString();
return Status::NotImplemented(ss.str());
}
RETURN_NOT_OK(converter->Init(builder));
- return converter->AppendData(obj, size);
+ return converter->AppendMultiple(obj, size);
}
static Status ConvertPySequenceReal(PyObject* obj, int64_t size,
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 2b317df..bbdf6e7 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -504,3 +504,28 @@ def test_structarray():
pylist = arr.to_pylist()
assert pylist == expected, (pylist, expected)
+
+
+def test_struct_from_dicts():
+ ty = pa.struct([pa.field('a', pa.int32()),
+ pa.field('b', pa.string()),
+ pa.field('c', pa.bool_())])
+ arr = pa.array([], type=ty)
+ assert arr.to_pylist() == []
+
+ data = [{'a': 5, 'b': 'foo', 'c': True},
+ {'a': 6, 'b': 'bar', 'c': False}]
+ arr = pa.array(data, type=ty)
+ assert arr.to_pylist() == data
+
+ # With omitted values
+ data = [{'a': 5, 'c': True},
+ None,
+ {},
+ {'a': None, 'b': 'bar'}]
+ arr = pa.array(data, type=ty)
+ expected = [{'a': 5, 'b': None, 'c': True},
+ None,
+ {'a': None, 'b': None, 'c': None},
+ {'a': None, 'b': 'bar', 'c': None}]
+ assert arr.to_pylist() == expected
--
To stop receiving notification emails like this one, please contact
wesm@apache.org.