You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2017/06/29 20:01:06 UTC
arrow git commit: ARROW-1165: [C++] Refactor
PythonDecimalToArrowDecimal to not use templates
Repository: arrow
Updated Branches:
refs/heads/master 65558dbb9 -> 69582528e
ARROW-1165: [C++] Refactor PythonDecimalToArrowDecimal to not use templates
Author: Phillip Cloud <cp...@gmail.com>
Closes #794 from cpcloud/ARROW-1165 and squashes the following commits:
53565ea [Phillip Cloud] ARROW-1165: [C++] Refactor PythonDecimalToArrowDecimal to not use templates
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/69582528
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/69582528
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/69582528
Branch: refs/heads/master
Commit: 69582528e00f465b6f2b16d39ebe20dc6811ec6c
Parents: 65558db
Author: Phillip Cloud <cp...@gmail.com>
Authored: Thu Jun 29 22:01:00 2017 +0200
Committer: Uwe L. Korn <uw...@xhochy.com>
Committed: Thu Jun 29 22:01:00 2017 +0200
----------------------------------------------------------------------
cpp/src/arrow/python/builtin_convert.cc | 112 ++++++++++++---------------
cpp/src/arrow/python/builtin_convert.h | 3 +-
cpp/src/arrow/python/helpers.cc | 14 +---
cpp/src/arrow/python/helpers.h | 11 +--
cpp/src/arrow/python/pandas_convert.cc | 14 ++--
cpp/src/arrow/python/python-test.cc | 12 +--
6 files changed, 71 insertions(+), 95 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/69582528/cpp/src/arrow/python/builtin_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index d3bfb37..97e2bb7 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -160,16 +160,16 @@ class SeqVisitor {
if (PySequence_Check(obj)) {
Py_ssize_t size = PySequence_Size(obj);
for (int64_t i = 0; i < size; ++i) {
- // TODO(wesm): Specialize for PyList_GET_ITEM?
- OwnedRef ref = OwnedRef(PySequence_GetItem(obj, i));
- RETURN_NOT_OK(VisitElem(ref, level));
+ // TODO(wesm): Specialize for PyList_GET_ITEM?
+ OwnedRef ref = OwnedRef(PySequence_GetItem(obj, i));
+ RETURN_NOT_OK(VisitElem(ref, level));
}
} else if (PyObject_HasAttrString(obj, "__iter__")) {
OwnedRef iter = OwnedRef(PyObject_GetIter(obj));
PyObject* item;
while ((item = PyIter_Next(iter.obj()))) {
- OwnedRef ref = OwnedRef(item);
- RETURN_NOT_OK(VisitElem(ref, level));
+ OwnedRef ref = OwnedRef(item);
+ RETURN_NOT_OK(VisitElem(ref, level));
}
} else {
return Status::TypeError("Object is not a sequence or iterable");
@@ -181,10 +181,10 @@ class SeqVisitor {
// If all the non-list inputs were null (or there were no inputs)
if (scalars_.total_count() == 0) {
if (max_nesting_level_ == 0) {
- // If its just a single empty list or list of nulls, return null.
+ // If its just a single empty list or list of nulls, return null.
return null();
} else {
- // Error, if we have nesting but no concrete base type.
+ // Error, if we have nesting but no concrete base type.
return nullptr;
}
} else {
@@ -201,7 +201,7 @@ class SeqVisitor {
if (scalars_.total_count() > 0) {
if (num_nesting_levels() > 1) {
return Status::Invalid("Mixed nesting levels not supported");
- // If the nesting goes deeper than the deepest scalar
+ // If the nesting goes deeper than the deepest scalar
} else if (max_observed_level() < max_nesting_level_) {
return Status::Invalid("Mixed nesting levels not supported");
}
@@ -238,7 +238,7 @@ class SeqVisitor {
int nesting_histogram_[MAX_NESTING_LEVELS];
// Visits a specific element (inner part of the loop).
- Status VisitElem(const OwnedRef &item_ref, int level) {
+ Status VisitElem(const OwnedRef& item_ref, int level) {
if (PyList_Check(item_ref.obj())) {
RETURN_NOT_OK(Visit(item_ref.obj(), level + 1));
} else if (PyDict_Check(item_ref.obj())) {
@@ -246,10 +246,10 @@ class SeqVisitor {
} else {
// We permit nulls at any level of nesting
if (item_ref.obj() == Py_None) {
- // TODO
+ // TODO
} else {
- ++nesting_histogram_[level];
- scalars_.Visit(item_ref.obj());
+ ++nesting_histogram_[level];
+ scalars_.Visit(item_ref.obj());
}
}
return Status::OK();
@@ -282,7 +282,6 @@ Status InferArrowSize(PyObject* obj, int64_t* size) {
// Non-exhaustive type inference
Status InferArrowTypeAndSize(
PyObject* obj, int64_t* size, std::shared_ptr<DataType>* out_type) {
-
RETURN_NOT_OK(InferArrowSize(obj, size));
// For 0-length sequences, refuse to guess
@@ -338,8 +337,8 @@ class TypedConverterVisitor : public TypedConverter<BuilderType> {
// Iterate over the items adding each one
if (PySequence_Check(obj)) {
for (int64_t i = 0; i < size; ++i) {
- OwnedRef ref(PySequence_GetItem(obj, i));
- RETURN_NOT_OK(static_cast<Derived*>(this)->AppendItem(ref));
+ OwnedRef ref(PySequence_GetItem(obj, i));
+ RETURN_NOT_OK(static_cast<Derived*>(this)->AppendItem(ref));
}
} else if (PyObject_HasAttrString(obj, "__iter__")) {
PyObject* iter = PyObject_GetIter(obj);
@@ -349,13 +348,11 @@ class TypedConverterVisitor : public TypedConverter<BuilderType> {
// To allow people with long generators to only convert a subset, stop
// consuming at size.
while ((item = PyIter_Next(iter)) && i < size) {
- OwnedRef ref(item);
- RETURN_NOT_OK(static_cast<Derived*>(this)->AppendItem(ref));
- ++i;
- }
- if (size != i) {
- RETURN_NOT_OK(this->typed_builder_->Resize(i));
+ OwnedRef ref(item);
+ RETURN_NOT_OK(static_cast<Derived*>(this)->AppendItem(ref));
+ ++i;
}
+ if (size != i) { RETURN_NOT_OK(this->typed_builder_->Resize(i)); }
} else {
return Status::TypeError("Object is not a sequence or iterable");
}
@@ -365,24 +362,22 @@ class TypedConverterVisitor : public TypedConverter<BuilderType> {
virtual Status AppendItem(const OwnedRef& item) = 0;
};
-class BoolConverter : public TypedConverterVisitor<
- BooleanBuilder, BoolConverter> {
+class BoolConverter : public TypedConverterVisitor<BooleanBuilder, BoolConverter> {
public:
inline Status AppendItem(const OwnedRef& item) {
if (item.obj() == Py_None) {
return typed_builder_->AppendNull();
} else {
if (item.obj() == Py_True) {
- return typed_builder_->Append(true);
+ return typed_builder_->Append(true);
} else {
- return typed_builder_->Append(false);
+ return typed_builder_->Append(false);
}
}
}
};
-class Int64Converter : public TypedConverterVisitor<
- Int64Builder, Int64Converter> {
+class Int64Converter : public TypedConverterVisitor<Int64Builder, Int64Converter> {
public:
inline Status AppendItem(const OwnedRef& item) {
int64_t val;
@@ -396,8 +391,7 @@ class Int64Converter : public TypedConverterVisitor<
}
};
-class DateConverter : public TypedConverterVisitor<
- Date64Builder, DateConverter> {
+class DateConverter : public TypedConverterVisitor<Date64Builder, DateConverter> {
public:
inline Status AppendItem(const OwnedRef& item) {
if (item.obj() == Py_None) {
@@ -409,22 +403,21 @@ class DateConverter : public TypedConverterVisitor<
}
};
-class TimestampConverter : public TypedConverterVisitor<
- Date64Builder, TimestampConverter> {
+class TimestampConverter
+ : public TypedConverterVisitor<Date64Builder, TimestampConverter> {
public:
inline Status AppendItem(const OwnedRef& item) {
if (item.obj() == Py_None) {
return typed_builder_->AppendNull();
} else {
PyDateTime_DateTime* pydatetime =
- reinterpret_cast<PyDateTime_DateTime*>(item.obj());
+ reinterpret_cast<PyDateTime_DateTime*>(item.obj());
return typed_builder_->Append(PyDateTime_to_us(pydatetime));
}
}
};
-class DoubleConverter : public TypedConverterVisitor<
- DoubleBuilder, DoubleConverter> {
+class DoubleConverter : public TypedConverterVisitor<DoubleBuilder, DoubleConverter> {
public:
inline Status AppendItem(const OwnedRef& item) {
double val;
@@ -438,8 +431,7 @@ class DoubleConverter : public TypedConverterVisitor<
}
};
-class BytesConverter : public TypedConverterVisitor<
- BinaryBuilder, BytesConverter> {
+class BytesConverter : public TypedConverterVisitor<BinaryBuilder, BytesConverter> {
public:
inline Status AppendItem(const OwnedRef& item) {
PyObject* bytes_obj;
@@ -466,8 +458,8 @@ class BytesConverter : public TypedConverterVisitor<
}
};
-class FixedWidthBytesConverter : public TypedConverterVisitor<
- FixedSizeBinaryBuilder, FixedWidthBytesConverter> {
+class FixedWidthBytesConverter
+ : public TypedConverterVisitor<FixedSizeBinaryBuilder, FixedWidthBytesConverter> {
public:
inline Status AppendItem(const OwnedRef& item) {
PyObject* bytes_obj;
@@ -489,12 +481,11 @@ class FixedWidthBytesConverter : public TypedConverterVisitor<
// No error checking
RETURN_NOT_OK(CheckPythonBytesAreFixedLength(bytes_obj, expected_length));
return typed_builder_->Append(
- reinterpret_cast<const uint8_t*>(PyBytes_AS_STRING(bytes_obj)));
+ reinterpret_cast<const uint8_t*>(PyBytes_AS_STRING(bytes_obj)));
}
};
-class UTF8Converter : public TypedConverterVisitor<
- StringBuilder, UTF8Converter> {
+class UTF8Converter : public TypedConverterVisitor<StringBuilder, UTF8Converter> {
public:
inline Status AppendItem(const OwnedRef& item) {
PyObject* bytes_obj;
@@ -518,19 +509,17 @@ class UTF8Converter : public TypedConverterVisitor<
}
};
-class ListConverter : public TypedConverterVisitor<
- ListBuilder, ListConverter> {
+class ListConverter : public TypedConverterVisitor<ListBuilder, ListConverter> {
public:
Status Init(const std::shared_ptr<ArrayBuilder>& builder) override;
- inline Status AppendItem(const OwnedRef& item) {
+ inline Status AppendItem(const OwnedRef& item) override {
if (item.obj() == Py_None) {
return typed_builder_->AppendNull();
} else {
typed_builder_->Append();
PyObject* item_obj = item.obj();
- int64_t list_size =
- static_cast<int64_t>(PySequence_Size(item_obj));
+ int64_t list_size = static_cast<int64_t>(PySequence_Size(item_obj));
return value_converter_->AppendData(item_obj, list_size);
}
}
@@ -539,16 +528,18 @@ class ListConverter : public TypedConverterVisitor<
std::shared_ptr<SeqConverter> value_converter_;
};
-#define DECIMAL_CONVERT_CASE(bit_width, item, builder) \
- case bit_width: { \
- arrow::decimal::Decimal##bit_width out; \
- RETURN_NOT_OK(PythonDecimalToArrowDecimal((item), &out)); \
- return ((builder)->Append(out)); \
- break; \
+#define DECIMAL_CONVERT_CASE(bit_width, item, builder) \
+ case bit_width: { \
+ arrow::decimal::Decimal##bit_width out; \
+ std::string string_out; \
+ RETURN_NOT_OK(PythonDecimalToString((item), &string_out)); \
+ RETURN_NOT_OK(FromString(string_out, &out)); \
+ return ((builder)->Append(out)); \
+ break; \
}
-class DecimalConverter : public TypedConverterVisitor<
- arrow::DecimalBuilder, DecimalConverter> {
+class DecimalConverter
+ : public TypedConverterVisitor<arrow::DecimalBuilder, DecimalConverter> {
public:
inline Status AppendItem(const OwnedRef& item) {
/// Can the compiler figure out that the case statement below isn't necessary
@@ -560,11 +551,11 @@ class DecimalConverter : public TypedConverterVisitor<
/// TODO(phillipc): Check for nan?
if (item.obj() != Py_None) {
switch (bit_width) {
- DECIMAL_CONVERT_CASE(32, item.obj(), typed_builder_)
- DECIMAL_CONVERT_CASE(64, item.obj(), typed_builder_)
- DECIMAL_CONVERT_CASE(128, item.obj(), typed_builder_)
- default:
- return Status::OK();
+ DECIMAL_CONVERT_CASE(32, item.obj(), typed_builder_)
+ DECIMAL_CONVERT_CASE(64, item.obj(), typed_builder_)
+ DECIMAL_CONVERT_CASE(128, item.obj(), typed_builder_)
+ default:
+ return Status::OK();
}
RETURN_IF_PYERROR();
} else {
@@ -620,8 +611,7 @@ Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) {
}
Status AppendPySequence(PyObject* obj, const std::shared_ptr<DataType>& type,
- const std::shared_ptr<ArrayBuilder>& builder,
- int64_t size) {
+ const std::shared_ptr<ArrayBuilder>& builder, int64_t size) {
PyDateTime_IMPORT;
std::shared_ptr<SeqConverter> converter = GetConverter(type);
if (converter == nullptr) {
http://git-wip-us.apache.org/repos/asf/arrow/blob/69582528/cpp/src/arrow/python/builtin_convert.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.h b/cpp/src/arrow/python/builtin_convert.h
index 7f42c33..54860b3 100644
--- a/cpp/src/arrow/python/builtin_convert.h
+++ b/cpp/src/arrow/python/builtin_convert.h
@@ -44,8 +44,7 @@ ARROW_EXPORT arrow::Status InferArrowSize(PyObject* obj, int64_t* size);
ARROW_EXPORT arrow::Status AppendPySequence(PyObject* obj,
const std::shared_ptr<arrow::DataType>& type,
- const std::shared_ptr<arrow::ArrayBuilder>& builder,
- int64_t size);
+ const std::shared_ptr<arrow::ArrayBuilder>& builder, int64_t size);
// Type and size inference
ARROW_EXPORT
http://git-wip-us.apache.org/repos/asf/arrow/blob/69582528/cpp/src/arrow/python/helpers.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index e5d1d38..76ec3a1 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -71,9 +71,7 @@ Status ImportFromModule(const OwnedRef& module, const std::string& name, OwnedRe
return Status::OK();
}
-template <typename T>
-Status PythonDecimalToArrowDecimal(
- PyObject* python_decimal, decimal::Decimal<T>* arrow_decimal) {
+Status PythonDecimalToString(PyObject* python_decimal, std::string* out) {
// Call Python's str(decimal_object)
OwnedRef str_obj(PyObject_Str(python_decimal));
RETURN_IF_PYERROR();
@@ -87,16 +85,10 @@ Status PythonDecimalToArrowDecimal(
Py_ssize_t size = str.size;
std::string c_string(bytes, size);
- return FromString(c_string, arrow_decimal);
+ *out = c_string;
+ return Status::OK();
}
-template Status ARROW_TEMPLATE_EXPORT PythonDecimalToArrowDecimal(
- PyObject* python_decimal, decimal::Decimal32* arrow_decimal);
-template Status ARROW_TEMPLATE_EXPORT PythonDecimalToArrowDecimal(
- PyObject* python_decimal, decimal::Decimal64* arrow_decimal);
-template Status ARROW_TEMPLATE_EXPORT PythonDecimalToArrowDecimal(
- PyObject* python_decimal, decimal::Decimal128* arrow_decimal);
-
Status InferDecimalPrecisionAndScale(
PyObject* python_decimal, int* precision, int* scale) {
// Call Python's str(decimal_object)
http://git-wip-us.apache.org/repos/asf/arrow/blob/69582528/cpp/src/arrow/python/helpers.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h
index c6402d8..e065669 100644
--- a/cpp/src/arrow/python/helpers.h
+++ b/cpp/src/arrow/python/helpers.h
@@ -29,13 +29,6 @@
namespace arrow {
-namespace decimal {
-
-template <typename T>
-struct Decimal;
-
-} // namespace decimal
-
namespace py {
class OwnedRef;
@@ -46,9 +39,7 @@ Status ARROW_EXPORT ImportModule(const std::string& module_name, OwnedRef* ref);
Status ARROW_EXPORT ImportFromModule(
const OwnedRef& module, const std::string& module_name, OwnedRef* ref);
-template <typename T>
-Status ARROW_EXPORT PythonDecimalToArrowDecimal(
- PyObject* python_decimal, decimal::Decimal<T>* arrow_decimal);
+Status ARROW_EXPORT PythonDecimalToString(PyObject* python_decimal, std::string* out);
Status ARROW_EXPORT InferDecimalPrecisionAndScale(
PyObject* python_decimal, int* precision = nullptr, int* scale = nullptr);
http://git-wip-us.apache.org/repos/asf/arrow/blob/69582528/cpp/src/arrow/python/pandas_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc
index ea23496..2918f9e 100644
--- a/cpp/src/arrow/python/pandas_convert.cc
+++ b/cpp/src/arrow/python/pandas_convert.cc
@@ -545,12 +545,14 @@ Status PandasConverter::ConvertDates() {
return date_builder.Finish(&out_);
}
-#define CONVERT_DECIMAL_CASE(bit_width, builder, object) \
- case bit_width: { \
- decimal::Decimal##bit_width d; \
- RETURN_NOT_OK(PythonDecimalToArrowDecimal((object), &d)); \
- RETURN_NOT_OK((builder).Append(d)); \
- break; \
+#define CONVERT_DECIMAL_CASE(bit_width, builder, object) \
+ case bit_width: { \
+ decimal::Decimal##bit_width d; \
+ std::string string_out; \
+ RETURN_NOT_OK(PythonDecimalToString((object), &string_out)); \
+ RETURN_NOT_OK(FromString(string_out, &d)); \
+ RETURN_NOT_OK((builder).Append(d)); \
+ break; \
}
Status PandasConverter::ConvertDecimals() {
http://git-wip-us.apache.org/repos/asf/arrow/blob/69582528/cpp/src/arrow/python/python-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc
index cbc9377..43c7f60 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -40,7 +40,7 @@ TEST(PyBuffer, InvalidInputObject) {
PyBuffer buffer(Py_None);
}
-TEST(DecimalTest, TestPythonDecimalToArrowDecimal128) {
+TEST(DecimalTest, TestPythonDecimalToString) {
PyAcquireGIL lock;
OwnedRef decimal;
@@ -63,11 +63,13 @@ TEST(DecimalTest, TestPythonDecimalToArrowDecimal128) {
ASSERT_NE(pydecimal.obj(), nullptr);
ASSERT_EQ(PyErr_Occurred(), nullptr);
- decimal::Decimal128 arrow_decimal;
boost::multiprecision::int128_t boost_decimal(decimal_string);
- PyObject* obj = pydecimal.obj();
- ASSERT_OK(PythonDecimalToArrowDecimal(obj, &arrow_decimal));
- ASSERT_EQ(boost_decimal, arrow_decimal.value);
+ PyObject* python_object = pydecimal.obj();
+ ASSERT_NE(python_object, nullptr);
+
+ std::string string_result;
+ ASSERT_OK(PythonDecimalToString(python_object, &string_result));
+ ASSERT_EQ(boost_decimal.str(), string_result);
}
TEST(PandasConversionTest, TestObjectBlockWriteFails) {