You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2017/06/29 20:01:06 UTC

arrow git commit: ARROW-1165: [C++] Refactor PythonDecimalToArrowDecimal to not use templates

Repository: arrow
Updated Branches:
  refs/heads/master 65558dbb9 -> 69582528e


ARROW-1165: [C++] Refactor PythonDecimalToArrowDecimal to not use templates

Author: Phillip Cloud <cp...@gmail.com>

Closes #794 from cpcloud/ARROW-1165 and squashes the following commits:

53565ea [Phillip Cloud] ARROW-1165: [C++] Refactor PythonDecimalToArrowDecimal to not use templates


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/69582528
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/69582528
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/69582528

Branch: refs/heads/master
Commit: 69582528e00f465b6f2b16d39ebe20dc6811ec6c
Parents: 65558db
Author: Phillip Cloud <cp...@gmail.com>
Authored: Thu Jun 29 22:01:00 2017 +0200
Committer: Uwe L. Korn <uw...@xhochy.com>
Committed: Thu Jun 29 22:01:00 2017 +0200

----------------------------------------------------------------------
 cpp/src/arrow/python/builtin_convert.cc | 112 ++++++++++++---------------
 cpp/src/arrow/python/builtin_convert.h  |   3 +-
 cpp/src/arrow/python/helpers.cc         |  14 +---
 cpp/src/arrow/python/helpers.h          |  11 +--
 cpp/src/arrow/python/pandas_convert.cc  |  14 ++--
 cpp/src/arrow/python/python-test.cc     |  12 +--
 6 files changed, 71 insertions(+), 95 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/69582528/cpp/src/arrow/python/builtin_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index d3bfb37..97e2bb7 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -160,16 +160,16 @@ class SeqVisitor {
     if (PySequence_Check(obj)) {
       Py_ssize_t size = PySequence_Size(obj);
       for (int64_t i = 0; i < size; ++i) {
-	// TODO(wesm): Specialize for PyList_GET_ITEM?
-	OwnedRef ref = OwnedRef(PySequence_GetItem(obj, i));
-	RETURN_NOT_OK(VisitElem(ref, level));
+        // TODO(wesm): Specialize for PyList_GET_ITEM?
+        OwnedRef ref = OwnedRef(PySequence_GetItem(obj, i));
+        RETURN_NOT_OK(VisitElem(ref, level));
       }
     } else if (PyObject_HasAttrString(obj, "__iter__")) {
       OwnedRef iter = OwnedRef(PyObject_GetIter(obj));
       PyObject* item;
       while ((item = PyIter_Next(iter.obj()))) {
-	OwnedRef ref = OwnedRef(item);
-	RETURN_NOT_OK(VisitElem(ref, level));
+        OwnedRef ref = OwnedRef(item);
+        RETURN_NOT_OK(VisitElem(ref, level));
       }
     } else {
       return Status::TypeError("Object is not a sequence or iterable");
@@ -181,10 +181,10 @@ class SeqVisitor {
     // If all the non-list inputs were null (or there were no inputs)
     if (scalars_.total_count() == 0) {
       if (max_nesting_level_ == 0) {
-	// If its just a single empty list or list of nulls, return null.
+        // If its just a single empty list or list of nulls, return null.
         return null();
       } else {
-	// Error, if we have nesting but no concrete base type.
+        // Error, if we have nesting but no concrete base type.
         return nullptr;
       }
     } else {
@@ -201,7 +201,7 @@ class SeqVisitor {
     if (scalars_.total_count() > 0) {
       if (num_nesting_levels() > 1) {
         return Status::Invalid("Mixed nesting levels not supported");
-      // If the nesting goes deeper than the deepest scalar
+        // If the nesting goes deeper than the deepest scalar
       } else if (max_observed_level() < max_nesting_level_) {
         return Status::Invalid("Mixed nesting levels not supported");
       }
@@ -238,7 +238,7 @@ class SeqVisitor {
   int nesting_histogram_[MAX_NESTING_LEVELS];
 
   // Visits a specific element (inner part of the loop).
-  Status VisitElem(const OwnedRef &item_ref, int level) {
+  Status VisitElem(const OwnedRef& item_ref, int level) {
     if (PyList_Check(item_ref.obj())) {
       RETURN_NOT_OK(Visit(item_ref.obj(), level + 1));
     } else if (PyDict_Check(item_ref.obj())) {
@@ -246,10 +246,10 @@ class SeqVisitor {
     } else {
       // We permit nulls at any level of nesting
       if (item_ref.obj() == Py_None) {
-	// TODO
+        // TODO
       } else {
-	++nesting_histogram_[level];
-	scalars_.Visit(item_ref.obj());
+        ++nesting_histogram_[level];
+        scalars_.Visit(item_ref.obj());
       }
     }
     return Status::OK();
@@ -282,7 +282,6 @@ Status InferArrowSize(PyObject* obj, int64_t* size) {
 // Non-exhaustive type inference
 Status InferArrowTypeAndSize(
     PyObject* obj, int64_t* size, std::shared_ptr<DataType>* out_type) {
-
   RETURN_NOT_OK(InferArrowSize(obj, size));
 
   // For 0-length sequences, refuse to guess
@@ -338,8 +337,8 @@ class TypedConverterVisitor : public TypedConverter<BuilderType> {
     // Iterate over the items adding each one
     if (PySequence_Check(obj)) {
       for (int64_t i = 0; i < size; ++i) {
-	OwnedRef ref(PySequence_GetItem(obj, i));
-	RETURN_NOT_OK(static_cast<Derived*>(this)->AppendItem(ref));
+        OwnedRef ref(PySequence_GetItem(obj, i));
+        RETURN_NOT_OK(static_cast<Derived*>(this)->AppendItem(ref));
       }
     } else if (PyObject_HasAttrString(obj, "__iter__")) {
       PyObject* iter = PyObject_GetIter(obj);
@@ -349,13 +348,11 @@ class TypedConverterVisitor : public TypedConverter<BuilderType> {
       // To allow people with long generators to only convert a subset, stop
       // consuming at size.
       while ((item = PyIter_Next(iter)) && i < size) {
-	OwnedRef ref(item);
-	RETURN_NOT_OK(static_cast<Derived*>(this)->AppendItem(ref));
-	++i;
-      }
-      if (size != i) {
-	RETURN_NOT_OK(this->typed_builder_->Resize(i));
+        OwnedRef ref(item);
+        RETURN_NOT_OK(static_cast<Derived*>(this)->AppendItem(ref));
+        ++i;
       }
+      if (size != i) { RETURN_NOT_OK(this->typed_builder_->Resize(i)); }
     } else {
       return Status::TypeError("Object is not a sequence or iterable");
     }
@@ -365,24 +362,22 @@ class TypedConverterVisitor : public TypedConverter<BuilderType> {
   virtual Status AppendItem(const OwnedRef& item) = 0;
 };
 
-class BoolConverter : public TypedConverterVisitor<
-  BooleanBuilder, BoolConverter> {
+class BoolConverter : public TypedConverterVisitor<BooleanBuilder, BoolConverter> {
  public:
   inline Status AppendItem(const OwnedRef& item) {
     if (item.obj() == Py_None) {
       return typed_builder_->AppendNull();
     } else {
       if (item.obj() == Py_True) {
-	return typed_builder_->Append(true);
+        return typed_builder_->Append(true);
       } else {
-	return typed_builder_->Append(false);
+        return typed_builder_->Append(false);
       }
     }
   }
 };
 
-class Int64Converter : public TypedConverterVisitor<
-  Int64Builder, Int64Converter> {
+class Int64Converter : public TypedConverterVisitor<Int64Builder, Int64Converter> {
  public:
   inline Status AppendItem(const OwnedRef& item) {
     int64_t val;
@@ -396,8 +391,7 @@ class Int64Converter : public TypedConverterVisitor<
   }
 };
 
-class DateConverter : public TypedConverterVisitor<
-  Date64Builder, DateConverter> {
+class DateConverter : public TypedConverterVisitor<Date64Builder, DateConverter> {
  public:
   inline Status AppendItem(const OwnedRef& item) {
     if (item.obj() == Py_None) {
@@ -409,22 +403,21 @@ class DateConverter : public TypedConverterVisitor<
   }
 };
 
-class TimestampConverter : public TypedConverterVisitor<
-  Date64Builder, TimestampConverter> {
+class TimestampConverter
+    : public TypedConverterVisitor<Date64Builder, TimestampConverter> {
  public:
   inline Status AppendItem(const OwnedRef& item) {
     if (item.obj() == Py_None) {
       return typed_builder_->AppendNull();
     } else {
       PyDateTime_DateTime* pydatetime =
-	reinterpret_cast<PyDateTime_DateTime*>(item.obj());
+          reinterpret_cast<PyDateTime_DateTime*>(item.obj());
       return typed_builder_->Append(PyDateTime_to_us(pydatetime));
     }
   }
 };
 
-class DoubleConverter : public TypedConverterVisitor<
-  DoubleBuilder, DoubleConverter> {
+class DoubleConverter : public TypedConverterVisitor<DoubleBuilder, DoubleConverter> {
  public:
   inline Status AppendItem(const OwnedRef& item) {
     double val;
@@ -438,8 +431,7 @@ class DoubleConverter : public TypedConverterVisitor<
   }
 };
 
-class BytesConverter : public TypedConverterVisitor<
-  BinaryBuilder, BytesConverter> {
+class BytesConverter : public TypedConverterVisitor<BinaryBuilder, BytesConverter> {
  public:
   inline Status AppendItem(const OwnedRef& item) {
     PyObject* bytes_obj;
@@ -466,8 +458,8 @@ class BytesConverter : public TypedConverterVisitor<
   }
 };
 
-class FixedWidthBytesConverter : public TypedConverterVisitor<
-  FixedSizeBinaryBuilder, FixedWidthBytesConverter> {
+class FixedWidthBytesConverter
+    : public TypedConverterVisitor<FixedSizeBinaryBuilder, FixedWidthBytesConverter> {
  public:
   inline Status AppendItem(const OwnedRef& item) {
     PyObject* bytes_obj;
@@ -489,12 +481,11 @@ class FixedWidthBytesConverter : public TypedConverterVisitor<
     // No error checking
     RETURN_NOT_OK(CheckPythonBytesAreFixedLength(bytes_obj, expected_length));
     return typed_builder_->Append(
-	reinterpret_cast<const uint8_t*>(PyBytes_AS_STRING(bytes_obj)));
+        reinterpret_cast<const uint8_t*>(PyBytes_AS_STRING(bytes_obj)));
   }
 };
 
-class UTF8Converter : public TypedConverterVisitor<
-  StringBuilder, UTF8Converter> {
+class UTF8Converter : public TypedConverterVisitor<StringBuilder, UTF8Converter> {
  public:
   inline Status AppendItem(const OwnedRef& item) {
     PyObject* bytes_obj;
@@ -518,19 +509,17 @@ class UTF8Converter : public TypedConverterVisitor<
   }
 };
 
-class ListConverter : public TypedConverterVisitor<
-  ListBuilder, ListConverter> {
+class ListConverter : public TypedConverterVisitor<ListBuilder, ListConverter> {
  public:
   Status Init(const std::shared_ptr<ArrayBuilder>& builder) override;
 
-  inline Status AppendItem(const OwnedRef& item) {
+  inline Status AppendItem(const OwnedRef& item) override {
     if (item.obj() == Py_None) {
       return typed_builder_->AppendNull();
     } else {
       typed_builder_->Append();
       PyObject* item_obj = item.obj();
-      int64_t list_size =
-	static_cast<int64_t>(PySequence_Size(item_obj));
+      int64_t list_size = static_cast<int64_t>(PySequence_Size(item_obj));
       return value_converter_->AppendData(item_obj, list_size);
     }
   }
@@ -539,16 +528,18 @@ class ListConverter : public TypedConverterVisitor<
   std::shared_ptr<SeqConverter> value_converter_;
 };
 
-#define DECIMAL_CONVERT_CASE(bit_width, item, builder)        \
-  case bit_width: {                                           \
-    arrow::decimal::Decimal##bit_width out;                   \
-    RETURN_NOT_OK(PythonDecimalToArrowDecimal((item), &out)); \
-    return ((builder)->Append(out));                          \
-    break;                                                    \
+#define DECIMAL_CONVERT_CASE(bit_width, item, builder)         \
+  case bit_width: {                                            \
+    arrow::decimal::Decimal##bit_width out;                    \
+    std::string string_out;                                    \
+    RETURN_NOT_OK(PythonDecimalToString((item), &string_out)); \
+    RETURN_NOT_OK(FromString(string_out, &out));               \
+    return ((builder)->Append(out));                           \
+    break;                                                     \
   }
 
-class DecimalConverter : public TypedConverterVisitor<
-  arrow::DecimalBuilder, DecimalConverter> {
+class DecimalConverter
+    : public TypedConverterVisitor<arrow::DecimalBuilder, DecimalConverter> {
  public:
   inline Status AppendItem(const OwnedRef& item) {
     /// Can the compiler figure out that the case statement below isn't necessary
@@ -560,11 +551,11 @@ class DecimalConverter : public TypedConverterVisitor<
     /// TODO(phillipc): Check for nan?
     if (item.obj() != Py_None) {
       switch (bit_width) {
-	  DECIMAL_CONVERT_CASE(32, item.obj(), typed_builder_)
-	  DECIMAL_CONVERT_CASE(64, item.obj(), typed_builder_)
-	  DECIMAL_CONVERT_CASE(128, item.obj(), typed_builder_)
-      default:
-	  return Status::OK();
+        DECIMAL_CONVERT_CASE(32, item.obj(), typed_builder_)
+        DECIMAL_CONVERT_CASE(64, item.obj(), typed_builder_)
+        DECIMAL_CONVERT_CASE(128, item.obj(), typed_builder_)
+        default:
+          return Status::OK();
       }
       RETURN_IF_PYERROR();
     } else {
@@ -620,8 +611,7 @@ Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) {
 }
 
 Status AppendPySequence(PyObject* obj, const std::shared_ptr<DataType>& type,
-			const std::shared_ptr<ArrayBuilder>& builder,
-			int64_t size) {
+    const std::shared_ptr<ArrayBuilder>& builder, int64_t size) {
   PyDateTime_IMPORT;
   std::shared_ptr<SeqConverter> converter = GetConverter(type);
   if (converter == nullptr) {

http://git-wip-us.apache.org/repos/asf/arrow/blob/69582528/cpp/src/arrow/python/builtin_convert.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.h b/cpp/src/arrow/python/builtin_convert.h
index 7f42c33..54860b3 100644
--- a/cpp/src/arrow/python/builtin_convert.h
+++ b/cpp/src/arrow/python/builtin_convert.h
@@ -44,8 +44,7 @@ ARROW_EXPORT arrow::Status InferArrowSize(PyObject* obj, int64_t* size);
 
 ARROW_EXPORT arrow::Status AppendPySequence(PyObject* obj,
     const std::shared_ptr<arrow::DataType>& type,
-    const std::shared_ptr<arrow::ArrayBuilder>& builder,
-    int64_t size);
+    const std::shared_ptr<arrow::ArrayBuilder>& builder, int64_t size);
 
 // Type and size inference
 ARROW_EXPORT

http://git-wip-us.apache.org/repos/asf/arrow/blob/69582528/cpp/src/arrow/python/helpers.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index e5d1d38..76ec3a1 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -71,9 +71,7 @@ Status ImportFromModule(const OwnedRef& module, const std::string& name, OwnedRe
   return Status::OK();
 }
 
-template <typename T>
-Status PythonDecimalToArrowDecimal(
-    PyObject* python_decimal, decimal::Decimal<T>* arrow_decimal) {
+Status PythonDecimalToString(PyObject* python_decimal, std::string* out) {
   // Call Python's str(decimal_object)
   OwnedRef str_obj(PyObject_Str(python_decimal));
   RETURN_IF_PYERROR();
@@ -87,16 +85,10 @@ Status PythonDecimalToArrowDecimal(
   Py_ssize_t size = str.size;
 
   std::string c_string(bytes, size);
-  return FromString(c_string, arrow_decimal);
+  *out = c_string;
+  return Status::OK();
 }
 
-template Status ARROW_TEMPLATE_EXPORT PythonDecimalToArrowDecimal(
-    PyObject* python_decimal, decimal::Decimal32* arrow_decimal);
-template Status ARROW_TEMPLATE_EXPORT PythonDecimalToArrowDecimal(
-    PyObject* python_decimal, decimal::Decimal64* arrow_decimal);
-template Status ARROW_TEMPLATE_EXPORT PythonDecimalToArrowDecimal(
-    PyObject* python_decimal, decimal::Decimal128* arrow_decimal);
-
 Status InferDecimalPrecisionAndScale(
     PyObject* python_decimal, int* precision, int* scale) {
   // Call Python's str(decimal_object)

http://git-wip-us.apache.org/repos/asf/arrow/blob/69582528/cpp/src/arrow/python/helpers.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h
index c6402d8..e065669 100644
--- a/cpp/src/arrow/python/helpers.h
+++ b/cpp/src/arrow/python/helpers.h
@@ -29,13 +29,6 @@
 
 namespace arrow {
 
-namespace decimal {
-
-template <typename T>
-struct Decimal;
-
-}  // namespace decimal
-
 namespace py {
 
 class OwnedRef;
@@ -46,9 +39,7 @@ Status ARROW_EXPORT ImportModule(const std::string& module_name, OwnedRef* ref);
 Status ARROW_EXPORT ImportFromModule(
     const OwnedRef& module, const std::string& module_name, OwnedRef* ref);
 
-template <typename T>
-Status ARROW_EXPORT PythonDecimalToArrowDecimal(
-    PyObject* python_decimal, decimal::Decimal<T>* arrow_decimal);
+Status ARROW_EXPORT PythonDecimalToString(PyObject* python_decimal, std::string* out);
 
 Status ARROW_EXPORT InferDecimalPrecisionAndScale(
     PyObject* python_decimal, int* precision = nullptr, int* scale = nullptr);

http://git-wip-us.apache.org/repos/asf/arrow/blob/69582528/cpp/src/arrow/python/pandas_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc
index ea23496..2918f9e 100644
--- a/cpp/src/arrow/python/pandas_convert.cc
+++ b/cpp/src/arrow/python/pandas_convert.cc
@@ -545,12 +545,14 @@ Status PandasConverter::ConvertDates() {
   return date_builder.Finish(&out_);
 }
 
-#define CONVERT_DECIMAL_CASE(bit_width, builder, object)      \
-  case bit_width: {                                           \
-    decimal::Decimal##bit_width d;                            \
-    RETURN_NOT_OK(PythonDecimalToArrowDecimal((object), &d)); \
-    RETURN_NOT_OK((builder).Append(d));                       \
-    break;                                                    \
+#define CONVERT_DECIMAL_CASE(bit_width, builder, object)         \
+  case bit_width: {                                              \
+    decimal::Decimal##bit_width d;                               \
+    std::string string_out;                                      \
+    RETURN_NOT_OK(PythonDecimalToString((object), &string_out)); \
+    RETURN_NOT_OK(FromString(string_out, &d));                   \
+    RETURN_NOT_OK((builder).Append(d));                          \
+    break;                                                       \
   }
 
 Status PandasConverter::ConvertDecimals() {

http://git-wip-us.apache.org/repos/asf/arrow/blob/69582528/cpp/src/arrow/python/python-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc
index cbc9377..43c7f60 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -40,7 +40,7 @@ TEST(PyBuffer, InvalidInputObject) {
   PyBuffer buffer(Py_None);
 }
 
-TEST(DecimalTest, TestPythonDecimalToArrowDecimal128) {
+TEST(DecimalTest, TestPythonDecimalToString) {
   PyAcquireGIL lock;
 
   OwnedRef decimal;
@@ -63,11 +63,13 @@ TEST(DecimalTest, TestPythonDecimalToArrowDecimal128) {
   ASSERT_NE(pydecimal.obj(), nullptr);
   ASSERT_EQ(PyErr_Occurred(), nullptr);
 
-  decimal::Decimal128 arrow_decimal;
   boost::multiprecision::int128_t boost_decimal(decimal_string);
-  PyObject* obj = pydecimal.obj();
-  ASSERT_OK(PythonDecimalToArrowDecimal(obj, &arrow_decimal));
-  ASSERT_EQ(boost_decimal, arrow_decimal.value);
+  PyObject* python_object = pydecimal.obj();
+  ASSERT_NE(python_object, nullptr);
+
+  std::string string_result;
+  ASSERT_OK(PythonDecimalToString(python_object, &string_result));
+  ASSERT_EQ(boost_decimal.str(), string_result);
 }
 
 TEST(PandasConversionTest, TestObjectBlockWriteFails) {