You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/08/05 13:58:07 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #7901: ARROW-9543: [C++] Simplify parsing/formatting utilities

pitrou commented on a change in pull request #7901:
URL: https://github.com/apache/arrow/pull/7901#discussion_r465734624



##########
File path: cpp/src/arrow/util/formatting.h
##########
@@ -40,12 +40,51 @@ namespace arrow {
 namespace internal {
 
 /// \brief The entry point for conversion to strings.
+///
+/// Specializations of FormatValueTraits for `ARROW_TYPE` must define:
+/// - A member type `value_type` which will be formatted.
+/// - A static member function `MaxSize(const ARROW_TYPE& t)` indicating the
+///   maximum number of characters which formatting a `value_type` may yield.
+/// - The static member function `Convert`, callable with signature

Review comment:
       Why not call it `Format`?

##########
File path: cpp/src/arrow/util/formatting.cc
##########
@@ -36,38 +31,29 @@ const char digit_pairs[] =
     "6061626364656667686970717273747576777879"
     "8081828384858687888990919293949596979899";
 
-}  // namespace detail
-
-struct FloatToStringFormatter::Impl {
-  Impl()
-      : converter_(DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan",
-                   'e', -6, 10, 6, 0) {}
-
-  DoubleToStringConverter converter_;
-};
-
-FloatToStringFormatter::FloatToStringFormatter() : impl_(new Impl()) {}
+using util::double_conversion::DoubleToStringConverter;
 
-FloatToStringFormatter::~FloatToStringFormatter() {}
+static DoubleToStringConverter g_double_converter(
+    DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan", 'e', -6, 10, 6,
+    0);
 
-int FloatToStringFormatter::FormatFloat(float v, char* out_buffer, int out_size) {
-  DCHECK_GE(out_size, kMinBufferSize);
+int FormatFloat(float v, size_t buffer_size, char* buffer) {
   // StringBuilder checks bounds in debug mode for us
-  util::double_conversion::StringBuilder builder(out_buffer, out_size);
-  bool result = impl_->converter_.ToShortestSingle(v, &builder);
+  util::double_conversion::StringBuilder builder(buffer, static_cast<int>(buffer_size));
+  bool result = g_double_converter.ToShortestSingle(v, &builder);
   DCHECK(result);
   ARROW_UNUSED(result);
   return builder.position();
 }
 
-int FloatToStringFormatter::FormatFloat(double v, char* out_buffer, int out_size) {
-  DCHECK_GE(out_size, kMinBufferSize);
-  util::double_conversion::StringBuilder builder(out_buffer, out_size);
-  bool result = impl_->converter_.ToShortest(v, &builder);
+int FormatFloat(double v, size_t buffer_size, char* buffer) {
+  util::double_conversion::StringBuilder builder(buffer, static_cast<int>(buffer_size));
+  bool result = g_double_converter.ToShortest(v, &builder);
   DCHECK(result);
   ARROW_UNUSED(result);
   return builder.position();
 }
 
+}  // namespace detail

Review comment:
       Why not use the anonymous namespace for private functions?

##########
File path: cpp/src/arrow/util/formatting.cc
##########
@@ -36,38 +31,29 @@ const char digit_pairs[] =
     "6061626364656667686970717273747576777879"
     "8081828384858687888990919293949596979899";
 
-}  // namespace detail
-
-struct FloatToStringFormatter::Impl {
-  Impl()
-      : converter_(DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan",
-                   'e', -6, 10, 6, 0) {}
-
-  DoubleToStringConverter converter_;
-};
-
-FloatToStringFormatter::FloatToStringFormatter() : impl_(new Impl()) {}
+using util::double_conversion::DoubleToStringConverter;
 
-FloatToStringFormatter::~FloatToStringFormatter() {}
+static DoubleToStringConverter g_double_converter(
+    DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan", 'e', -6, 10, 6,
+    0);
 
-int FloatToStringFormatter::FormatFloat(float v, char* out_buffer, int out_size) {
-  DCHECK_GE(out_size, kMinBufferSize);
+int FormatFloat(float v, size_t buffer_size, char* buffer) {

Review comment:
       It's weird to take a `size_t` buffer size and return a `int` for the number of bytes written.

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_internal.cc
##########
@@ -270,6 +272,30 @@ void AddCommonCasts(Type::type out_type_id, OutputType out_ty, CastFunction* fun
                             MemAllocation::NO_PREALLOCATE));
 }
 
+// TODO(bkietz): provide a generic from-string parsing cast

Review comment:
       TODOs should be kept in JIRA, not sprinkled around source code, IMHO.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org