You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2019/05/10 08:58:50 UTC

[arrow] branch master updated: ARROW-4505: [C++] adding pretty print for dates, times, and timestamps

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new f88474c  ARROW-4505: [C++] adding pretty print for dates, times, and timestamps
f88474c is described below

commit f88474c84e7f02e226eb4cc32afef5e2bbc6e5b4
Author: Benjamin Kietzman <be...@gmail.com>
AuthorDate: Fri May 10 10:55:22 2019 +0200

    ARROW-4505: [C++] adding pretty print for dates, times, and timestamps
    
    | type | fmt |
    |---|---|
    | DATE32 (days since epoch) | YYYY-MM-DD |
    | DATE64 (milliseconds since epoch, always a whole number of days) | YYYY-MM-DD |
    | TIMESTAMP (`<TimeUnit>` since epoch) | YYYY-MM-DD HH:mm:ss.xxx (timezone is not printed) |
    | TIME32 (`<TimeUnit>` since midnight) | HH:mm:ss.xxx |
    | TIME64 (`<TimeUnit>` since midnight) | HH:mm:ss.xxx |
    
    Author: Benjamin Kietzman <be...@gmail.com>
    
    Closes #4268 from bkietz/4505-Nicer-PrettyPrint-for-date32 and squashes the following commits:
    
    95822dc80 <Benjamin Kietzman> remove timezone from timestamp pretty print
    9086c9d1b <Benjamin Kietzman> fix: sub-day increments in date64 are invalid
    e7267027f <Benjamin Kietzman> remove unused enable_if, time64 default precision -> nano
    ddc930a59 <Benjamin Kietzman> adding pretty print for dates, times, and timestamps
---
 cpp/src/arrow/pretty_print-test.cc | 84 +++++++++++++++++++++++++++++++++++++-
 cpp/src/arrow/pretty_print.cc      | 67 +++++++++++++++++++++++++++++-
 cpp/src/arrow/type.h               |  2 +-
 3 files changed, 148 insertions(+), 5 deletions(-)

diff --git a/cpp/src/arrow/pretty_print-test.cc b/cpp/src/arrow/pretty_print-test.cc
index d914a75..76b1a31 100644
--- a/cpp/src/arrow/pretty_print-test.cc
+++ b/cpp/src/arrow/pretty_print-test.cc
@@ -71,14 +71,23 @@ void Check(const T& obj, const PrettyPrintOptions& options, const char* expected
 }
 
 template <typename TYPE, typename C_TYPE>
-void CheckPrimitive(const PrettyPrintOptions& options, const std::vector<bool>& is_valid,
+void CheckPrimitive(const std::shared_ptr<DataType>& type,
+                    const PrettyPrintOptions& options, const std::vector<bool>& is_valid,
                     const std::vector<C_TYPE>& values, const char* expected,
                     bool check_operator = true) {
   std::shared_ptr<Array> array;
-  ArrayFromVector<TYPE, C_TYPE>(is_valid, values, &array);
+  ArrayFromVector<TYPE, C_TYPE>(type, is_valid, values, &array);
   CheckArray(*array, options, expected, check_operator);
 }
 
+template <typename TYPE, typename C_TYPE>
+void CheckPrimitive(const PrettyPrintOptions& options, const std::vector<bool>& is_valid,
+                    const std::vector<C_TYPE>& values, const char* expected,
+                    bool check_operator = true) {
+  CheckPrimitive<TYPE, C_TYPE>(TypeTraits<TYPE>::type_singleton(), options, is_valid,
+                               values, expected, check_operator);
+}
+
 TEST_F(TestPrettyPrint, PrimitiveType) {
   std::vector<bool> is_valid = {true, true, false, true, false};
 
@@ -156,6 +165,77 @@ TEST_F(TestPrettyPrint, PrimitiveType) {
   CheckPrimitive<StringType, std::string>({2, 10}, is_valid, values3, ex3_in2);
 }
 
+TEST_F(TestPrettyPrint, DateTimeTypes) {
+  std::vector<bool> is_valid = {true, true, false, true, false};
+
+  {
+    std::vector<int32_t> values = {0, 1, 2, 31, 4};
+    static const char* expected = R"expected([
+  1970-01-01,
+  1970-01-02,
+  null,
+  1970-02-01,
+  null
+])expected";
+    CheckPrimitive<Date32Type, int32_t>({0, 10}, is_valid, values, expected);
+  }
+
+  {
+    constexpr int64_t ms_per_day = 24 * 60 * 60 * 1000;
+    std::vector<int64_t> values = {0 * ms_per_day, 1 * ms_per_day, 2 * ms_per_day,
+                                   31 * ms_per_day, 4 * ms_per_day};
+    static const char* expected = R"expected([
+  1970-01-01,
+  1970-01-02,
+  null,
+  1970-02-01,
+  null
+])expected";
+    CheckPrimitive<Date64Type, int64_t>({0, 10}, is_valid, values, expected);
+  }
+
+  {
+    std::vector<int64_t> values = {
+        0, 1, 2, 678 + 1000000 * (5 + 60 * (4 + 60 * (3 + 24 * int64_t(1)))), 4};
+    static const char* expected = R"expected([
+  1970-01-01 00:00:00.000000,
+  1970-01-01 00:00:00.000001,
+  null,
+  1970-01-02 03:04:05.000678,
+  null
+])expected";
+    CheckPrimitive<TimestampType, int64_t>(timestamp(TimeUnit::MICRO, "Transylvania"),
+                                           {0, 10}, is_valid, values, expected);
+  }
+
+  {
+    std::vector<int32_t> values = {1, 62, 2, 3 + 60 * (2 + 60 * 1), 4};
+    static const char* expected = R"expected([
+  00:00:01,
+  00:01:02,
+  null,
+  01:02:03,
+  null
+])expected";
+    CheckPrimitive<Time32Type, int32_t>(time32(TimeUnit::SECOND), {0, 10}, is_valid,
+                                        values, expected);
+  }
+
+  {
+    std::vector<int64_t> values = {
+        0, 1, 2, 678 + int64_t(1000000000) * (5 + 60 * (4 + 60 * 3)), 4};
+    static const char* expected = R"expected([
+  00:00:00.000000000,
+  00:00:00.000000001,
+  null,
+  03:04:05.000000678,
+  null
+])expected";
+    CheckPrimitive<Time64Type, int64_t>(time64(TimeUnit::NANO), {0, 10}, is_valid, values,
+                                        expected);
+  }
+}
+
 TEST_F(TestPrettyPrint, StructTypeBasic) {
   auto simple_1 = field("one", int32());
   auto simple_2 = field("two", int32());
diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc
index 4033977..9c2fcd3 100644
--- a/cpp/src/arrow/pretty_print.cc
+++ b/cpp/src/arrow/pretty_print.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <chrono>
 #include <cstddef>
 #include <cstdint>
 #include <iostream>
@@ -33,6 +34,7 @@
 #include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/string.h"
+#include "arrow/vendored/datetime.h"
 #include "arrow/visitor_inline.h"
 
 namespace arrow {
@@ -143,14 +145,44 @@ class ArrayPrinter : public PrettyPrinter {
   }
 
   template <typename T>
-  inline typename std::enable_if<IsInteger<T>::value, Status>::type WriteDataValues(
-      const T& array) {
+  inline typename std::enable_if<IsInteger<T>::value &&
+                                     !is_date<typename T::TypeClass>::value &&
+                                     !is_time<typename T::TypeClass>::value,
+                                 Status>::type
+  WriteDataValues(const T& array) {
     const auto data = array.raw_values();
     WriteValues(array, [&](int64_t i) { (*sink_) << static_cast<int64_t>(data[i]); });
     return Status::OK();
   }
 
   template <typename T>
+  enable_if_date<typename T::TypeClass, Status> WriteDataValues(const T& array) {
+    const auto data = array.raw_values();
+    using unit =
+        typename std::conditional<std::is_same<T, Date32Array>::value, util::date::days,
+                                  std::chrono::milliseconds>::type;
+    WriteValues(array, [&](int64_t i) { FormatDateTime<unit>("%F", data[i], true); });
+    return Status::OK();
+  }
+
+  Status WriteDataValues(const TimestampArray& array) {
+    const int64_t* data = array.raw_values();
+    const auto type = static_cast<const TimestampType*>(array.type().get());
+    WriteValues(array,
+                [&](int64_t i) { FormatDateTime(type->unit(), "%F %T", data[i], true); });
+    return Status::OK();
+  }
+
+  template <typename T>
+  enable_if_time<typename T::TypeClass, Status> WriteDataValues(const T& array) {
+    const auto data = array.raw_values();
+    const auto type = static_cast<const TimeType*>(array.type().get());
+    WriteValues(array,
+                [&](int64_t i) { FormatDateTime(type->unit(), "%T", data[i], false); });
+    return Status::OK();
+  }
+
+  template <typename T>
   inline typename std::enable_if<IsFloatingPoint<T>::value, Status>::type WriteDataValues(
       const T& array) {
     const auto data = array.raw_values();
@@ -320,9 +352,40 @@ class ArrayPrinter : public PrettyPrinter {
   }
 
  private:
+  template <typename Unit>
+  void FormatDateTime(const char* fmt, int64_t value, bool add_epoch) {
+    if (add_epoch) {
+      (*sink_) << util::date::format(fmt, epoch_ + Unit{value});
+    } else {
+      (*sink_) << util::date::format(fmt, Unit{value});
+    }
+  }
+
+  void FormatDateTime(TimeUnit::type unit, const char* fmt, int64_t value,
+                      bool add_epoch) {
+    switch (unit) {
+      case TimeUnit::NANO:
+        FormatDateTime<std::chrono::nanoseconds>(fmt, value, add_epoch);
+        break;
+      case TimeUnit::MICRO:
+        FormatDateTime<std::chrono::microseconds>(fmt, value, add_epoch);
+        break;
+      case TimeUnit::MILLI:
+        FormatDateTime<std::chrono::milliseconds>(fmt, value, add_epoch);
+        break;
+      case TimeUnit::SECOND:
+        FormatDateTime<std::chrono::seconds>(fmt, value, add_epoch);
+        break;
+    }
+  }
+
+  static util::date::sys_days epoch_;
   std::string null_rep_;
 };
 
+util::date::sys_days ArrayPrinter::epoch_ =
+    util::date::sys_days{util::date::jan / 1 / 1970};
+
 Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
   Indent();
   Write("-- is_valid:");
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 4c35378..9460f43 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -691,7 +691,7 @@ class ARROW_EXPORT Time64Type : public TimeType {
 
   int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
 
-  explicit Time64Type(TimeUnit::type unit = TimeUnit::MILLI);
+  explicit Time64Type(TimeUnit::type unit = TimeUnit::NANO);
 
   std::string ToString() const override;