You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2019/05/10 08:58:50 UTC
[arrow] branch master updated: ARROW-4505: [C++] adding pretty print for dates, times, and timestamps
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new f88474c ARROW-4505: [C++] adding pretty print for dates, times, and timestamps
f88474c is described below
commit f88474c84e7f02e226eb4cc32afef5e2bbc6e5b4
Author: Benjamin Kietzman <be...@gmail.com>
AuthorDate: Fri May 10 10:55:22 2019 +0200
ARROW-4505: [C++] adding pretty print for dates, times, and timestamps
| type | fmt |
|---|---|
| DATE32 (days since epoch) | YYYY-MM-DD |
| DATE64 (milliseconds since epoch, always a whole number of days) | YYYY-MM-DD |
| TIMESTAMP (`<TimeUnit>` since epoch) | YYYY-MM-DD HH:mm:ss.xxx (no timezone suffix; the number of fractional digits follows the unit) |
| TIME32 (`<TimeUnit>` since midnight) | HH:mm:ss.xxx |
| TIME64 (`<TimeUnit>` since midnight) | HH:mm:ss.xxx |
Author: Benjamin Kietzman <be...@gmail.com>
Closes #4268 from bkietz/4505-Nicer-PrettyPrint-for-date32 and squashes the following commits:
95822dc80 <Benjamin Kietzman> remove timezone from timestamp pretty print
9086c9d1b <Benjamin Kietzman> fix: sub-day increments in date64 are invalid
e7267027f <Benjamin Kietzman> remove unused enable_if, time64 default precision -> nano
ddc930a59 <Benjamin Kietzman> adding pretty print for dates, times, and timestamps
---
cpp/src/arrow/pretty_print-test.cc | 84 +++++++++++++++++++++++++++++++++++++-
cpp/src/arrow/pretty_print.cc | 67 +++++++++++++++++++++++++++++-
cpp/src/arrow/type.h | 2 +-
3 files changed, 148 insertions(+), 5 deletions(-)
diff --git a/cpp/src/arrow/pretty_print-test.cc b/cpp/src/arrow/pretty_print-test.cc
index d914a75..76b1a31 100644
--- a/cpp/src/arrow/pretty_print-test.cc
+++ b/cpp/src/arrow/pretty_print-test.cc
@@ -71,14 +71,23 @@ void Check(const T& obj, const PrettyPrintOptions& options, const char* expected
}
template <typename TYPE, typename C_TYPE>
-void CheckPrimitive(const PrettyPrintOptions& options, const std::vector<bool>& is_valid,
+void CheckPrimitive(const std::shared_ptr<DataType>& type,
+ const PrettyPrintOptions& options, const std::vector<bool>& is_valid,
const std::vector<C_TYPE>& values, const char* expected,
bool check_operator = true) {
std::shared_ptr<Array> array;
- ArrayFromVector<TYPE, C_TYPE>(is_valid, values, &array);
+ ArrayFromVector<TYPE, C_TYPE>(type, is_valid, values, &array);
CheckArray(*array, options, expected, check_operator);
}
+template <typename TYPE, typename C_TYPE>
+void CheckPrimitive(const PrettyPrintOptions& options, const std::vector<bool>& is_valid,
+ const std::vector<C_TYPE>& values, const char* expected,
+ bool check_operator = true) {
+ CheckPrimitive<TYPE, C_TYPE>(TypeTraits<TYPE>::type_singleton(), options, is_valid,
+ values, expected, check_operator);
+}
+
TEST_F(TestPrettyPrint, PrimitiveType) {
std::vector<bool> is_valid = {true, true, false, true, false};
@@ -156,6 +165,77 @@ TEST_F(TestPrettyPrint, PrimitiveType) {
CheckPrimitive<StringType, std::string>({2, 10}, is_valid, values3, ex3_in2);
}
+TEST_F(TestPrettyPrint, DateTimeTypes) {
+ std::vector<bool> is_valid = {true, true, false, true, false};
+
+ {
+ std::vector<int32_t> values = {0, 1, 2, 31, 4};
+ static const char* expected = R"expected([
+ 1970-01-01,
+ 1970-01-02,
+ null,
+ 1970-02-01,
+ null
+])expected";
+ CheckPrimitive<Date32Type, int32_t>({0, 10}, is_valid, values, expected);
+ }
+
+ {
+ constexpr int64_t ms_per_day = 24 * 60 * 60 * 1000;
+ std::vector<int64_t> values = {0 * ms_per_day, 1 * ms_per_day, 2 * ms_per_day,
+ 31 * ms_per_day, 4 * ms_per_day};
+ static const char* expected = R"expected([
+ 1970-01-01,
+ 1970-01-02,
+ null,
+ 1970-02-01,
+ null
+])expected";
+ CheckPrimitive<Date64Type, int64_t>({0, 10}, is_valid, values, expected);
+ }
+
+ {
+ std::vector<int64_t> values = {
+ 0, 1, 2, 678 + 1000000 * (5 + 60 * (4 + 60 * (3 + 24 * int64_t(1)))), 4};
+ static const char* expected = R"expected([
+ 1970-01-01 00:00:00.000000,
+ 1970-01-01 00:00:00.000001,
+ null,
+ 1970-01-02 03:04:05.000678,
+ null
+])expected";
+ CheckPrimitive<TimestampType, int64_t>(timestamp(TimeUnit::MICRO, "Transylvania"),
+ {0, 10}, is_valid, values, expected);
+ }
+
+ {
+ std::vector<int32_t> values = {1, 62, 2, 3 + 60 * (2 + 60 * 1), 4};
+ static const char* expected = R"expected([
+ 00:00:01,
+ 00:01:02,
+ null,
+ 01:02:03,
+ null
+])expected";
+ CheckPrimitive<Time32Type, int32_t>(time32(TimeUnit::SECOND), {0, 10}, is_valid,
+ values, expected);
+ }
+
+ {
+ std::vector<int64_t> values = {
+ 0, 1, 2, 678 + int64_t(1000000000) * (5 + 60 * (4 + 60 * 3)), 4};
+ static const char* expected = R"expected([
+ 00:00:00.000000000,
+ 00:00:00.000000001,
+ null,
+ 03:04:05.000000678,
+ null
+])expected";
+ CheckPrimitive<Time64Type, int64_t>(time64(TimeUnit::NANO), {0, 10}, is_valid, values,
+ expected);
+ }
+}
+
TEST_F(TestPrettyPrint, StructTypeBasic) {
auto simple_1 = field("one", int32());
auto simple_2 = field("two", int32());
diff --git a/cpp/src/arrow/pretty_print.cc b/cpp/src/arrow/pretty_print.cc
index 4033977..9c2fcd3 100644
--- a/cpp/src/arrow/pretty_print.cc
+++ b/cpp/src/arrow/pretty_print.cc
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
+#include <chrono>
#include <cstddef>
#include <cstdint>
#include <iostream>
@@ -33,6 +34,7 @@
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/string.h"
+#include "arrow/vendored/datetime.h"
#include "arrow/visitor_inline.h"
namespace arrow {
@@ -143,14 +145,44 @@ class ArrayPrinter : public PrettyPrinter {
}
template <typename T>
- inline typename std::enable_if<IsInteger<T>::value, Status>::type WriteDataValues(
- const T& array) {
+ inline typename std::enable_if<IsInteger<T>::value &&
+ !is_date<typename T::TypeClass>::value &&
+ !is_time<typename T::TypeClass>::value,
+ Status>::type
+ WriteDataValues(const T& array) {
const auto data = array.raw_values();
WriteValues(array, [&](int64_t i) { (*sink_) << static_cast<int64_t>(data[i]); });
return Status::OK();
}
template <typename T>
+ enable_if_date<typename T::TypeClass, Status> WriteDataValues(const T& array) {
+ const auto data = array.raw_values();
+ using unit =
+ typename std::conditional<std::is_same<T, Date32Array>::value, util::date::days,
+ std::chrono::milliseconds>::type;
+ WriteValues(array, [&](int64_t i) { FormatDateTime<unit>("%F", data[i], true); });
+ return Status::OK();
+ }
+
+ Status WriteDataValues(const TimestampArray& array) {
+ const int64_t* data = array.raw_values();
+ const auto type = static_cast<const TimestampType*>(array.type().get());
+ WriteValues(array,
+ [&](int64_t i) { FormatDateTime(type->unit(), "%F %T", data[i], true); });
+ return Status::OK();
+ }
+
+ template <typename T>
+ enable_if_time<typename T::TypeClass, Status> WriteDataValues(const T& array) {
+ const auto data = array.raw_values();
+ const auto type = static_cast<const TimeType*>(array.type().get());
+ WriteValues(array,
+ [&](int64_t i) { FormatDateTime(type->unit(), "%T", data[i], false); });
+ return Status::OK();
+ }
+
+ template <typename T>
inline typename std::enable_if<IsFloatingPoint<T>::value, Status>::type WriteDataValues(
const T& array) {
const auto data = array.raw_values();
@@ -320,9 +352,40 @@ class ArrayPrinter : public PrettyPrinter {
}
private:
+ template <typename Unit>
+ void FormatDateTime(const char* fmt, int64_t value, bool add_epoch) {
+ if (add_epoch) {
+ (*sink_) << util::date::format(fmt, epoch_ + Unit{value});
+ } else {
+ (*sink_) << util::date::format(fmt, Unit{value});
+ }
+ }
+
+ void FormatDateTime(TimeUnit::type unit, const char* fmt, int64_t value,
+ bool add_epoch) {
+ switch (unit) {
+ case TimeUnit::NANO:
+ FormatDateTime<std::chrono::nanoseconds>(fmt, value, add_epoch);
+ break;
+ case TimeUnit::MICRO:
+ FormatDateTime<std::chrono::microseconds>(fmt, value, add_epoch);
+ break;
+ case TimeUnit::MILLI:
+ FormatDateTime<std::chrono::milliseconds>(fmt, value, add_epoch);
+ break;
+ case TimeUnit::SECOND:
+ FormatDateTime<std::chrono::seconds>(fmt, value, add_epoch);
+ break;
+ }
+ }
+
+ static util::date::sys_days epoch_;
std::string null_rep_;
};
+util::date::sys_days ArrayPrinter::epoch_ =
+ util::date::sys_days{util::date::jan / 1 / 1970};
+
Status ArrayPrinter::WriteValidityBitmap(const Array& array) {
Indent();
Write("-- is_valid:");
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 4c35378..9460f43 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -691,7 +691,7 @@ class ARROW_EXPORT Time64Type : public TimeType {
int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
- explicit Time64Type(TimeUnit::type unit = TimeUnit::MILLI);
+ explicit Time64Type(TimeUnit::type unit = TimeUnit::NANO);
std::string ToString() const override;