You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2023/06/12 15:43:40 UTC
[arrow-adbc] branch main updated: feat(c/driver/postgresql): add timestamp types support (#758)
This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 53752b3d feat(c/driver/postgresql): add timestamp types support (#758)
53752b3d is described below
commit 53752b3d9c5818bc191875d6462a911f93036445
Author: David Li <li...@gmail.com>
AuthorDate: Mon Jun 12 11:43:34 2023 -0400
feat(c/driver/postgresql): add timestamp types support (#758)
Fixes #727.
---
c/driver/postgresql/postgres_copy_reader.h | 18 +++++-
c/driver/postgresql/postgres_type.h | 14 +++++
c/driver/postgresql/postgresql_test.cc | 93 ++++++++++++++++++++++++++++++
3 files changed, 123 insertions(+), 2 deletions(-)
diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h
index 6e8f8db9..b1efed4a 100644
--- a/c/driver/postgresql/postgres_copy_reader.h
+++ b/c/driver/postgresql/postgres_copy_reader.h
@@ -184,7 +184,7 @@ class PostgresCopyBooleanFieldReader : public PostgresCopyFieldReader {
// Reader for Pg->Arrow conversions whose representations are identical minus
// the bswap from network endian. This includes all integral and float types.
-template <typename T>
+template <typename T, T kOffset = 0>
class PostgresCopyNetworkEndianFieldReader : public PostgresCopyFieldReader {
public:
ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, ArrowArray* array,
@@ -200,7 +200,7 @@ class PostgresCopyNetworkEndianFieldReader : public PostgresCopyFieldReader {
return EINVAL;
}
- T value = ReadUnsafe<T>(data);
+ T value = kOffset + ReadUnsafe<T>(data);
NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &value, sizeof(T)));
return AppendValid(array);
}
@@ -635,6 +635,20 @@ static inline ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type,
default:
return ErrorCantConvert(error, pg_type, schema_view);
}
+
+ case NANOARROW_TYPE_TIMESTAMP:
+ switch (pg_type.type_id()) {
+ case PostgresTypeId::kTimestamp:
+ case PostgresTypeId::kTimestamptz: {
+ // 2000-01-01 00:00:00.000000 in microseconds
+ constexpr int64_t kPostgresTimestampEpoch = 946684800000000;
+ *out = new PostgresCopyNetworkEndianFieldReader<int64_t,
+ kPostgresTimestampEpoch>();
+ return NANOARROW_OK;
+ }
+ default:
+ return ErrorCantConvert(error, pg_type, schema_view);
+ }
default:
return ErrorCantConvert(error, pg_type, schema_view);
}
diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h
index a2152a58..084b3289 100644
--- a/c/driver/postgresql/postgres_type.h
+++ b/c/driver/postgresql/postgres_type.h
@@ -223,6 +223,20 @@ class PostgresType {
NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY));
break;
+ case PostgresTypeId::kTimestamp:
+ // We always return microsecond precision even if the type
+ // specifies differently
+ NANOARROW_RETURN_NOT_OK(
+ ArrowSchemaSetTypeDateTime(schema, NANOARROW_TYPE_TIMESTAMP,
+ NANOARROW_TIME_UNIT_MICRO, /*timezone=*/nullptr));
+ break;
+
+ case PostgresTypeId::kTimestamptz:
+ NANOARROW_RETURN_NOT_OK(
+ ArrowSchemaSetTypeDateTime(schema, NANOARROW_TYPE_TIMESTAMP,
+ NANOARROW_TIME_UNIT_MICRO, /*timezone=*/"UTC"));
+ break;
+
case PostgresTypeId::kRecord:
NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, n_children()));
for (int64_t i = 0; i < n_children(); i++) {
diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc
index bc947812..1d323cc7 100644
--- a/c/driver/postgresql/postgresql_test.cc
+++ b/c/driver/postgresql/postgresql_test.cc
@@ -617,6 +617,14 @@ TEST_P(PostgresTypeTest, SelectValue) {
ASSERT_NO_FATAL_FAILURE(reader.GetSchema());
ASSERT_NO_FATAL_FAILURE(adbc_validation::CompareSchema(
&reader.schema.value, {{std::nullopt, GetParam().arrow_type, true}}));
+ if (GetParam().arrow_type == NANOARROW_TYPE_TIMESTAMP) {
+ if (GetParam().sql_type.find("WITH TIME ZONE") == std::string::npos) {
+ ASSERT_STREQ(reader.schema->children[0]->format, "tsu:");
+ } else {
+ ASSERT_STREQ(reader.schema->children[0]->format, "tsu:UTC");
+ }
+ }
+
ASSERT_NO_FATAL_FAILURE(reader.Next());
ASSERT_NE(nullptr, reader.array->release);
ASSERT_FALSE(ArrowArrayViewIsNull(&reader.array_view.value, 0));
@@ -677,6 +685,88 @@ static std::initializer_list<TypeTestCase> kIntTypeCases = {
{"BIGSERIAL", "BIGSERIAL", std::to_string(std::numeric_limits<int64_t>::max()),
NANOARROW_TYPE_INT64, std::numeric_limits<int64_t>::max()},
};
+static std::initializer_list<TypeTestCase> kTimestampTypeCases = {  // TIMESTAMP [(p)] WITH/WITHOUT TIME ZONE cases; expected values are microseconds relative to 1970-01-01 00:00:00
+ {"TIMESTAMP_WITHOUT_TIME_ZONE", "TIMESTAMP WITHOUT TIME ZONE",  // entry layout: case name (presumably, see FormatName), SQL type, SQL literal, Arrow type, expected value
+ "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)},
+ {"TIMESTAMP_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP WITHOUT TIME ZONE",  // 1 day + 03:04:05.123123 = 97445.123123 s; (p) cases expect digits beyond p to be zero; +00:30 cases are 1800 s lower
+ "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'123'123)},
+ {"TIMESTAMP_6_WITHOUT_TIME_ZONE", "TIMESTAMP (6) WITHOUT TIME ZONE",
+ "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)},
+ {"TIMESTAMP_6_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP (6) WITHOUT TIME ZONE",
+ "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'123'123)},
+ {"TIMESTAMP_5_WITHOUT_TIME_ZONE", "TIMESTAMP (5) WITHOUT TIME ZONE",
+ "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)},
+ {"TIMESTAMP_5_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP (5) WITHOUT TIME ZONE",
+ "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'123'120)},
+ {"TIMESTAMP_4_WITHOUT_TIME_ZONE", "TIMESTAMP (4) WITHOUT TIME ZONE",
+ "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)},
+ {"TIMESTAMP_4_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP (4) WITHOUT TIME ZONE",
+ "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'123'100)},
+ {"TIMESTAMP_3_WITHOUT_TIME_ZONE", "TIMESTAMP (3) WITHOUT TIME ZONE",
+ "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)},
+ {"TIMESTAMP_3_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP (3) WITHOUT TIME ZONE",
+ "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'123'000)},
+ {"TIMESTAMP_2_WITHOUT_TIME_ZONE", "TIMESTAMP (2) WITHOUT TIME ZONE",
+ "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)},
+ {"TIMESTAMP_2_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP (2) WITHOUT TIME ZONE",
+ "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'120'000)},
+ {"TIMESTAMP_1_WITHOUT_TIME_ZONE", "TIMESTAMP (1) WITHOUT TIME ZONE",
+ "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)},
+ {"TIMESTAMP_1_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP (1) WITHOUT TIME ZONE",
+ "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'100'000)},
+ {"TIMESTAMP_0_WITHOUT_TIME_ZONE", "TIMESTAMP (0) WITHOUT TIME ZONE",
+ "'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)},
+ {"TIMESTAMP_0_WITHOUT_TIME_ZONE_VAL", "TIMESTAMP (0) WITHOUT TIME ZONE",
+ "'1970-01-02 03:04:05.123123'", NANOARROW_TYPE_TIMESTAMP, int64_t(97'445'000'000)},
+ {"TIMESTAMP_WITH_TIME_ZONE", "TIMESTAMP WITH TIME ZONE",
+ "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(-1'800'000'000)},
+ {"TIMESTAMP_WITH_TIME_ZONE_VAL", "TIMESTAMP WITH TIME ZONE",
+ "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(95'645'123'123)},
+ {"TIMESTAMP_6_WITH_TIME_ZONE", "TIMESTAMP (6) WITH TIME ZONE",
+ "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(-1'800'000'000)},
+ {"TIMESTAMP_6_WITH_TIME_ZONE_VAL", "TIMESTAMP (6) WITH TIME ZONE",
+ "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(95'645'123'123)},
+ {"TIMESTAMP_5_WITH_TIME_ZONE", "TIMESTAMP (5) WITH TIME ZONE",
+ "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(-1'800'000'000)},
+ {"TIMESTAMP_5_WITH_TIME_ZONE_VAL", "TIMESTAMP (5) WITH TIME ZONE",
+ "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(95'645'123'120)},
+ {"TIMESTAMP_4_WITH_TIME_ZONE", "TIMESTAMP (4) WITH TIME ZONE",
+ "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(-1'800'000'000)},
+ {"TIMESTAMP_4_WITH_TIME_ZONE_VAL", "TIMESTAMP (4) WITH TIME ZONE",
+ "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(95'645'123'100)},
+ {"TIMESTAMP_3_WITH_TIME_ZONE", "TIMESTAMP (3) WITH TIME ZONE",
+ "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(-1'800'000'000)},
+ {"TIMESTAMP_3_WITH_TIME_ZONE_VAL", "TIMESTAMP (3) WITH TIME ZONE",
+ "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(95'645'123'000)},
+ {"TIMESTAMP_2_WITH_TIME_ZONE", "TIMESTAMP (2) WITH TIME ZONE",
+ "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(-1'800'000'000)},
+ {"TIMESTAMP_2_WITH_TIME_ZONE_VAL", "TIMESTAMP (2) WITH TIME ZONE",
+ "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(95'645'120'000)},
+ {"TIMESTAMP_1_WITH_TIME_ZONE", "TIMESTAMP (1) WITH TIME ZONE",
+ "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(-1'800'000'000)},
+ {"TIMESTAMP_1_WITH_TIME_ZONE_VAL", "TIMESTAMP (1) WITH TIME ZONE",
+ "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(95'645'100'000)},
+ {"TIMESTAMP_0_WITH_TIME_ZONE", "TIMESTAMP (0) WITH TIME ZONE",
+ "'1970-01-01 00:00:00.000000+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(-1'800'000'000)},
+ {"TIMESTAMP_0_WITH_TIME_ZONE_VAL", "TIMESTAMP (0) WITH TIME ZONE",
+ "'1970-01-02 03:04:05.123123+00:30'", NANOARROW_TYPE_TIMESTAMP,
+ int64_t(95'645'000'000)},
+};
INSTANTIATE_TEST_SUITE_P(BoolType, PostgresTypeTest, testing::ValuesIn(kBoolTypeCases),
TypeTestCase::FormatName);
@@ -686,3 +776,6 @@ INSTANTIATE_TEST_SUITE_P(FloatTypes, PostgresTypeTest, testing::ValuesIn(kFloatT
TypeTestCase::FormatName);
INSTANTIATE_TEST_SUITE_P(IntTypes, PostgresTypeTest, testing::ValuesIn(kIntTypeCases),
TypeTestCase::FormatName);
+INSTANTIATE_TEST_SUITE_P(TimestampTypes, PostgresTypeTest,  // instantiates PostgresTypeTest once per kTimestampTypeCases entry, under the TimestampTypes prefix
+ testing::ValuesIn(kTimestampTypeCases),
+ TypeTestCase::FormatName);  // FormatName is passed as the per-case test-name generator