You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2023/06/13 14:51:16 UTC
[arrow-adbc] branch main updated: feat(c/driver/postgresql,java): ensure time/date type support (#774)
This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 43d4c67f feat(c/driver/postgresql,java): ensure time/date type support (#774)
43d4c67f is described below
commit 43d4c67f563c140921cfd5a2809345ea57222921
Author: David Li <li...@gmail.com>
AuthorDate: Tue Jun 13 10:51:10 2023 -0400
feat(c/driver/postgresql,java): ensure time/date type support (#774)
Fixes #761.
---
c/driver/postgresql/postgres_copy_reader.h | 12 ++++++
c/driver/postgresql/postgres_type.h | 17 +++++++++
c/driver/postgresql/postgresql_test.cc | 43 ++++++++++++++++++++++
.../driver/jdbc/mssqlserver/MsSqlServerQuirks.java | 5 +++
.../jdbc/mssqlserver/MsSqlServerSqlTypeTest.java | 11 +++++-
.../driver/jdbc/postgresql/PostgreSqlTypeTest.java | 9 +++++
.../driver/jdbc/postgresql/PostgresqlQuirks.java | 5 +++
.../src/test/resources/PostgreSqlTypeTest.sql | 4 +-
.../arrow/adbc/driver/jdbc/StandardJdbcQuirks.java | 14 +++++--
.../adbc/driver/testsuite/AbstractSqlTypeTest.java | 29 +++++++++++++++
.../adbc/driver/testsuite/SqlValidationQuirks.java | 20 ++++++++++
11 files changed, 162 insertions(+), 7 deletions(-)
diff --git a/c/driver/postgresql/postgres_copy_reader.h b/c/driver/postgresql/postgres_copy_reader.h
index b1efed4a..78358a9c 100644
--- a/c/driver/postgresql/postgres_copy_reader.h
+++ b/c/driver/postgresql/postgres_copy_reader.h
@@ -636,6 +636,18 @@ static inline ArrowErrorCode MakeCopyFieldReader(const PostgresType& pg_type,
return ErrorCantConvert(error, pg_type, schema_view);
}
+ case NANOARROW_TYPE_DATE32: {
+ // 2000-01-01
+ constexpr int32_t kPostgresDateEpoch = 10957;
+ *out = new PostgresCopyNetworkEndianFieldReader<int32_t, kPostgresDateEpoch>();
+ return NANOARROW_OK;
+ }
+
+ case NANOARROW_TYPE_TIME64: {
+ *out = new PostgresCopyNetworkEndianFieldReader<int64_t>();
+ return NANOARROW_OK;
+ }
+
case NANOARROW_TYPE_TIMESTAMP:
switch (pg_type.type_id()) {
case PostgresTypeId::kTimestamp:
diff --git a/c/driver/postgresql/postgres_type.h b/c/driver/postgresql/postgres_type.h
index 084b3289..e234e36a 100644
--- a/c/driver/postgresql/postgres_type.h
+++ b/c/driver/postgresql/postgres_type.h
@@ -192,6 +192,7 @@ class PostgresType {
// binary COPY representation in the output.
ArrowErrorCode SetSchema(ArrowSchema* schema) const {
switch (type_id_) {
+ // ---- Primitive types --------------------
case PostgresTypeId::kBool:
NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BOOL));
break;
@@ -212,6 +213,8 @@ class PostgresType {
case PostgresTypeId::kFloat8:
NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DOUBLE));
break;
+
+ // ---- Binary/string --------------------
case PostgresTypeId::kChar:
case PostgresTypeId::kBpchar:
case PostgresTypeId::kVarchar:
@@ -223,6 +226,19 @@ class PostgresType {
NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY));
break;
+ // ---- Temporal --------------------
+ case PostgresTypeId::kDate:
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DATE32));
+ break;
+
+ case PostgresTypeId::kTime:
+ // We always return microsecond precision even if the type
+ // specifies differently
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeDateTime(schema, NANOARROW_TYPE_TIME64,
+ NANOARROW_TIME_UNIT_MICRO,
+ /*timezone=*/nullptr));
+ break;
+
case PostgresTypeId::kTimestamp:
// We always return microsecond precision even if the type
// specifies differently
@@ -237,6 +253,7 @@ class PostgresType {
NANOARROW_TIME_UNIT_MICRO, /*timezone=*/"UTC"));
break;
+ // ---- Nested --------------------
case PostgresTypeId::kRecord:
NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, n_children()));
for (int64_t i = 0; i < n_children(); i++) {
diff --git a/c/driver/postgresql/postgresql_test.cc b/c/driver/postgresql/postgresql_test.cc
index 18ecaedf..ee18a98a 100644
--- a/c/driver/postgresql/postgresql_test.cc
+++ b/c/driver/postgresql/postgresql_test.cc
@@ -875,6 +875,45 @@ static std::initializer_list<TypeTestCase> kIntTypeCases = {
{"BIGSERIAL", "BIGSERIAL", std::to_string(std::numeric_limits<int64_t>::max()),
NANOARROW_TYPE_INT64, std::numeric_limits<int64_t>::max()},
};
+static std::initializer_list<TypeTestCase> kDateTypeCases = {
+ {"DATE0", "DATE", "'1970-01-01'", NANOARROW_TYPE_DATE32, int64_t(0)},
+ {"DATE1", "DATE", "'2000-01-01'", NANOARROW_TYPE_DATE32, int64_t(10957)},
+ {"DATE2", "DATE", "'1950-01-01'", NANOARROW_TYPE_DATE32, int64_t(-7305)},
+};
+static std::initializer_list<TypeTestCase> kTimeTypeCases = {
+ {"TIME_WITHOUT_TIME_ZONE", "TIME WITHOUT TIME ZONE", "'00:00'", NANOARROW_TYPE_TIME64,
+ int64_t(0)},
+ {"TIME_WITHOUT_TIME_ZONE_VAL", "TIME WITHOUT TIME ZONE", "'01:02:03.123123'",
+ NANOARROW_TYPE_TIME64, int64_t(3'723'123'123)},
+ {"TIME_6_WITHOUT_TIME_ZONE", "TIME (6) WITHOUT TIME ZONE", "'00:00'",
+ NANOARROW_TYPE_TIME64, int64_t(0)},
+ {"TIME_6_WITHOUT_TIME_ZONE_VAL", "TIME (6) WITHOUT TIME ZONE", "'01:02:03.123123'",
+ NANOARROW_TYPE_TIME64, int64_t(3'723'123'123)},
+ {"TIME_5_WITHOUT_TIME_ZONE", "TIME (5) WITHOUT TIME ZONE", "'00:00'",
+ NANOARROW_TYPE_TIME64, int64_t(0)},
+ {"TIME_5_WITHOUT_TIME_ZONE_VAL", "TIME (5) WITHOUT TIME ZONE", "'01:02:03.123123'",
+ NANOARROW_TYPE_TIME64, int64_t(3'723'123'120)},
+ {"TIME_4_WITHOUT_TIME_ZONE", "TIME (4) WITHOUT TIME ZONE", "'00:00'",
+ NANOARROW_TYPE_TIME64, int64_t(0)},
+ {"TIME_4_WITHOUT_TIME_ZONE_VAL", "TIME (4) WITHOUT TIME ZONE", "'01:02:03.123123'",
+ NANOARROW_TYPE_TIME64, int64_t(3'723'123'100)},
+ {"TIME_3_WITHOUT_TIME_ZONE", "TIME (3) WITHOUT TIME ZONE", "'00:00'",
+ NANOARROW_TYPE_TIME64, int64_t(0)},
+ {"TIME_3_WITHOUT_TIME_ZONE_VAL", "TIME (3) WITHOUT TIME ZONE", "'01:02:03.123123'",
+ NANOARROW_TYPE_TIME64, int64_t(3'723'123'000)},
+ {"TIME_2_WITHOUT_TIME_ZONE", "TIME (2) WITHOUT TIME ZONE", "'00:00'",
+ NANOARROW_TYPE_TIME64, int64_t(0)},
+ {"TIME_2_WITHOUT_TIME_ZONE_VAL", "TIME (2) WITHOUT TIME ZONE", "'01:02:03.123123'",
+ NANOARROW_TYPE_TIME64, int64_t(3'723'120'000)},
+ {"TIME_1_WITHOUT_TIME_ZONE", "TIME (1) WITHOUT TIME ZONE", "'00:00'",
+ NANOARROW_TYPE_TIME64, int64_t(0)},
+ {"TIME_1_WITHOUT_TIME_ZONE_VAL", "TIME (1) WITHOUT TIME ZONE", "'01:02:03.123123'",
+ NANOARROW_TYPE_TIME64, int64_t(3'723'100'000)},
+ {"TIME_0_WITHOUT_TIME_ZONE", "TIME (0) WITHOUT TIME ZONE", "'00:00'",
+ NANOARROW_TYPE_TIME64, int64_t(0)},
+ {"TIME_0_WITHOUT_TIME_ZONE_VAL", "TIME (0) WITHOUT TIME ZONE", "'01:02:03.123123'",
+ NANOARROW_TYPE_TIME64, int64_t(3'723'000'000)},
+};
static std::initializer_list<TypeTestCase> kTimestampTypeCases = {
{"TIMESTAMP_WITHOUT_TIME_ZONE", "TIMESTAMP WITHOUT TIME ZONE",
"'1970-01-01 00:00:00.000000'", NANOARROW_TYPE_TIMESTAMP, int64_t(0)},
@@ -966,6 +1005,10 @@ INSTANTIATE_TEST_SUITE_P(FloatTypes, PostgresTypeTest, testing::ValuesIn(kFloatT
TypeTestCase::FormatName);
INSTANTIATE_TEST_SUITE_P(IntTypes, PostgresTypeTest, testing::ValuesIn(kIntTypeCases),
TypeTestCase::FormatName);
+INSTANTIATE_TEST_SUITE_P(DateTypes, PostgresTypeTest, testing::ValuesIn(kDateTypeCases),
+ TypeTestCase::FormatName);
+INSTANTIATE_TEST_SUITE_P(TimeTypes, PostgresTypeTest, testing::ValuesIn(kTimeTypeCases),
+ TypeTestCase::FormatName);
INSTANTIATE_TEST_SUITE_P(TimestampTypes, PostgresTypeTest,
testing::ValuesIn(kTimestampTypeCases),
TypeTestCase::FormatName);
diff --git a/java/driver/jdbc-validation-mssqlserver/src/test/java/org/apache/arrow/adbc/driver/jdbc/mssqlserver/MsSqlServerQuirks.java b/java/driver/jdbc-validation-mssqlserver/src/test/java/org/apache/arrow/adbc/driver/jdbc/mssqlserver/MsSqlServerQuirks.java
index 47251b41..b0297ab6 100644
--- a/java/driver/jdbc-validation-mssqlserver/src/test/java/org/apache/arrow/adbc/driver/jdbc/mssqlserver/MsSqlServerQuirks.java
+++ b/java/driver/jdbc-validation-mssqlserver/src/test/java/org/apache/arrow/adbc/driver/jdbc/mssqlserver/MsSqlServerQuirks.java
@@ -84,6 +84,11 @@ public class MsSqlServerQuirks extends SqlValidationQuirks {
return name.toLowerCase();
}
+ @Override
+ public TimeUnit defaultTimeUnit() {
+ return TimeUnit.NANOSECOND;
+ }
+
@Override
public TimeUnit defaultTimestampUnit() {
return TimeUnit.NANOSECOND;
diff --git a/java/driver/jdbc-validation-mssqlserver/src/test/java/org/apache/arrow/adbc/driver/jdbc/mssqlserver/MsSqlServerSqlTypeTest.java b/java/driver/jdbc-validation-mssqlserver/src/test/java/org/apache/arrow/adbc/driver/jdbc/mssqlserver/MsSqlServerSqlTypeTest.java
index 486aeef9..0724c5cd 100644
--- a/java/driver/jdbc-validation-mssqlserver/src/test/java/org/apache/arrow/adbc/driver/jdbc/mssqlserver/MsSqlServerSqlTypeTest.java
+++ b/java/driver/jdbc-validation-mssqlserver/src/test/java/org/apache/arrow/adbc/driver/jdbc/mssqlserver/MsSqlServerSqlTypeTest.java
@@ -29,13 +29,22 @@ class MsSqlServerSqlTypeTest extends AbstractSqlTypeTest {
quirks = new MsSqlServerQuirks();
}
+ @Test
+ @Override
+ protected void timeWithoutTimeZoneValue() {
+ // TODO(https://github.com/apache/arrow/issues/35916): needs upstream fix
+ // XXX: Java 8 compiler complains without lambda https://stackoverflow.com/questions/33621060
+ //noinspection Convert2MethodRef
+ assertThrows(AssertionError.class, () -> super.timeWithoutTimeZoneValue());
+ }
+
@Test
@Override
protected void timestampWithoutTimeZoneValue() {
// TODO(https://github.com/apache/arrow/issues/35916): needs upstream fix
// XXX: Java 8 compiler complains without lambda https://stackoverflow.com/questions/33621060
//noinspection Convert2MethodRef
- assertThrows(RuntimeException.class, () -> super.timestampWithTimeZoneValue());
+ assertThrows(AssertionError.class, () -> super.timestampWithoutTimeZoneValue());
}
@Test
diff --git a/java/driver/jdbc-validation-postgresql/src/test/java/org/apache/arrow/adbc/driver/jdbc/postgresql/PostgreSqlTypeTest.java b/java/driver/jdbc-validation-postgresql/src/test/java/org/apache/arrow/adbc/driver/jdbc/postgresql/PostgreSqlTypeTest.java
index 2a02ff5c..5b8357d7 100644
--- a/java/driver/jdbc-validation-postgresql/src/test/java/org/apache/arrow/adbc/driver/jdbc/postgresql/PostgreSqlTypeTest.java
+++ b/java/driver/jdbc-validation-postgresql/src/test/java/org/apache/arrow/adbc/driver/jdbc/postgresql/PostgreSqlTypeTest.java
@@ -33,6 +33,15 @@ class PostgreSqlTypeTest extends AbstractSqlTypeTest {
quirks = new PostgresqlQuirks();
}
+ @Test
+ @Override
+ protected void timeWithoutTimeZoneValue() throws Exception {
+ // TODO(https://github.com/apache/arrow/issues/35916): needs upstream fix
+ // XXX: Java 8 compiler complains without lambda https://stackoverflow.com/questions/33621060
+ //noinspection Convert2MethodRef
+ assertThrows(AssertionError.class, () -> super.timeWithoutTimeZoneValue());
+ }
+
@Test
@Override
protected void timestampWithoutTimeZoneValue() {
diff --git a/java/driver/jdbc-validation-postgresql/src/test/java/org/apache/arrow/adbc/driver/jdbc/postgresql/PostgresqlQuirks.java b/java/driver/jdbc-validation-postgresql/src/test/java/org/apache/arrow/adbc/driver/jdbc/postgresql/PostgresqlQuirks.java
index 5a4db321..ccce7db7 100644
--- a/java/driver/jdbc-validation-postgresql/src/test/java/org/apache/arrow/adbc/driver/jdbc/postgresql/PostgresqlQuirks.java
+++ b/java/driver/jdbc-validation-postgresql/src/test/java/org/apache/arrow/adbc/driver/jdbc/postgresql/PostgresqlQuirks.java
@@ -85,6 +85,11 @@ public class PostgresqlQuirks extends SqlValidationQuirks {
return name.toLowerCase();
}
+ @Override
+ public TimeUnit defaultTimeUnit() {
+ return TimeUnit.MICROSECOND;
+ }
+
@Override
public TimeUnit defaultTimestampUnit() {
return TimeUnit.MICROSECOND;
diff --git a/java/driver/jdbc-validation-postgresql/src/test/resources/PostgreSqlTypeTest.sql b/java/driver/jdbc-validation-postgresql/src/test/resources/PostgreSqlTypeTest.sql
index bb7d2ef3..c2fa8a12 100644
--- a/java/driver/jdbc-validation-postgresql/src/test/resources/PostgreSqlTypeTest.sql
+++ b/java/driver/jdbc-validation-postgresql/src/test/resources/PostgreSqlTypeTest.sql
@@ -44,8 +44,8 @@ INSERT INTO adbc_alltypes VALUES (
42,
42,
'foo',
- '04:05:06.789012',
- '04:05:06.789012-08:00',
+ '04:05:06.123456',
+ '04:05:06.123456-08:00',
TIMESTAMP '2000-01-02 03:04:05.123456',
TIMESTAMP (6) '2000-01-02 03:04:05.123456',
TIMESTAMP (5) '2000-01-02 03:04:05.12345',
diff --git a/java/driver/jdbc/src/main/java/org/apache/arrow/adbc/driver/jdbc/StandardJdbcQuirks.java b/java/driver/jdbc/src/main/java/org/apache/arrow/adbc/driver/jdbc/StandardJdbcQuirks.java
index aa38150f..e87283cb 100644
--- a/java/driver/jdbc/src/main/java/org/apache/arrow/adbc/driver/jdbc/StandardJdbcQuirks.java
+++ b/java/driver/jdbc/src/main/java/org/apache/arrow/adbc/driver/jdbc/StandardJdbcQuirks.java
@@ -21,6 +21,7 @@ import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.adbc.driver.jdbc.adapter.JdbcFieldInfoExtra;
import org.apache.arrow.adbc.sql.SqlQuirks;
import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.ArrowType;
public final class StandardJdbcQuirks {
@@ -41,16 +42,19 @@ public final class StandardJdbcQuirks {
.build())
.typeConverter(StandardJdbcQuirks::postgresql)
.build();
+ private static final int MS_SQL_TYPE_DATETIMEOFFSET = -155;
private static ArrowType mssql(JdbcFieldInfoExtra field) {
switch (field.getJdbcType()) {
+ case Types.TIME:
+ return MinorType.TIMENANO.getType();
+ case Types.TIMESTAMP:
// DATETIME2
// Precision is "100 nanoseconds" -> TimeUnit is NANOSECOND
- case Types.TIMESTAMP:
- return new ArrowType.Timestamp(TimeUnit.NANOSECOND, /*timezone*/ null);
+ return MinorType.TIMESTAMPNANO.getType();
+ case MS_SQL_TYPE_DATETIMEOFFSET:
// DATETIMEOFFSET
// Precision is "100 nanoseconds" -> TimeUnit is NANOSECOND
- case -155:
return new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC");
default:
return JdbcToArrowUtils.getArrowTypeFromJdbcType(field.getFieldInfo(), /*calendar*/ null);
@@ -59,11 +63,13 @@ public final class StandardJdbcQuirks {
private static ArrowType postgresql(JdbcFieldInfoExtra field) {
switch (field.getJdbcType()) {
+ case Types.TIME:
+ return MinorType.TIMEMICRO.getType();
case Types.TIMESTAMP:
if ("timestamptz".equals(field.getTypeName())) {
return new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC");
} else if ("timestamp".equals(field.getTypeName())) {
- return new ArrowType.Timestamp(TimeUnit.MICROSECOND, /*timezone*/ null);
+ return MinorType.TIMESTAMPMICRO.getType();
}
// Unknown type
return null;
diff --git a/java/driver/validation/src/main/java/org/apache/arrow/adbc/driver/testsuite/AbstractSqlTypeTest.java b/java/driver/validation/src/main/java/org/apache/arrow/adbc/driver/testsuite/AbstractSqlTypeTest.java
index 770a1a88..55d8a835 100644
--- a/java/driver/validation/src/main/java/org/apache/arrow/adbc/driver/testsuite/AbstractSqlTypeTest.java
+++ b/java/driver/validation/src/main/java/org/apache/arrow/adbc/driver/testsuite/AbstractSqlTypeTest.java
@@ -35,6 +35,10 @@ import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
import org.apache.arrow.vector.TimeStampMicroTZVector;
import org.apache.arrow.vector.TimeStampMilliTZVector;
import org.apache.arrow.vector.TimeStampNanoTZVector;
@@ -189,6 +193,31 @@ public class AbstractSqlTypeTest {
assertValue("text_t", VarCharVector.class, new Text("foo"));
}
+ @Test
+ protected void timeWithoutTimeZoneType() throws Exception {
+ final Schema schema = connection.getTableSchema(null, null, "adbc_alltypes");
+ assertThat(schema.findField("time_without_time_zone_t").getType())
+ .isEqualTo(quirks.defaultTimeType());
+ }
+
+ @Test
+ protected void timeWithoutTimeZoneValue() throws Exception {
+ switch (quirks.defaultTimeUnit()) {
+ case SECOND:
+ assertValue("time_without_time_zone_t", TimeSecVector.class, 14706L);
+ break;
+ case MILLISECOND:
+ assertValue("time_without_time_zone_t", TimeMilliVector.class, 14706123L);
+ break;
+ case MICROSECOND:
+ assertValue("time_without_time_zone_t", TimeMicroVector.class, 14706123456L);
+ break;
+ case NANOSECOND:
+ assertValue("time_without_time_zone_t", TimeNanoVector.class, 14706123456000L);
+ break;
+ }
+ }
+
@Test
protected void timestampWithoutTimeZoneType() throws Exception {
final Schema schema = connection.getTableSchema(null, null, "adbc_alltypes");
diff --git a/java/driver/validation/src/main/java/org/apache/arrow/adbc/driver/testsuite/SqlValidationQuirks.java b/java/driver/validation/src/main/java/org/apache/arrow/adbc/driver/testsuite/SqlValidationQuirks.java
index a8d259cf..120ecab2 100644
--- a/java/driver/validation/src/main/java/org/apache/arrow/adbc/driver/testsuite/SqlValidationQuirks.java
+++ b/java/driver/validation/src/main/java/org/apache/arrow/adbc/driver/testsuite/SqlValidationQuirks.java
@@ -23,6 +23,8 @@ import org.apache.arrow.adbc.core.AdbcDatabase;
import org.apache.arrow.adbc.core.AdbcException;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
/** Account for driver/vendor-specific quirks in implementing validation tests. */
public abstract class SqlValidationQuirks {
@@ -87,6 +89,24 @@ public abstract class SqlValidationQuirks {
+ ") ";
}
+ public TimeUnit defaultTimeUnit() {
+ return TimeUnit.MILLISECOND;
+ }
+
+ public ArrowType defaultTimeType() {
+ switch (defaultTimeUnit()) {
+ case SECOND:
+ return Types.MinorType.TIMESEC.getType();
+ case MILLISECOND:
+ return Types.MinorType.TIMEMILLI.getType();
+ case MICROSECOND:
+ return Types.MinorType.TIMEMICRO.getType();
+ case NANOSECOND:
+ return Types.MinorType.TIMENANO.getType();
+ }
+ throw new AssertionError("Unhandled case");
+ }
+
public TimeUnit defaultTimestampUnit() {
return TimeUnit.MILLISECOND;
}