You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by qu...@apache.org on 2022/12/07 23:38:46 UTC
[arrow-julia] branch main updated: Store ZDT with a UTC, not local, timestamp (#329)
This is an automated email from the ASF dual-hosted git repository.
quinnj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git
The following commit(s) were added to refs/heads/main by this push:
new 0c47938 Store ZDT with a UTC, not local, timestamp (#329)
0c47938 is described below
commit 0c4793871d911e185cb6a9603e577a1f52f52a22
Author: Tom Gillam <tp...@googlemail.com>
AuthorDate: Wed Dec 7 23:38:40 2022 +0000
Store ZDT with a UTC, not local, timestamp (#329)
* Add failing test
* Store ZDT with a UTC, not local, timestamp
* Change expectation for previous test
* support old ZonedDateTimes
Co-authored-by: Eric Hanson <58...@users.noreply.github.com>
---
src/eltypes.jl | 17 ++++++++++++++---
test/old_zdt.arrow | Bin 0 -> 818 bytes
test/runtests.jl | 13 ++++++++++++-
3 files changed, 26 insertions(+), 4 deletions(-)
diff --git a/src/eltypes.jl b/src/eltypes.jl
index 66f7a68..4bec444 100644
--- a/src/eltypes.jl
+++ b/src/eltypes.jl
@@ -262,7 +262,7 @@ finaljuliatype(::Type{Timestamp{U, nothing}}) where {U} = DateTime
function Base.convert(::Type{ZonedDateTime}, x::Timestamp{U, TZ}) where {U, TZ}
(U === Meta.TimeUnits.MICROSECOND || U == Meta.TimeUnits.NANOSECOND) && warntimestamp(U, ZonedDateTime)
- return ZonedDateTime(Dates.DateTime(Dates.UTM(Int64(Dates.toms(periodtype(U)(x.x)) + UNIX_EPOCH_DATETIME))), TimeZone(String(TZ)))
+ return ZonedDateTime(Dates.DateTime(Dates.UTM(Int64(Dates.toms(periodtype(U)(x.x)) + UNIX_EPOCH_DATETIME))), TimeZone(String(TZ)); from_utc=true)
end
function Base.convert(::Type{DateTime}, x::Timestamp{U, nothing}) where {U}
@@ -271,7 +271,7 @@ function Base.convert(::Type{DateTime}, x::Timestamp{U, nothing}) where {U}
end
Base.convert(::Type{Timestamp{Meta.TimeUnits.MILLISECOND, TZ}}, x::ZonedDateTime) where {TZ} =
- Timestamp{Meta.TimeUnits.MILLISECOND, TZ}(Int64(Dates.value(DateTime(x)) - UNIX_EPOCH_DATETIME))
+ Timestamp{Meta.TimeUnits.MILLISECOND, TZ}(Int64(Dates.value(DateTime(x, UTC)) - UNIX_EPOCH_DATETIME))
Base.convert(::Type{Timestamp{Meta.TimeUnits.MILLISECOND, nothing}}, x::DateTime) =
Timestamp{Meta.TimeUnits.MILLISECOND, nothing}(Int64(Dates.value(x) - UNIX_EPOCH_DATETIME))
@@ -294,12 +294,23 @@ ArrowTypes.default(::Type{Dates.DateTime}) = Dates.DateTime(1,1,1,1,1,1)
ArrowTypes.ArrowType(::Type{ZonedDateTime}) = Timestamp
ArrowTypes.toarrow(x::ZonedDateTime) = convert(Timestamp{Meta.TimeUnits.MILLISECOND, Symbol(x.timezone)}, x)
-const ZONEDDATETIME_SYMBOL = Symbol("JuliaLang.ZonedDateTime")
+const ZONEDDATETIME_SYMBOL = Symbol("JuliaLang.ZonedDateTime-UTC")
ArrowTypes.arrowname(::Type{ZonedDateTime}) = ZONEDDATETIME_SYMBOL
ArrowTypes.JuliaType(::Val{ZONEDDATETIME_SYMBOL}, S) = ZonedDateTime
ArrowTypes.fromarrow(::Type{ZonedDateTime}, x::Timestamp) = convert(ZonedDateTime, x)
ArrowTypes.default(::Type{TimeZones.ZonedDateTime}) = TimeZones.ZonedDateTime(1,1,1,1,1,1,TimeZones.tz"UTC")
+# Backwards compatibility: older versions of Arrow saved ZonedDateTimes with this metadata:
+const OLD_ZONEDDATETIME_SYMBOL = Symbol("JuliaLang.ZonedDateTime")
+# and stored the local time instead of the UTC time.
+struct LocalZonedDateTime end
+ArrowTypes.JuliaType(::Val{OLD_ZONEDDATETIME_SYMBOL}, S) = LocalZonedDateTime
+function ArrowTypes.fromarrow(::Type{LocalZonedDateTime}, x::Timestamp{U, TZ}) where {U, TZ}
+ (U === Meta.TimeUnits.MICROSECOND || U == Meta.TimeUnits.NANOSECOND) && warntimestamp(U, ZonedDateTime)
+ return ZonedDateTime(Dates.DateTime(Dates.UTM(Int64(Dates.toms(periodtype(U)(x.x)) + UNIX_EPOCH_DATETIME))), TimeZone(String(TZ)))
+end
+
+
"""
Arrow.ToTimestamp(x::AbstractVector{ZonedDateTime})
diff --git a/test/old_zdt.arrow b/test/old_zdt.arrow
new file mode 100644
index 0000000..b8e55ba
Binary files /dev/null and b/test/old_zdt.arrow differ
diff --git a/test/runtests.jl b/test/runtests.jl
index 7fcb3c2..343ee77 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -344,7 +344,7 @@ end
x = [ZonedDateTime(Dates.DateTime(2020), tz"Europe/Paris")]
c = Arrow.ToTimestamp(x)
@test eltype(c) == Arrow.Timestamp{Arrow.Flatbuf.TimeUnits.MILLISECOND, Symbol("Europe/Paris")}
-@test c[1] == Arrow.Timestamp{Arrow.Flatbuf.TimeUnits.MILLISECOND, Symbol("Europe/Paris")}(1577836800000)
+@test c[1] == Arrow.Timestamp{Arrow.Flatbuf.TimeUnits.MILLISECOND, Symbol("Europe/Paris")}(1577833200000)
# 158
# arrow ipc stream generated from pyarrow with no record batches
@@ -537,6 +537,17 @@ end
# https://github.com/apache/arrow-julia/issues/324
@test_throws ArgumentError filter!(x -> x > 1, Arrow.toarrowvector([1, 2, 3]))
+# https://github.com/apache/arrow-julia/issues/327
+zdt = ZonedDateTime(DateTime(2020, 11, 1, 6), tz"America/New_York"; from_utc=true)
+arrow_zdt = ArrowTypes.toarrow(zdt)
+zdt_again = ArrowTypes.fromarrow(ZonedDateTime, arrow_zdt)
+@test zdt == zdt_again
+
+# Check that we still correctly read in ZonedDateTimes written in the old format
+original_table = (; col = [ ZonedDateTime(DateTime(1, 2, 3, 4, 5, 6), tz"UTC+3") for _ in 1:5])
+table = Arrow.Table(joinpath(@__DIR__, "old_zdt.arrow"))
+@test original_table.col == table.col
+
end # @testset "misc"
end