You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by qu...@apache.org on 2022/12/07 23:38:46 UTC

[arrow-julia] branch main updated: Store ZDT with a UTC, not local, timestamp (#329)

This is an automated email from the ASF dual-hosted git repository.

quinnj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git


The following commit(s) were added to refs/heads/main by this push:
     new 0c47938  Store ZDT with a UTC, not local, timestamp (#329)
0c47938 is described below

commit 0c4793871d911e185cb6a9603e577a1f52f52a22
Author: Tom Gillam <tp...@googlemail.com>
AuthorDate: Wed Dec 7 23:38:40 2022 +0000

    Store ZDT with a UTC, not local, timestamp (#329)
    
    * Add failing test
    
    * Store ZDT with a UTC, not local, timestamp
    
    * Change expectation for previous test
    
    * support old ZonedDateTimes
    
    Co-authored-by: Eric Hanson <58...@users.noreply.github.com>
---
 src/eltypes.jl     |  17 ++++++++++++++---
 test/old_zdt.arrow | Bin 0 -> 818 bytes
 test/runtests.jl   |  13 ++++++++++++-
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/src/eltypes.jl b/src/eltypes.jl
index 66f7a68..4bec444 100644
--- a/src/eltypes.jl
+++ b/src/eltypes.jl
@@ -262,7 +262,7 @@ finaljuliatype(::Type{Timestamp{U, nothing}}) where {U} = DateTime
 
 function Base.convert(::Type{ZonedDateTime}, x::Timestamp{U, TZ}) where {U, TZ}
     (U === Meta.TimeUnits.MICROSECOND || U == Meta.TimeUnits.NANOSECOND) && warntimestamp(U, ZonedDateTime)
-    return ZonedDateTime(Dates.DateTime(Dates.UTM(Int64(Dates.toms(periodtype(U)(x.x)) + UNIX_EPOCH_DATETIME))), TimeZone(String(TZ)))
+    return ZonedDateTime(Dates.DateTime(Dates.UTM(Int64(Dates.toms(periodtype(U)(x.x)) + UNIX_EPOCH_DATETIME))), TimeZone(String(TZ)); from_utc=true)
 end
 
 function Base.convert(::Type{DateTime}, x::Timestamp{U, nothing}) where {U}
@@ -271,7 +271,7 @@ function Base.convert(::Type{DateTime}, x::Timestamp{U, nothing}) where {U}
 end
 
 Base.convert(::Type{Timestamp{Meta.TimeUnits.MILLISECOND, TZ}}, x::ZonedDateTime) where {TZ} =
-    Timestamp{Meta.TimeUnits.MILLISECOND, TZ}(Int64(Dates.value(DateTime(x)) - UNIX_EPOCH_DATETIME))
+    Timestamp{Meta.TimeUnits.MILLISECOND, TZ}(Int64(Dates.value(DateTime(x, UTC)) - UNIX_EPOCH_DATETIME))
 Base.convert(::Type{Timestamp{Meta.TimeUnits.MILLISECOND, nothing}}, x::DateTime) =
     Timestamp{Meta.TimeUnits.MILLISECOND, nothing}(Int64(Dates.value(x) - UNIX_EPOCH_DATETIME))
 
@@ -294,12 +294,23 @@ ArrowTypes.default(::Type{Dates.DateTime}) = Dates.DateTime(1,1,1,1,1,1)
 
 ArrowTypes.ArrowType(::Type{ZonedDateTime}) = Timestamp
 ArrowTypes.toarrow(x::ZonedDateTime) = convert(Timestamp{Meta.TimeUnits.MILLISECOND, Symbol(x.timezone)}, x)
-const ZONEDDATETIME_SYMBOL = Symbol("JuliaLang.ZonedDateTime")
+const ZONEDDATETIME_SYMBOL = Symbol("JuliaLang.ZonedDateTime-UTC")
 ArrowTypes.arrowname(::Type{ZonedDateTime}) = ZONEDDATETIME_SYMBOL
 ArrowTypes.JuliaType(::Val{ZONEDDATETIME_SYMBOL}, S) = ZonedDateTime
 ArrowTypes.fromarrow(::Type{ZonedDateTime}, x::Timestamp) = convert(ZonedDateTime, x)
 ArrowTypes.default(::Type{TimeZones.ZonedDateTime}) = TimeZones.ZonedDateTime(1,1,1,1,1,1,TimeZones.tz"UTC")
 
+# Backwards compatibility: older versions of Arrow saved ZonedDateTimes with this metadata:
+const OLD_ZONEDDATETIME_SYMBOL = Symbol("JuliaLang.ZonedDateTime")
+# and stored the local time instead of the UTC time.
+struct LocalZonedDateTime end
+ArrowTypes.JuliaType(::Val{OLD_ZONEDDATETIME_SYMBOL}, S) = LocalZonedDateTime
+function ArrowTypes.fromarrow(::Type{LocalZonedDateTime}, x::Timestamp{U, TZ}) where {U, TZ}
+    (U === Meta.TimeUnits.MICROSECOND || U == Meta.TimeUnits.NANOSECOND) && warntimestamp(U, ZonedDateTime)
+    return ZonedDateTime(Dates.DateTime(Dates.UTM(Int64(Dates.toms(periodtype(U)(x.x)) + UNIX_EPOCH_DATETIME))), TimeZone(String(TZ)))
+end
+
+
 """
     Arrow.ToTimestamp(x::AbstractVector{ZonedDateTime})
 
diff --git a/test/old_zdt.arrow b/test/old_zdt.arrow
new file mode 100644
index 0000000..b8e55ba
Binary files /dev/null and b/test/old_zdt.arrow differ
diff --git a/test/runtests.jl b/test/runtests.jl
index 7fcb3c2..343ee77 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -344,7 +344,7 @@ end
 x = [ZonedDateTime(Dates.DateTime(2020), tz"Europe/Paris")]
 c = Arrow.ToTimestamp(x)
 @test eltype(c) == Arrow.Timestamp{Arrow.Flatbuf.TimeUnits.MILLISECOND, Symbol("Europe/Paris")}
-@test c[1] == Arrow.Timestamp{Arrow.Flatbuf.TimeUnits.MILLISECOND, Symbol("Europe/Paris")}(1577836800000)
+@test c[1] == Arrow.Timestamp{Arrow.Flatbuf.TimeUnits.MILLISECOND, Symbol("Europe/Paris")}(1577833200000)
 
 # 158
 # arrow ipc stream generated from pyarrow with no record batches
@@ -537,6 +537,17 @@ end
 # https://github.com/apache/arrow-julia/issues/324
 @test_throws ArgumentError filter!(x -> x > 1, Arrow.toarrowvector([1, 2, 3]))
 
+# https://github.com/apache/arrow-julia/issues/327
+zdt = ZonedDateTime(DateTime(2020, 11, 1, 6), tz"America/New_York"; from_utc=true)
+arrow_zdt = ArrowTypes.toarrow(zdt)
+zdt_again = ArrowTypes.fromarrow(ZonedDateTime, arrow_zdt)
+@test zdt == zdt_again
+
+# Check that we still correctly read ZonedDateTimes written in the old (local-time) format
+original_table = (; col = [ ZonedDateTime(DateTime(1, 2, 3, 4, 5, 6), tz"UTC+3") for _ in 1:5])
+table = Arrow.Table(joinpath(@__DIR__, "old_zdt.arrow"))
+@test original_table.col == table.col
+
 end # @testset "misc"
 
 end