You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by qu...@apache.org on 2023/06/12 23:22:08 UTC

[arrow-julia] 01/01: Fix defaults for structs with missing fields + Vararg fieldtypes

This is an automated email from the ASF dual-hosted git repository.

quinnj pushed a commit to branch jq-461
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git

commit f51d45415058080ce275fd332841e21de492a625
Author: Jacob Quinn <qu...@gmail.com>
AuthorDate: Mon Jun 12 17:17:53 2023 -0600

    Fix defaults for structs with missing fields + Vararg fieldtypes
    
    Fixes #461. The issue here is that when converting a column of a
    struct that had a field with type `Union{T, Missing}`, where `T`
    was a type that had any `Tuple{Vararg}` fieldtypes, the
    `ArrowTypes.default` call blew up (fieldcount not defined). To resolve,
    we _could_ try to define a `default` for `Tuple{Vararg}`, but that
    seemed pretty messy and reliant on some pretty deep reflection internals
    that aren't well-exposed (`T.parameters[end] isa Core.TypeofVararg`!),
    so the alternative proposed here is that, to get a struct field's default
    value, we actually call `default` on the entire struct and take the
    field's default from that. I think this should be more robust because we
    have pretty sensible `default`s (pun intended) on most types, but
    when it comes to _field types_ specifically, there are a lot of weird
    edge cases that are just hard to handle, like vararg fields.
    In this way, we don't try to mess with the field types directly and instead
    rely on being able to get the `default` of the parent struct on the way
    down.
---
 src/arraytypes/struct.jl | 8 ++++----
 test/runtests.jl         | 9 +++++++++
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/arraytypes/struct.jl b/src/arraytypes/struct.jl
index 2b38847..45e9207 100644
--- a/src/arraytypes/struct.jl
+++ b/src/arraytypes/struct.jl
@@ -74,10 +74,10 @@ ToStruct(x::A, j::Integer) where {A} = ToStruct{fieldtype(Base.nonmissingtype(el
 Base.IndexStyle(::Type{<:ToStruct}) = Base.IndexLinear()
 Base.size(x::ToStruct) = (length(x.data),)
 
-Base.@propagate_inbounds function Base.getindex(A::ToStruct{T, j}, i::Integer) where {T, j}
-    @boundscheck checkbounds(A, i)
-    @inbounds x = A.data[i]
-    return x === missing ? ArrowTypes.default(T) : getfield(x, j)
+Base.@propagate_inbounds function Base.getindex(a::ToStruct{T, j, A}, i::Integer) where {T, j, A}
+    @boundscheck checkbounds(a, i)
+    @inbounds x = a.data[i]
+    return x === missing ? getfield(ArrowTypes.default(eltype(A)), j) : getfield(x, j)
 end
 
 arrowvector(::StructKind, x::Struct, i, nl, fi, de, ded, meta; kw...) = x
diff --git a/test/runtests.jl b/test/runtests.jl
index edeb4d1..5525bf9 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -713,6 +713,15 @@ t = [(a=1,b=view(data,1:2)), (a=2,b=view(data,3:4)), missing]
 
 end
 
+@testset "# 461" begin
+
+table = (; v=[v"1", v"2", missing])
+buf = Arrow.tobuffer(table)
+table2 = Arrow.Table(buf)
+@test isequal(table.v, table2.v)
+
+end
+
 end # @testset "misc"
 
 end