You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by qu...@apache.org on 2022/11/29 05:51:36 UTC

[arrow-julia] branch main updated: Ensure elements are converted when indexed from ArrowTypes.ToArrow (#365)

This is an automated email from the ASF dual-hosted git repository.

quinnj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git


The following commit(s) were added to refs/heads/main by this push:
     new f3e5e62  Ensure elements are converted when indexed from ArrowTypes.ToArrow (#365)
f3e5e62 is described below

commit f3e5e62d2a41a36bdaa9b015b6508c777bdce5dc
Author: Jacob Quinn <qu...@gmail.com>
AuthorDate: Mon Nov 28 22:51:29 2022 -0700

    Ensure elements are converted when indexed from ArrowTypes.ToArrow (#365)
    
    * Ensure elements are converted when indexed from ArrowTypes.ToArrow
    
    Fixes #364. This is an unfortunate bug that was sneaking in under
    the guise of "supposedly passing" tests. i.e. we had a test like:
    
    ```julia
    @test ArrowTypes.ToArrow(Any[1, 3.14]) == [1.0, 3.14]
    ```
    _BUT_, this was misleading, because, in reality, we had:
    
    ```julia
    x = ArrowTypes.ToArrow(Any[1, 3.14])
    @test x === 1
    ```
    
    i.e. when you indexed an element of `ToArrow`, no actual conversion
    was happening. So even though `ToArrow` was doing the work of figuring
    out the right concrete promoted type or union type, when you indexed
    later, it was just giving you the original element. Which actually
    works in a number of cases, for example, if the promoted type was a
    `Union` and all elements were one of the Union types, but in this case,
    where the promoted case was `Float64` and one element was an integer,
    we just got the plain integer out, when in reality, we want that integer
    converted to a float.
    
    * Oops, wrong toarrow/_convert order
    
    * Cleanup from old ArrowTypes deprecations
---
 Project.toml                     |  2 +-
 src/ArrowTypes/src/ArrowTypes.jl | 22 ++++++++++++++++++----
 src/ArrowTypes/test/tests.jl     |  4 +++-
 src/arraytypes/dictencoding.jl   |  2 +-
 test/runtests.jl                 |  5 +----
 5 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/Project.toml b/Project.toml
index 3f060c5..e10125c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -37,7 +37,7 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 WorkerUtilities = "76eceee3-57b5-4d4a-8e66-0e911cebbf60"
 
 [compat]
-ArrowTypes = "1.1"
+ArrowTypes = "1.1,2"
 BitIntegers = "0.2"
 CodecLz4 = "0.4"
 CodecZstd = "0.7"
diff --git a/src/ArrowTypes/src/ArrowTypes.jl b/src/ArrowTypes/src/ArrowTypes.jl
index 6117896..115c102 100644
--- a/src/ArrowTypes/src/ArrowTypes.jl
+++ b/src/ArrowTypes/src/ArrowTypes.jl
@@ -233,9 +233,6 @@ const UUIDSYMBOL = Symbol("JuliaLang.UUID")
 arrowname(::Type{UUID}) = UUIDSYMBOL
 JuliaType(::Val{UUIDSYMBOL}) = UUID
 fromarrow(::Type{UUID}, x::NTuple{16, UInt8}) = UUID(_cast(UInt128, x))
-# for backwards compatibility
-arrowconvert(::Type{UUID}, u::NamedTuple{(:value,),Tuple{UInt128}}) = UUID(u.value)
-arrowconvert(::Type{UUID}, u::UInt128) = UUID(u)
 
 function _cast(::Type{Y}, x)::Y where {Y}
     y = Ref{Y}()
@@ -354,6 +351,23 @@ end
 Base.IndexStyle(::Type{<:ToArrow}) = Base.IndexLinear()
 Base.size(x::ToArrow) = (length(x.data),)
 Base.eltype(x::ToArrow{T, A}) where {T, A} = T
-Base.getindex(x::ToArrow{T}, i::Int) where {T} = toarrow(getindex(x.data, i))
+function _convert(::Type{T}, x) where {T}
+    if x isa T
+        return x
+    elseif T isa Union
+        # T was a promoted Union and x is not already one of
+        # the concrete Union types, so we need to just try
+        # to convert, recursively, to one of the Union types
+        # unfortunately not much we can do more efficiently here
+        try
+            return _convert(T.a, x)
+        catch
+            return _convert(T.b, x)
+        end
+    else
+        return convert(T, x)
+    end
+end
+Base.getindex(x::ToArrow{T}, i::Int) where {T} = _convert(T, toarrow(getindex(x.data, i)))
 
 end # module ArrowTypes
diff --git a/src/ArrowTypes/test/tests.jl b/src/ArrowTypes/test/tests.jl
index eeeb2e2..7552d89 100644
--- a/src/ArrowTypes/test/tests.jl
+++ b/src/ArrowTypes/test/tests.jl
@@ -141,7 +141,9 @@ v_nt = (major=1, minor=0, patch=0, prerelease=(), build=())
 
 @test ArrowTypes.ToArrow([1,2,3]) == [1,2,3]
 @test ArrowTypes.ToArrow([:hey, :ho]) == ["hey", "ho"]
-@test ArrowTypes.ToArrow(Any[1, 3.14]) == [1.0, 3.14]
+x = ArrowTypes.ToArrow(Any[1, 3.14])
+@test x[1] === 1.0
+@test x[2] === 3.14
 @test ArrowTypes.ToArrow(Any[1, 3.14, "hey"]) == [1.0, 3.14, "hey"]
 
 end
diff --git a/src/arraytypes/dictencoding.jl b/src/arraytypes/dictencoding.jl
index e08f1a0..6477489 100644
--- a/src/arraytypes/dictencoding.jl
+++ b/src/arraytypes/dictencoding.jl
@@ -45,7 +45,7 @@ Base.size(d::DictEncoding) = size(d.data)
 
 @propagate_inbounds function Base.getindex(d::DictEncoding{T}, i::Integer) where {T}
     @boundscheck checkbounds(d, i)
-    return @inbounds ArrowTypes.arrowconvert(T, d.data[i])
+    return @inbounds ArrowTypes.fromarrow(T, d.data[i])
 end
 
 # convenience wrapper to signal that an input column should be
diff --git a/test/runtests.jl b/test/runtests.jl
index 87e561e..2fd9ab2 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -222,10 +222,7 @@ tt = Arrow.Table(Arrow.tobuffer(t))
 @test length(tt) == length(t)
 @test all(isequal.(values(t), values(tt)))
 
-# 89 etc. - test deprecation paths for old UUID autoconversion + UUID FixedSizeListKind overloads
-u = 0x6036fcbd20664bd8a65cdfa25434513f
-@test Arrow.ArrowTypes.arrowconvert(UUID, (value=u,)) === UUID(u)
-@test Arrow.ArrowTypes.arrowconvert(UUID, u) === UUID(u)
+# 89 etc. - UUID FixedSizeListKind overloads
 @test Arrow.ArrowTypes.gettype(Arrow.ArrowTypes.ArrowKind(UUID)) == UInt8
 @test Arrow.ArrowTypes.getsize(Arrow.ArrowTypes.ArrowKind(UUID)) == 16