You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by qu...@apache.org on 2023/06/01 04:35:48 UTC

[arrow-julia] 01/01: Return SubArrays when possible for arrow list types

This is an automated email from the ASF dual-hosted git repository.

quinnj pushed a commit to branch jq-subarray-return
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git

commit 61c4c03af1c01dc92ed0ce5c887a392e181b9e7b
Author: Jacob Quinn <qu...@gmail.com>
AuthorDate: Wed May 31 22:35:31 2023 -0600

    Return SubArrays when possible for arrow list types
---
 src/table.jl | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/table.jl b/src/table.jl
index da23038..80d8fc6 100644
--- a/src/table.jl
+++ b/src/table.jl
@@ -625,6 +625,8 @@ function reinterp(::Type{T}, batch, buf, compression) where {T}
     end
 end
 
+const SubVector{T, P} = SubArray{T, 1, P, Tuple{UnitRange{Int64}}, true}
+
 function build(f::Meta.Field, L::ListTypes, batch, rb, de, nodeidx, bufferidx, convert)
     @debugv 2 "building array: L = $L"
     validity = buildbitmap(batch, rb, nodeidx, bufferidx)
@@ -637,16 +639,23 @@ function build(f::Meta.Field, L::ListTypes, batch, rb, de, nodeidx, bufferidx, c
     bufferidx += 1
     len = rb.nodes[nodeidx].length
     nodeidx += 1
+    meta = buildmetadata(f.custom_metadata)
     if L isa Meta.Utf8 || L isa Meta.LargeUtf8 || L isa Meta.Binary || L isa Meta.LargeBinary
         buffer = rb.buffers[bufferidx]
         bytes, A = reinterp(UInt8, batch, buffer, rb.compression)
         bufferidx += 1
+        T = juliaeltype(f, meta, convert)
     else
         bytes = UInt8[]
         A, nodeidx, bufferidx = build(f.children[1], batch, rb, de, nodeidx, bufferidx, convert)
+        T = juliaeltype(f, meta, convert)
+        # juliaeltype returns Vector for List, translate to SubArray
+        S = Base.nonmissingtype(T)
+        if S <: Vector
+            ST = SubVector{eltype(S), typeof(A)}
+            T = S == T ? ST : Union{Missing, ST}
+        end
     end
-    meta = buildmetadata(f.custom_metadata)
-    T = juliaeltype(f, meta, convert)
     return List{T, OT, typeof(A)}(bytes, validity, offsets, A, len, meta), nodeidx, bufferidx
 end