You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by qu...@apache.org on 2023/06/01 04:35:48 UTC
[arrow-julia] 01/01: Return SubArrays when possible for arrow list types
This is an automated email from the ASF dual-hosted git repository.
quinnj pushed a commit to branch jq-subarray-return
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git
commit 61c4c03af1c01dc92ed0ce5c887a392e181b9e7b
Author: Jacob Quinn <qu...@gmail.com>
AuthorDate: Wed May 31 22:35:31 2023 -0600
Return SubArrays when possible for arrow list types
---
src/table.jl | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/src/table.jl b/src/table.jl
index da23038..80d8fc6 100644
--- a/src/table.jl
+++ b/src/table.jl
@@ -625,6 +625,8 @@ function reinterp(::Type{T}, batch, buf, compression) where {T}
end
end
+const SubVector{T, P} = SubArray{T, 1, P, Tuple{UnitRange{Int64}}, true}
+
function build(f::Meta.Field, L::ListTypes, batch, rb, de, nodeidx, bufferidx, convert)
@debugv 2 "building array: L = $L"
validity = buildbitmap(batch, rb, nodeidx, bufferidx)
@@ -637,16 +639,23 @@ function build(f::Meta.Field, L::ListTypes, batch, rb, de, nodeidx, bufferidx, c
bufferidx += 1
len = rb.nodes[nodeidx].length
nodeidx += 1
+ meta = buildmetadata(f.custom_metadata)
if L isa Meta.Utf8 || L isa Meta.LargeUtf8 || L isa Meta.Binary || L isa Meta.LargeBinary
buffer = rb.buffers[bufferidx]
bytes, A = reinterp(UInt8, batch, buffer, rb.compression)
bufferidx += 1
+ T = juliaeltype(f, meta, convert)
else
bytes = UInt8[]
A, nodeidx, bufferidx = build(f.children[1], batch, rb, de, nodeidx, bufferidx, convert)
+ T = juliaeltype(f, meta, convert)
+ # juliaeltype returns Vector for List, translate to SubArray
+ S = Base.nonmissingtype(T)
+ if S <: Vector
+ ST = SubVector{eltype(S), typeof(A)}
+ T = S == T ? ST : Union{Missing, ST}
+ end
end
- meta = buildmetadata(f.custom_metadata)
- T = juliaeltype(f, meta, convert)
return List{T, OT, typeof(A)}(bytes, validity, offsets, A, len, meta), nodeidx, bufferidx
end