You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by qu...@apache.org on 2023/06/01 04:35:47 UTC

[arrow-julia] branch jq-subarray-return created (now 61c4c03)

This is an automated email from the ASF dual-hosted git repository.

quinnj pushed a change to branch jq-subarray-return
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git


      at 61c4c03  Return SubArrays when possible for arrow list types

This branch includes the following new commits:

     new 61c4c03  Return SubArrays when possible for arrow list types

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[arrow-julia] 01/01: Return SubArrays when possible for arrow list types

Posted by qu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

quinnj pushed a commit to branch jq-subarray-return
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git

commit 61c4c03af1c01dc92ed0ce5c887a392e181b9e7b
Author: Jacob Quinn <qu...@gmail.com>
AuthorDate: Wed May 31 22:35:31 2023 -0600

    Return SubArrays when possible for arrow list types
---
 src/table.jl | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/table.jl b/src/table.jl
index da23038..80d8fc6 100644
--- a/src/table.jl
+++ b/src/table.jl
@@ -625,6 +625,8 @@ function reinterp(::Type{T}, batch, buf, compression) where {T}
     end
 end
 
+const SubVector{T, P} = SubArray{T, 1, P, Tuple{UnitRange{Int64}}, true}
+
 function build(f::Meta.Field, L::ListTypes, batch, rb, de, nodeidx, bufferidx, convert)
     @debugv 2 "building array: L = $L"
     validity = buildbitmap(batch, rb, nodeidx, bufferidx)
@@ -637,16 +639,23 @@ function build(f::Meta.Field, L::ListTypes, batch, rb, de, nodeidx, bufferidx, c
     bufferidx += 1
     len = rb.nodes[nodeidx].length
     nodeidx += 1
+    meta = buildmetadata(f.custom_metadata)
     if L isa Meta.Utf8 || L isa Meta.LargeUtf8 || L isa Meta.Binary || L isa Meta.LargeBinary
         buffer = rb.buffers[bufferidx]
         bytes, A = reinterp(UInt8, batch, buffer, rb.compression)
         bufferidx += 1
+        T = juliaeltype(f, meta, convert)
     else
         bytes = UInt8[]
         A, nodeidx, bufferidx = build(f.children[1], batch, rb, de, nodeidx, bufferidx, convert)
+        T = juliaeltype(f, meta, convert)
+        # juliaeltype returns Vector for List, translate to SubArray
+        S = Base.nonmissingtype(T)
+        if S <: Vector
+            ST = SubVector{eltype(S), typeof(A)}
+            T = S == T ? ST : Union{Missing, ST}
+        end
     end
-    meta = buildmetadata(f.custom_metadata)
-    T = juliaeltype(f, meta, convert)
     return List{T, OT, typeof(A)}(bytes, validity, offsets, A, len, meta), nodeidx, bufferidx
 end