You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by qu...@apache.org on 2023/05/24 03:55:56 UTC

[arrow-julia] branch jq-table-partitions created (now 66399b2)

This is an automated email from the ASF dual-hosted git repository.

quinnj pushed a change to branch jq-table-partitions
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git


      at 66399b2  Add Tables.partitions definition for Arrow.Table

This branch includes the following new commits:

     new 66399b2  Add Tables.partitions definition for Arrow.Table

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[arrow-julia] 01/01: Add Tables.partitions definition for Arrow.Table

Posted by qu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

quinnj pushed a commit to branch jq-table-partitions
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git

commit 66399b2fd9118bac3f204e5dbb5310800a2d6e0f
Author: Jacob Quinn <qu...@gmail.com>
AuthorDate: Tue May 23 21:54:54 2023 -0600

    Add Tables.partitions definition for Arrow.Table
    
    We had this functionality w/ `Arrow.Stream`, but it's convenient and not
    that expensive to define it for `Arrow.Table` as well.
    
    Fixes #293.
---
 src/table.jl     | 36 ++++++++++++++++++++++++++++++++++++
 test/runtests.jl | 13 +++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/src/table.jl b/src/table.jl
index 49b6153..9d7ddef 100644
--- a/src/table.jl
+++ b/src/table.jl
@@ -261,6 +261,7 @@ types(t::Table) = getfield(t, :types)
 columns(t::Table) = getfield(t, :columns)
 lookup(t::Table) = getfield(t, :lookup)
 schema(t::Table) = getfield(t, :schema)
+metadata(t::Table) = getfield(t, :metadata)
 
 """
     Arrow.getmetadata(x)
@@ -286,6 +287,41 @@ Tables.columnnames(t::Table) = names(t)
 Tables.getcolumn(t::Table, i::Int) = columns(t)[i]
 Tables.getcolumn(t::Table, nm::Symbol) = lookup(t)[nm]
 
+struct TablePartitions
+    table::Table
+    npartitions::Int
+end
+
+function TablePartitions(table::Table)
+    cols = columns(table)
+    npartitions = if length(cols) == 0
+        0
+    elseif cols[1] isa ChainedVector
+        length(cols[1].arrays)
+    else
+        1
+    end
+    return TablePartitions(table, npartitions)
+end
+
+function Base.iterate(tp::TablePartitions, i=1)
+    i > tp.npartitions && return nothing
+    tp.npartitions == 1 && return tp.table, i + 1
+    cols = columns(tp.table)
+    newcols = AbstractVector[cols[j].arrays[i] for j in 1:length(cols)]
+    nms = names(tp.table)
+    tbl = Table(
+        nms,
+        types(tp.table),
+        newcols,
+        Dict{Symbol, AbstractVector}(nms[i] => newcols[i] for i in 1:length(nms)),
+        schema(tp.table)
+    )
+    return tbl, i + 1
+end
+
+Tables.partitions(t::Table) = TablePartitions(t)
+
 # high-level user API functions
 Table(input, pos::Integer=1, len=nothing; kw...) = Table([ArrowBlob(tobytes(input), pos, len)]; kw...)
 Table(input::Vector{UInt8}, pos::Integer=1, len=nothing; kw...) = Table([ArrowBlob(tobytes(input), pos, len)]; kw...)
diff --git a/test/runtests.jl b/test/runtests.jl
index 47a137f..c477462 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -674,6 +674,19 @@ t = Arrow.Table(joinpath(dirname(pathof(Arrow)), "../test/java_compress_len_neg_
 
 end
 
+@testset "# 293" begin
+
+t = (a = [1, 2, 3], b = [1.0, 2.0, 3.0])
+buf = Arrow.tobuffer(t)
+tbl = Arrow.Table(buf)
+parts = Tables.partitioner((t, t))
+buf2 = Arrow.tobuffer(parts)
+tbl2 = Arrow.Table(buf2)
+for t in Tables.partitions(tbl2)
+    @test t.a == tbl.a
+    @test t.b == tbl.b
+end
+
 end # @testset "misc"
 
 end