You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ba...@apache.org on 2023/06/14 00:16:21 UTC
[arrow-julia] branch main updated: Formatting (#464)
This is an automated email from the ASF dual-hosted git repository.
baumgold pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git
The following commit(s) were added to refs/heads/main by this push:
new fc7cc2d Formatting (#464)
fc7cc2d is described below
commit fc7cc2d7512c9699cb73109c5b646b6d377fce36
Author: Ben Baumgold <49...@users.noreply.github.com>
AuthorDate: Tue Jun 13 20:16:16 2023 -0400
Formatting (#464)
I used the default JuliaFormatter options, but we can customize them with
a `.JuliaFormatter.toml` file if we want to. Details are available here:
https://domluna.github.io/JuliaFormatter.jl
Fixes #398. CC: @svilupp
---
src/metadata/Flatbuf.jl => .JuliaFormatter.toml | 14 +-
.github/workflows/ci.yml | 30 +-
docs/make.jl | 15 +-
src/Arrow.jl | 18 +-
src/ArrowTypes/src/ArrowTypes.jl | 109 +-
src/ArrowTypes/test/tests.jl | 391 +++---
src/FlatBuffers/FlatBuffers.jl | 9 +-
src/FlatBuffers/builder.jl | 42 +-
src/FlatBuffers/table.jl | 34 +-
src/append.jl | 146 ++-
src/arraytypes/arraytypes.jl | 97 +-
src/arraytypes/bool.jl | 18 +-
src/arraytypes/compressed.jl | 18 +-
src/arraytypes/dictencoding.jl | 191 ++-
src/arraytypes/fixedsizelist.jl | 71 +-
src/arraytypes/list.jl | 57 +-
src/arraytypes/map.jl | 51 +-
src/arraytypes/primitive.jl | 22 +-
src/arraytypes/struct.jl | 55 +-
src/arraytypes/unions.jl | 137 ++-
src/eltypes.jl | 283 +++--
src/metadata/File.jl | 28 +-
src/metadata/Flatbuf.jl | 2 +-
src/metadata/Message.jl | 51 +-
src/metadata/Schema.jl | 117 +-
src/show.jl | 17 +-
src/table.jl | 242 +++-
src/utils.jl | 18 +-
src/write.jl | 307 ++++-
test/arrowjson.jl | 266 ++--
test/dates.jl | 18 +-
test/pyarrow_roundtrip.jl | 56 +-
test/runtests.jl | 1504 ++++++++++++++---------
test/testappend.jl | 43 +-
test/testtables.jl | 491 +++++---
35 files changed, 3251 insertions(+), 1717 deletions(-)
diff --git a/src/metadata/Flatbuf.jl b/.JuliaFormatter.toml
similarity index 83%
copy from src/metadata/Flatbuf.jl
copy to .JuliaFormatter.toml
index 127e91b..4624423 100644
--- a/src/metadata/Flatbuf.jl
+++ b/.JuliaFormatter.toml
@@ -1,3 +1,4 @@
+
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
@@ -14,13 +15,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-module Flatbuf
-
-using EnumX
-using ..FlatBuffers
-
-include("Schema.jl")
-include("File.jl")
-include("Message.jl")
+# https://github.com/domluna/JuliaFormatter.jl/blob/master/README.md
-end # module
\ No newline at end of file
+whitespace_ops_in_indices = true
+remove_extra_newlines = true
+whitespace_in_kwargs = false
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f3b2a9b..03b31f2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -26,7 +26,7 @@ jobs:
name: Audit licenses
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- uses: actions/setup-python@v2
with:
python-version: '3.x'
@@ -42,7 +42,7 @@ jobs:
- macos-latest
- ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- name: Create
run: |
git config user.name "github-actions[bot]"
@@ -81,7 +81,7 @@ jobs:
- x64
nthreads: [1, 2]
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.version }}
@@ -149,9 +149,31 @@ jobs:
name: Documentation
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- uses: julia-actions/julia-buildpkg@latest
- uses: julia-actions/julia-docdeploy@latest
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
+ Format:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: julia-actions/setup-julia@v1
+ - uses: actions/checkout@v3
+ - name: Install JuliaFormatter and format
+ # This will use the latest version by default but you can set the version like so:
+ # julia -e 'using Pkg; Pkg.add(PackageSpec(name="JuliaFormatter", version="0.13.0"))'
+ run: |
+ julia -e 'using Pkg; Pkg.add(PackageSpec(name="JuliaFormatter"))'
+ julia -e 'using JuliaFormatter; format(".", verbose=true)'
+ - name: Format check
+ run: |
+ julia -e '
+ out = Cmd(`git diff --name-only`) |> read |> String
+ if out == ""
+ exit(0)
+ else
+ @error "Some files have not been formatted !!!"
+ write(stdout, out)
+ exit(1)
+ end'
diff --git a/docs/make.jl b/docs/make.jl
index fb74460..4d3511f 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -18,7 +18,6 @@
using Documenter
using Arrow
-
makedocs(;
modules=[Arrow],
repo="https://github.com/apache/arrow-julia/blob/{commit}{path}#L{line}",
@@ -28,15 +27,11 @@ makedocs(;
canonical="https://arrow.juliadata.org/",
assets=String[],
),
- pages = [
- "Home" => "index.md",
+ pages=[
+ "Home" => "index.md",
"User Manual" => "manual.md",
- "API Reference" => "reference.md"
- ]
+ "API Reference" => "reference.md",
+ ],
)
-deploydocs(;
- repo="github.com/apache/arrow-julia",
- devbranch = "main",
- branch = "asf-site"
-)
+deploydocs(; repo="github.com/apache/arrow-julia", devbranch="main", branch="asf-site")
diff --git a/src/Arrow.jl b/src/Arrow.jl
index 9d5a7fb..efef826 100644
--- a/src/Arrow.jl
+++ b/src/Arrow.jl
@@ -45,7 +45,15 @@ using Base.Iterators
using Mmap
using LoggingExtras
import Dates
-using DataAPI, Tables, SentinelArrays, PooledArrays, CodecLz4, CodecZstd, TimeZones, BitIntegers, ConcurrentUtilities
+using DataAPI,
+ Tables,
+ SentinelArrays,
+ PooledArrays,
+ CodecLz4,
+ CodecZstd,
+ TimeZones,
+ BitIntegers,
+ ConcurrentUtilities
export ArrowTypes
@@ -60,7 +68,8 @@ include("FlatBuffers/FlatBuffers.jl")
using .FlatBuffers
include("metadata/Flatbuf.jl")
-using .Flatbuf; const Meta = Flatbuf
+using .Flatbuf
+const Meta = Flatbuf
using ArrowTypes
include("utils.jl")
@@ -111,12 +120,13 @@ function access_threaded(f, v::Vector)
end
return x
end
-@noinline _length_assert() = @assert false "0 < tid <= v"
+@noinline _length_assert() = @assert false "0 < tid <= v"
zstd_compressor() = access_threaded(init_zstd_compressor, ZSTD_COMPRESSOR)
zstd_decompressor() = access_threaded(init_zstd_decompressor, ZSTD_DECOMPRESSOR)
lz4_frame_compressor() = access_threaded(init_lz4_frame_compressor, LZ4_FRAME_COMPRESSOR)
-lz4_frame_decompressor() = access_threaded(init_lz4_frame_decompressor, LZ4_FRAME_DECOMPRESSOR)
+lz4_frame_decompressor() =
+ access_threaded(init_lz4_frame_decompressor, LZ4_FRAME_DECOMPRESSOR)
function __init__()
nt = @static if isdefined(Base.Threads, :maxthreadid)
diff --git a/src/ArrowTypes/src/ArrowTypes.jl b/src/ArrowTypes/src/ArrowTypes.jl
index 60663bd..bd67c5f 100644
--- a/src/ArrowTypes/src/ArrowTypes.jl
+++ b/src/ArrowTypes/src/ArrowTypes.jl
@@ -23,7 +23,20 @@ module ArrowTypes
using Sockets
using UUIDs
-export ArrowKind, NullKind, PrimitiveKind, BoolKind, ListKind, FixedSizeListKind, MapKind, StructKind, UnionKind, DictEncodedKind, toarrow, arrowname, fromarrow, ToArrow
+export ArrowKind,
+ NullKind,
+ PrimitiveKind,
+ BoolKind,
+ ListKind,
+ FixedSizeListKind,
+ MapKind,
+ StructKind,
+ UnionKind,
+ DictEncodedKind,
+ toarrow,
+ arrowname,
+ fromarrow,
+ ToArrow
"""
ArrowTypes.ArrowKind(T)
@@ -69,7 +82,7 @@ and [`ArrowTypes.fromarrow`](@ref).
function ArrowType end
ArrowType(::Type{T}) where {T} = T
ArrowType(::Type{Any}) = Any
-ArrowType(::Type{Union{Missing, T}}) where {T} = Union{Missing, ArrowType(T)}
+ArrowType(::Type{Union{Missing,T}}) where {T} = Union{Missing,ArrowType(T)}
ArrowType(::Type{Missing}) = Missing
"""
@@ -166,10 +179,10 @@ A few `ArrowKind`s have/allow slightly more custom overloads for their `fromarro
function fromarrow end
fromarrow(::Type{T}, x::T) where {T} = x
fromarrow(::Type{T}, x...) where {T} = T(x...)
-fromarrow(::Type{Union{Missing, T}}, ::Missing) where {T} = missing
-fromarrow(::Type{Union{Missing, T}}, x::T) where {T} = x
-fromarrow(::Type{Union{Missing, T}}, x::T) where {T<:NamedTuple} = x # ambiguity fix
-fromarrow(::Type{Union{Missing, T}}, x) where {T} = fromarrow(T, x)
+fromarrow(::Type{Union{Missing,T}}, ::Missing) where {T} = missing
+fromarrow(::Type{Union{Missing,T}}, x::T) where {T} = x
+fromarrow(::Type{Union{Missing,T}}, x::T) where {T<:NamedTuple} = x # ambiguity fix
+fromarrow(::Type{Union{Missing,T}}, x) where {T} = fromarrow(T, x)
"NullKind data is actually not physically stored since the data is constant; just the length is needed"
struct NullKind <: ArrowKind end
@@ -211,9 +224,11 @@ ArrowKind(::Type{<:AbstractString}) = ListKind{true}()
# Treate Base.CodeUnits as Binary arrow type
ArrowKind(::Type{<:Base.CodeUnits}) = ListKind{true}()
-fromarrow(::Type{T}, ptr::Ptr{UInt8}, len::Int) where {T} = fromarrow(T, unsafe_string(ptr, len))
-fromarrow(::Type{T}, x) where {T <: Base.CodeUnits} = Base.CodeUnits(x)
-fromarrow(::Type{Union{Missing, Base.CodeUnits}}, x) = x === missing ? missing : Base.CodeUnits(x)
+fromarrow(::Type{T}, ptr::Ptr{UInt8}, len::Int) where {T} =
+ fromarrow(T, unsafe_string(ptr, len))
+fromarrow(::Type{T}, x) where {T<:Base.CodeUnits} = Base.CodeUnits(x)
+fromarrow(::Type{Union{Missing,Base.CodeUnits}}, x) =
+ x === missing ? missing : Base.CodeUnits(x)
ArrowType(::Type{Symbol}) = String
toarrow(x::Symbol) = String(x)
@@ -224,30 +239,31 @@ _symbol(ptr, len) = ccall(:jl_symbol_n, Ref{Symbol}, (Ptr{UInt8}, Int), ptr, len
fromarrow(::Type{Symbol}, ptr::Ptr{UInt8}, len::Int) = _symbol(ptr, len)
ArrowKind(::Type{<:AbstractArray}) = ListKind()
-fromarrow(::Type{A}, x::A) where {A <: AbstractVector{T}} where {T} = x
-fromarrow(::Type{A}, x::AbstractVector{T}) where {A <: AbstractVector{T}} where {T} = convert(A, x)
+fromarrow(::Type{A}, x::A) where {A<:AbstractVector{T}} where {T} = x
+fromarrow(::Type{A}, x::AbstractVector{T}) where {A<:AbstractVector{T}} where {T} =
+ convert(A, x)
ArrowKind(::Type{<:AbstractSet}) = ListKind()
-ArrowType(::Type{T}) where {T <: AbstractSet{S}} where {S} = Vector{S}
+ArrowType(::Type{T}) where {T<:AbstractSet{S}} where {S} = Vector{S}
toarrow(x::AbstractSet) = collect(x)
const SET = Symbol("JuliaLang.Set")
arrowname(::Type{<:AbstractSet}) = SET
-JuliaType(::Val{SET}, ::Type{T}) where {T <: AbstractVector{S}} where {S} = Set{S}
-fromarrow(::Type{T}, x) where {T <: AbstractSet} = T(x)
+JuliaType(::Val{SET}, ::Type{T}) where {T<:AbstractVector{S}} where {S} = Set{S}
+fromarrow(::Type{T}, x) where {T<:AbstractSet} = T(x)
"FixedSizeListKind data are stored in a single contiguous buffer; individual elements can be computed based on the fixed size of the lists"
-struct FixedSizeListKind{N, T} <: ArrowKind end
-gettype(::FixedSizeListKind{N, T}) where {N, T} = T
-getsize(::FixedSizeListKind{N, T}) where {N, T} = N
+struct FixedSizeListKind{N,T} <: ArrowKind end
+gettype(::FixedSizeListKind{N,T}) where {N,T} = T
+getsize(::FixedSizeListKind{N,T}) where {N,T} = N
-ArrowKind(::Type{NTuple{N, T}}) where {N, T} = FixedSizeListKind{N, T}()
+ArrowKind(::Type{NTuple{N,T}}) where {N,T} = FixedSizeListKind{N,T}()
-ArrowKind(::Type{UUID}) = FixedSizeListKind{16, UInt8}()
-ArrowType(::Type{UUID}) = NTuple{16, UInt8}
-toarrow(x::UUID) = _cast(NTuple{16, UInt8}, x.value)
+ArrowKind(::Type{UUID}) = FixedSizeListKind{16,UInt8}()
+ArrowType(::Type{UUID}) = NTuple{16,UInt8}
+toarrow(x::UUID) = _cast(NTuple{16,UInt8}, x.value)
const UUIDSYMBOL = Symbol("JuliaLang.UUID")
arrowname(::Type{UUID}) = UUIDSYMBOL
JuliaType(::Val{UUIDSYMBOL}) = UUID
-fromarrow(::Type{UUID}, x::NTuple{16, UInt8}) = UUID(_cast(UInt128, x))
+fromarrow(::Type{UUID}, x::NTuple{16,UInt8}) = UUID(_cast(UInt128, x))
ArrowKind(::Type{IPv4}) = PrimitiveKind()
ArrowType(::Type{IPv4}) = UInt32
@@ -257,13 +273,13 @@ arrowname(::Type{IPv4}) = IPV4_SYMBOL
JuliaType(::Val{IPV4_SYMBOL}) = IPv4
fromarrow(::Type{IPv4}, x::Integer) = IPv4(x)
-ArrowKind(::Type{IPv6}) = FixedSizeListKind{16, UInt8}()
-ArrowType(::Type{IPv6}) = NTuple{16, UInt8}
-toarrow(x::IPv6) = _cast(NTuple{16, UInt8}, x.host)
+ArrowKind(::Type{IPv6}) = FixedSizeListKind{16,UInt8}()
+ArrowType(::Type{IPv6}) = NTuple{16,UInt8}
+toarrow(x::IPv6) = _cast(NTuple{16,UInt8}, x.host)
const IPV6_SYMBOL = Symbol("JuliaLang.IPv6")
arrowname(::Type{IPv6}) = IPV6_SYMBOL
JuliaType(::Val{IPV6_SYMBOL}) = IPv6
-fromarrow(::Type{IPv6}, x::NTuple{16, UInt8}) = IPv6(_cast(UInt128, x))
+fromarrow(::Type{IPv6}, x::NTuple{16,UInt8}) = IPv6(_cast(UInt128, x))
function _cast(::Type{Y}, x)::Y where {Y}
y = Ref{Y}()
@@ -286,18 +302,21 @@ struct StructKind <: ArrowKind end
ArrowKind(::Type{<:NamedTuple}) = StructKind()
-fromarrow(::Type{NamedTuple{names, types}}, x::NamedTuple{names, types}) where {names, types <: Tuple} = x
+fromarrow(
+ ::Type{NamedTuple{names,types}},
+ x::NamedTuple{names,types},
+) where {names,types<:Tuple} = x
fromarrow(::Type{T}, x::NamedTuple) where {T} = fromarrow(T, Tuple(x)...)
ArrowKind(::Type{<:Tuple}) = StructKind()
ArrowKind(::Type{Tuple{}}) = StructKind()
const TUPLE = Symbol("JuliaLang.Tuple")
# needed to disambiguate the FixedSizeList case for NTuple
-arrowname(::Type{NTuple{N, T}}) where {N, T} = EMPTY_SYMBOL
-arrowname(::Type{T}) where {T <: Tuple} = TUPLE
+arrowname(::Type{NTuple{N,T}}) where {N,T} = EMPTY_SYMBOL
+arrowname(::Type{T}) where {T<:Tuple} = TUPLE
arrowname(::Type{Tuple{}}) = TUPLE
-JuliaType(::Val{TUPLE}, ::Type{NamedTuple{names, types}}) where {names, types <: Tuple} = types
-fromarrow(::Type{T}, x::NamedTuple) where {T <: Tuple} = Tuple(x)
+JuliaType(::Val{TUPLE}, ::Type{NamedTuple{names,types}}) where {names,types<:Tuple} = types
+fromarrow(::Type{T}, x::NamedTuple) where {T<:Tuple} = Tuple(x)
# VersionNumber
const VERSION_NUMBER = Symbol("JuliaLang.VersionNumber")
@@ -338,11 +357,11 @@ default(::Type{<:AbstractString}) = ""
default(::Type{Any}) = nothing
default(::Type{Missing}) = missing
default(::Type{Nothing}) = nothing
-default(::Type{Union{T, Missing}}) where {T} = default(T)
-default(::Type{Union{T, Nothing}}) where {T} = default(T)
-default(::Type{Union{T, Missing, Nothing}}) where {T} = default(T)
+default(::Type{Union{T,Missing}}) where {T} = default(T)
+default(::Type{Union{T,Nothing}}) where {T} = default(T)
+default(::Type{Union{T,Missing,Nothing}}) where {T} = default(T)
-function default(::Type{A}) where {A <: AbstractVector{T}} where {T}
+function default(::Type{A}) where {A<:AbstractVector{T}} where {T}
a = similar(A, 1)
a[1] = default(T)
return a
@@ -350,28 +369,30 @@ end
default(::Type{SubArray{T,N,P,I,L}}) where {T,N,P,I,L} = view(default(P), 0:-1)
-default(::Type{NTuple{N, T}}) where {N, T} = ntuple(i -> default(T), N)
+default(::Type{NTuple{N,T}}) where {N,T} = ntuple(i -> default(T), N)
default(::Type{Tuple{}}) = ()
-function default(::Type{T}) where {T <: Tuple}
+function default(::Type{T}) where {T<:Tuple}
T === Tuple{} && return ()
N = Base.isvarargtype(T.parameters[end]) ? length(T.parameters) - 1 : fieldcount(T)
return Tuple(default(fieldtype(T, i)) for i = 1:N)
end
-default(::Type{T}) where {T <: AbstractDict} = T()
-default(::Type{NamedTuple{names, types}}) where {names, types} = NamedTuple{names}(Tuple(default(fieldtype(types, i)) for i = 1:length(names)))
+default(::Type{T}) where {T<:AbstractDict} = T()
+default(::Type{NamedTuple{names,types}}) where {names,types} =
+ NamedTuple{names}(Tuple(default(fieldtype(types, i)) for i = 1:length(names)))
function promoteunion(T, S)
new = promote_type(T, S)
- return isabstracttype(new) ? Union{T, S} : new
+ return isabstracttype(new) ? Union{T,S} : new
end
# lazily call toarrow(x) on getindex for each x in data
-struct ToArrow{T, A} <: AbstractVector{T}
+struct ToArrow{T,A} <: AbstractVector{T}
data::A
end
-concrete_or_concreteunion(T) = isconcretetype(T) ||
+concrete_or_concreteunion(T) =
+ isconcretetype(T) ||
(T isa Union && concrete_or_concreteunion(T.a) && concrete_or_concreteunion(T.b))
function ToArrow(x::A) where {A}
@@ -392,12 +413,12 @@ function ToArrow(x::A) where {A}
T = promoteunion(T, typeof(toarrow(default(S))))
end
end
- return ToArrow{T, A}(x)
+ return ToArrow{T,A}(x)
end
Base.IndexStyle(::Type{<:ToArrow}) = Base.IndexLinear()
Base.size(x::ToArrow) = (length(x.data),)
-Base.eltype(::Type{TA}) where {T, A, TA<:ToArrow{T, A}} = T
+Base.eltype(::Type{TA}) where {T,A,TA<:ToArrow{T,A}} = T
function _convert(::Type{T}, x) where {T}
if x isa T
return x
diff --git a/src/ArrowTypes/test/tests.jl b/src/ArrowTypes/test/tests.jl
index 143041d..79dd604 100644
--- a/src/ArrowTypes/test/tests.jl
+++ b/src/ArrowTypes/test/tests.jl
@@ -23,203 +23,204 @@ struct Person
end
@testset "ArrowTypes" begin
-
-@test ArrowTypes.ArrowKind(MyInt) == ArrowTypes.PrimitiveKind()
-@test ArrowTypes.ArrowKind(Person) == ArrowTypes.StructKind()
-@test ArrowTypes.ArrowKind(Person(0, "bob")) == ArrowTypes.StructKind()
-
-@test ArrowTypes.ArrowType(Int) == Int
-@test ArrowTypes.ArrowType(Union{Int, Missing}) == Union{Int, Missing}
-@test ArrowTypes.ArrowType(Missing) == Missing
-
-@test ArrowTypes.toarrow(1) === 1
-
-@test ArrowTypes.arrowname(Int) == Symbol()
-@test !ArrowTypes.hasarrowname(Int)
-
-@test ArrowTypes.arrowmetadata(Int) == ""
-@test ArrowTypes.arrowmetadata(Union{Nothing,Int}) == ""
-@test ArrowTypes.arrowmetadata(Union{Missing,Int}) == ""
-
-@test ArrowTypes.JuliaType(1) === nothing
-@test ArrowTypes.JuliaType(1, Int) === nothing
-@test ArrowTypes.JuliaType(1, Int, nothing) === nothing
-
-@test ArrowTypes.fromarrow(Int, 1) === 1
-@test ArrowTypes.fromarrow(Person, 1, "bob") == Person(1, "bob")
-@test ArrowTypes.fromarrow(Union{Int, Missing}, missing) === missing
-@test ArrowTypes.fromarrow(Union{Int, Missing}, 1) === 1
-@test ArrowTypes.fromarrow(Union{Float64, Missing}, 1) === 1.0
-
-@test ArrowTypes.ArrowKind(Missing) == ArrowTypes.NullKind()
-@test ArrowTypes.ArrowKind(Nothing) == ArrowTypes.NullKind()
-@test ArrowTypes.ArrowType(Nothing) == Missing
-@test ArrowTypes.toarrow(nothing) === missing
-@test ArrowTypes.arrowname(Nothing) == ArrowTypes.NOTHING
-@test ArrowTypes.JuliaType(Val(ArrowTypes.NOTHING)) == Nothing
-@test ArrowTypes.fromarrow(Nothing, missing) === nothing
-
-@test ArrowTypes.ArrowKind(Int) == ArrowTypes.PrimitiveKind()
-@test ArrowTypes.ArrowKind(Float64) == ArrowTypes.PrimitiveKind()
-
-@test ArrowTypes.ArrowType(Char) == UInt32
-@test ArrowTypes.toarrow('1') == UInt32('1')
-@test ArrowTypes.arrowname(Char) == ArrowTypes.CHAR
-@test ArrowTypes.JuliaType(Val(ArrowTypes.CHAR)) == Char
-@test ArrowTypes.fromarrow(Char, UInt32('1')) == '1'
-
-@test ArrowTypes.ArrowKind(Bool) == ArrowTypes.BoolKind()
-
-@test ArrowTypes.ListKind() == ArrowTypes.ListKind{false}()
-@test !ArrowTypes.isstringtype(ArrowTypes.ListKind())
-@test !ArrowTypes.isstringtype(typeof(ArrowTypes.ListKind()))
-@test ArrowTypes.ArrowKind(String) == ArrowTypes.ListKind{true}()
-@test ArrowTypes.ArrowKind(Base.CodeUnits) == ArrowTypes.ListKind{true}()
-
-hey = collect(b"hey")
-@test ArrowTypes.fromarrow(String, pointer(hey), 3) == "hey"
-@test ArrowTypes.fromarrow(Base.CodeUnits, pointer(hey), 3) == b"hey"
-@test ArrowTypes.fromarrow(Union{Base.CodeUnits, Missing}, pointer(hey), 3) == b"hey"
-
-@test ArrowTypes.ArrowType(Symbol) == String
-@test ArrowTypes.toarrow(:hey) == "hey"
-@test ArrowTypes.arrowname(Symbol) == ArrowTypes.SYMBOL
-@test ArrowTypes.JuliaType(Val(ArrowTypes.SYMBOL)) == Symbol
-@test ArrowTypes.fromarrow(Symbol, pointer(hey), 3) == :hey
-
-@test ArrowTypes.ArrowKind(Vector{Int}) == ArrowTypes.ListKind()
-@test ArrowTypes.ArrowKind(Set{Int}) == ArrowTypes.ListKind()
-@test ArrowTypes.ArrowType(Set{Int}) == Vector{Int}
-@test typeof(ArrowTypes.toarrow(Set([1,2,3]))) <: Vector{Int}
-@test ArrowTypes.arrowname(Set{Int}) == ArrowTypes.SET
-@test ArrowTypes.JuliaType(Val(ArrowTypes.SET), Vector{Int}) == Set{Int}
-@test ArrowTypes.fromarrow(Set{Int}, [1,2,3]) == Set([1,2,3])
-
-K = ArrowTypes.ArrowKind(NTuple{3, UInt8})
-@test ArrowTypes.gettype(K) == UInt8
-@test ArrowTypes.getsize(K) == 3
-@test K == ArrowTypes.FixedSizeListKind{3, UInt8}()
-
-u = UUID(rand(UInt128))
-ubytes = ArrowTypes._cast(NTuple{16, UInt8}, u.value)
-@test ArrowTypes.ArrowKind(u) == ArrowTypes.FixedSizeListKind{16, UInt8}()
-@test ArrowTypes.ArrowType(UUID) == NTuple{16, UInt8}
-@test ArrowTypes.toarrow(u) == ubytes
-@test ArrowTypes.arrowname(UUID) == ArrowTypes.UUIDSYMBOL
-@test ArrowTypes.JuliaType(Val(ArrowTypes.UUIDSYMBOL)) == UUID
-@test ArrowTypes.fromarrow(UUID, ubytes) == u
-
-ip4 = IPv4(rand(UInt32))
-@test ArrowTypes.ArrowKind(ip4) == PrimitiveKind()
-@test ArrowTypes.ArrowType(IPv4) == UInt32
-@test ArrowTypes.toarrow(ip4) == ip4.host
-@test ArrowTypes.arrowname(IPv4) == ArrowTypes.IPV4_SYMBOL
-@test ArrowTypes.JuliaType(Val(ArrowTypes.IPV4_SYMBOL)) == IPv4
-@test ArrowTypes.fromarrow(IPv4, ip4.host) == ip4
-
-ip6 = IPv6(rand(UInt128))
-ip6_ubytes = ArrowTypes._cast(NTuple{16, UInt8}, ip6.host)
-@test ArrowTypes.ArrowKind(ip6) == ArrowTypes.FixedSizeListKind{16, UInt8}()
-@test ArrowTypes.ArrowType(IPv6) == NTuple{16, UInt8}
-@test ArrowTypes.toarrow(ip6) == ip6_ubytes
-@test ArrowTypes.arrowname(IPv6) == ArrowTypes.IPV6_SYMBOL
-@test ArrowTypes.JuliaType(Val(ArrowTypes.IPV6_SYMBOL)) == IPv6
-@test ArrowTypes.fromarrow(IPv6, ip6_ubytes) == ip6
-
-nt = (id=1, name="bob")
-@test ArrowTypes.ArrowKind(NamedTuple) == ArrowTypes.StructKind()
-@test ArrowTypes.fromarrow(typeof(nt), nt) === nt
-@test ArrowTypes.fromarrow(Person, nt) == Person(1, "bob")
-@test ArrowTypes.ArrowKind(Tuple) == ArrowTypes.StructKind()
-@test ArrowTypes.ArrowKind(Tuple{}) == ArrowTypes.StructKind()
-@test ArrowTypes.arrowname(Tuple{Int, String}) == ArrowTypes.TUPLE
-@test ArrowTypes.arrowname(Tuple{}) == ArrowTypes.TUPLE
-@test ArrowTypes.JuliaType(Val(ArrowTypes.TUPLE), NamedTuple{(Symbol("1"), Symbol("2")), Tuple{Int, String}}) == Tuple{Int, String}
-@test ArrowTypes.fromarrow(Tuple{Int, String}, nt) == (1, "bob")
-@test ArrowTypes.fromarrow(Union{Missing, typeof(nt)}, nt) == nt
-# #461
-@test ArrowTypes.default(Tuple{}) == ()
-@test ArrowTypes.default(Tuple{Vararg{Int}}) == ()
-@test ArrowTypes.default(Tuple{String, Vararg{Int}}) == ("",)
-
-v = v"1"
-v_nt = (major=1, minor=0, patch=0, prerelease=(), build=())
-@test ArrowTypes.ArrowKind(VersionNumber) == ArrowTypes.StructKind()
-@test ArrowTypes.arrowname(VersionNumber) == ArrowTypes.VERSION_NUMBER
-@test ArrowTypes.JuliaType(Val(ArrowTypes.VERSION_NUMBER)) == VersionNumber
-@test ArrowTypes.fromarrow(typeof(v), v_nt) == v
-@test ArrowTypes.default(VersionNumber) == v"0"
-
-@test ArrowTypes.ArrowKind(Dict{String, Int}) == ArrowTypes.MapKind()
-@test ArrowTypes.ArrowKind(Union{String, Int}) == ArrowTypes.UnionKind()
-
-@test ArrowTypes.default(Int) == Int(0)
-@test ArrowTypes.default(Symbol) == Symbol()
-@test ArrowTypes.default(Char) == '\0'
-@test ArrowTypes.default(String) == ""
-@test ArrowTypes.default(Missing) === missing
-@test ArrowTypes.default(Nothing) === nothing
-@test ArrowTypes.default(Union{Int, Missing}) == Int(0)
-@test ArrowTypes.default(Union{Int, Nothing}) == Int(0)
-@test ArrowTypes.default(Union{Int, Missing, Nothing}) == Int(0)
-
-@test ArrowTypes.promoteunion(Int, Float64) == Float64
-@test ArrowTypes.promoteunion(Int, String) == Union{Int, String}
-
-@test ArrowTypes.concrete_or_concreteunion(Int)
-@test !ArrowTypes.concrete_or_concreteunion(Union{Real, String})
-@test !ArrowTypes.concrete_or_concreteunion(Any)
-
-@testset "ToArrow" begin
- x = ArrowTypes.ToArrow([1,2,3])
- @test x isa Vector{Int}
- @test x == [1,2,3]
-
- x = ArrowTypes.ToArrow([:hey, :ho])
- @test x isa ArrowTypes.ToArrow{String, Vector{Symbol}}
- @test eltype(x) == String
- @test x == ["hey", "ho"]
-
- x = ArrowTypes.ToArrow(Any[1, 3.14])
- @test x isa ArrowTypes.ToArrow{Float64, Vector{Any}}
- @test eltype(x) == Float64
- @test x == [1.0, 3.14]
-
- x = ArrowTypes.ToArrow(Any[1, 3.14, "hey"])
- @test x isa ArrowTypes.ToArrow{Union{Float64, String}, Vector{Any}}
- @test eltype(x) == Union{Float64, String}
- @test x == [1.0, 3.14, "hey"]
-
- @testset "respect non-missing concrete type" begin
- struct DateTimeTZ
- instant::Int64
- tz::String
+ @test ArrowTypes.ArrowKind(MyInt) == ArrowTypes.PrimitiveKind()
+ @test ArrowTypes.ArrowKind(Person) == ArrowTypes.StructKind()
+ @test ArrowTypes.ArrowKind(Person(0, "bob")) == ArrowTypes.StructKind()
+
+ @test ArrowTypes.ArrowType(Int) == Int
+ @test ArrowTypes.ArrowType(Union{Int,Missing}) == Union{Int,Missing}
+ @test ArrowTypes.ArrowType(Missing) == Missing
+
+ @test ArrowTypes.toarrow(1) === 1
+
+ @test ArrowTypes.arrowname(Int) == Symbol()
+ @test !ArrowTypes.hasarrowname(Int)
+
+ @test ArrowTypes.arrowmetadata(Int) == ""
+ @test ArrowTypes.arrowmetadata(Union{Nothing,Int}) == ""
+ @test ArrowTypes.arrowmetadata(Union{Missing,Int}) == ""
+
+ @test ArrowTypes.JuliaType(1) === nothing
+ @test ArrowTypes.JuliaType(1, Int) === nothing
+ @test ArrowTypes.JuliaType(1, Int, nothing) === nothing
+
+ @test ArrowTypes.fromarrow(Int, 1) === 1
+ @test ArrowTypes.fromarrow(Person, 1, "bob") == Person(1, "bob")
+ @test ArrowTypes.fromarrow(Union{Int,Missing}, missing) === missing
+ @test ArrowTypes.fromarrow(Union{Int,Missing}, 1) === 1
+ @test ArrowTypes.fromarrow(Union{Float64,Missing}, 1) === 1.0
+
+ @test ArrowTypes.ArrowKind(Missing) == ArrowTypes.NullKind()
+ @test ArrowTypes.ArrowKind(Nothing) == ArrowTypes.NullKind()
+ @test ArrowTypes.ArrowType(Nothing) == Missing
+ @test ArrowTypes.toarrow(nothing) === missing
+ @test ArrowTypes.arrowname(Nothing) == ArrowTypes.NOTHING
+ @test ArrowTypes.JuliaType(Val(ArrowTypes.NOTHING)) == Nothing
+ @test ArrowTypes.fromarrow(Nothing, missing) === nothing
+
+ @test ArrowTypes.ArrowKind(Int) == ArrowTypes.PrimitiveKind()
+ @test ArrowTypes.ArrowKind(Float64) == ArrowTypes.PrimitiveKind()
+
+ @test ArrowTypes.ArrowType(Char) == UInt32
+ @test ArrowTypes.toarrow('1') == UInt32('1')
+ @test ArrowTypes.arrowname(Char) == ArrowTypes.CHAR
+ @test ArrowTypes.JuliaType(Val(ArrowTypes.CHAR)) == Char
+ @test ArrowTypes.fromarrow(Char, UInt32('1')) == '1'
+
+ @test ArrowTypes.ArrowKind(Bool) == ArrowTypes.BoolKind()
+
+ @test ArrowTypes.ListKind() == ArrowTypes.ListKind{false}()
+ @test !ArrowTypes.isstringtype(ArrowTypes.ListKind())
+ @test !ArrowTypes.isstringtype(typeof(ArrowTypes.ListKind()))
+ @test ArrowTypes.ArrowKind(String) == ArrowTypes.ListKind{true}()
+ @test ArrowTypes.ArrowKind(Base.CodeUnits) == ArrowTypes.ListKind{true}()
+
+ hey = collect(b"hey")
+ @test ArrowTypes.fromarrow(String, pointer(hey), 3) == "hey"
+ @test ArrowTypes.fromarrow(Base.CodeUnits, pointer(hey), 3) == b"hey"
+ @test ArrowTypes.fromarrow(Union{Base.CodeUnits,Missing}, pointer(hey), 3) == b"hey"
+
+ @test ArrowTypes.ArrowType(Symbol) == String
+ @test ArrowTypes.toarrow(:hey) == "hey"
+ @test ArrowTypes.arrowname(Symbol) == ArrowTypes.SYMBOL
+ @test ArrowTypes.JuliaType(Val(ArrowTypes.SYMBOL)) == Symbol
+ @test ArrowTypes.fromarrow(Symbol, pointer(hey), 3) == :hey
+
+ @test ArrowTypes.ArrowKind(Vector{Int}) == ArrowTypes.ListKind()
+ @test ArrowTypes.ArrowKind(Set{Int}) == ArrowTypes.ListKind()
+ @test ArrowTypes.ArrowType(Set{Int}) == Vector{Int}
+ @test typeof(ArrowTypes.toarrow(Set([1, 2, 3]))) <: Vector{Int}
+ @test ArrowTypes.arrowname(Set{Int}) == ArrowTypes.SET
+ @test ArrowTypes.JuliaType(Val(ArrowTypes.SET), Vector{Int}) == Set{Int}
+ @test ArrowTypes.fromarrow(Set{Int}, [1, 2, 3]) == Set([1, 2, 3])
+
+ K = ArrowTypes.ArrowKind(NTuple{3,UInt8})
+ @test ArrowTypes.gettype(K) == UInt8
+ @test ArrowTypes.getsize(K) == 3
+ @test K == ArrowTypes.FixedSizeListKind{3,UInt8}()
+
+ u = UUID(rand(UInt128))
+ ubytes = ArrowTypes._cast(NTuple{16,UInt8}, u.value)
+ @test ArrowTypes.ArrowKind(u) == ArrowTypes.FixedSizeListKind{16,UInt8}()
+ @test ArrowTypes.ArrowType(UUID) == NTuple{16,UInt8}
+ @test ArrowTypes.toarrow(u) == ubytes
+ @test ArrowTypes.arrowname(UUID) == ArrowTypes.UUIDSYMBOL
+ @test ArrowTypes.JuliaType(Val(ArrowTypes.UUIDSYMBOL)) == UUID
+ @test ArrowTypes.fromarrow(UUID, ubytes) == u
+
+ ip4 = IPv4(rand(UInt32))
+ @test ArrowTypes.ArrowKind(ip4) == PrimitiveKind()
+ @test ArrowTypes.ArrowType(IPv4) == UInt32
+ @test ArrowTypes.toarrow(ip4) == ip4.host
+ @test ArrowTypes.arrowname(IPv4) == ArrowTypes.IPV4_SYMBOL
+ @test ArrowTypes.JuliaType(Val(ArrowTypes.IPV4_SYMBOL)) == IPv4
+ @test ArrowTypes.fromarrow(IPv4, ip4.host) == ip4
+
+ ip6 = IPv6(rand(UInt128))
+ ip6_ubytes = ArrowTypes._cast(NTuple{16,UInt8}, ip6.host)
+ @test ArrowTypes.ArrowKind(ip6) == ArrowTypes.FixedSizeListKind{16,UInt8}()
+ @test ArrowTypes.ArrowType(IPv6) == NTuple{16,UInt8}
+ @test ArrowTypes.toarrow(ip6) == ip6_ubytes
+ @test ArrowTypes.arrowname(IPv6) == ArrowTypes.IPV6_SYMBOL
+ @test ArrowTypes.JuliaType(Val(ArrowTypes.IPV6_SYMBOL)) == IPv6
+ @test ArrowTypes.fromarrow(IPv6, ip6_ubytes) == ip6
+
+ nt = (id=1, name="bob")
+ @test ArrowTypes.ArrowKind(NamedTuple) == ArrowTypes.StructKind()
+ @test ArrowTypes.fromarrow(typeof(nt), nt) === nt
+ @test ArrowTypes.fromarrow(Person, nt) == Person(1, "bob")
+ @test ArrowTypes.ArrowKind(Tuple) == ArrowTypes.StructKind()
+ @test ArrowTypes.ArrowKind(Tuple{}) == ArrowTypes.StructKind()
+ @test ArrowTypes.arrowname(Tuple{Int,String}) == ArrowTypes.TUPLE
+ @test ArrowTypes.arrowname(Tuple{}) == ArrowTypes.TUPLE
+ @test ArrowTypes.JuliaType(
+ Val(ArrowTypes.TUPLE),
+ NamedTuple{(Symbol("1"), Symbol("2")),Tuple{Int,String}},
+ ) == Tuple{Int,String}
+ @test ArrowTypes.fromarrow(Tuple{Int,String}, nt) == (1, "bob")
+ @test ArrowTypes.fromarrow(Union{Missing,typeof(nt)}, nt) == nt
+ # #461
+ @test ArrowTypes.default(Tuple{}) == ()
+ @test ArrowTypes.default(Tuple{Vararg{Int}}) == ()
+ @test ArrowTypes.default(Tuple{String,Vararg{Int}}) == ("",)
+
+ v = v"1"
+ v_nt = (major=1, minor=0, patch=0, prerelease=(), build=())
+ @test ArrowTypes.ArrowKind(VersionNumber) == ArrowTypes.StructKind()
+ @test ArrowTypes.arrowname(VersionNumber) == ArrowTypes.VERSION_NUMBER
+ @test ArrowTypes.JuliaType(Val(ArrowTypes.VERSION_NUMBER)) == VersionNumber
+ @test ArrowTypes.fromarrow(typeof(v), v_nt) == v
+ @test ArrowTypes.default(VersionNumber) == v"0"
+
+ @test ArrowTypes.ArrowKind(Dict{String,Int}) == ArrowTypes.MapKind()
+ @test ArrowTypes.ArrowKind(Union{String,Int}) == ArrowTypes.UnionKind()
+
+ @test ArrowTypes.default(Int) == Int(0)
+ @test ArrowTypes.default(Symbol) == Symbol()
+ @test ArrowTypes.default(Char) == '\0'
+ @test ArrowTypes.default(String) == ""
+ @test ArrowTypes.default(Missing) === missing
+ @test ArrowTypes.default(Nothing) === nothing
+ @test ArrowTypes.default(Union{Int,Missing}) == Int(0)
+ @test ArrowTypes.default(Union{Int,Nothing}) == Int(0)
+ @test ArrowTypes.default(Union{Int,Missing,Nothing}) == Int(0)
+
+ @test ArrowTypes.promoteunion(Int, Float64) == Float64
+ @test ArrowTypes.promoteunion(Int, String) == Union{Int,String}
+
+ @test ArrowTypes.concrete_or_concreteunion(Int)
+ @test !ArrowTypes.concrete_or_concreteunion(Union{Real,String})
+ @test !ArrowTypes.concrete_or_concreteunion(Any)
+
+ @testset "ToArrow" begin
+ x = ArrowTypes.ToArrow([1, 2, 3])
+ @test x isa Vector{Int}
+ @test x == [1, 2, 3]
+
+ x = ArrowTypes.ToArrow([:hey, :ho])
+ @test x isa ArrowTypes.ToArrow{String,Vector{Symbol}}
+ @test eltype(x) == String
+ @test x == ["hey", "ho"]
+
+ x = ArrowTypes.ToArrow(Any[1, 3.14])
+ @test x isa ArrowTypes.ToArrow{Float64,Vector{Any}}
+ @test eltype(x) == Float64
+ @test x == [1.0, 3.14]
+
+ x = ArrowTypes.ToArrow(Any[1, 3.14, "hey"])
+ @test x isa ArrowTypes.ToArrow{Union{Float64,String},Vector{Any}}
+ @test eltype(x) == Union{Float64,String}
+ @test x == [1.0, 3.14, "hey"]
+
+ @testset "respect non-missing concrete type" begin
+ struct DateTimeTZ
+ instant::Int64
+ tz::String
+ end
+
+ struct Timestamp{TZ}
+ x::Int64
+ end
+
+ ArrowTypes.ArrowType(::Type{DateTimeTZ}) = Timestamp
+ ArrowTypes.toarrow(x::DateTimeTZ) = Timestamp{Symbol(x.tz)}(x.instant)
+ ArrowTypes.default(::Type{DateTimeTZ}) = DateTimeTZ(0, "UTC")
+
+ T = Union{DateTimeTZ,Missing}
+ @test !ArrowTypes.concrete_or_concreteunion(ArrowTypes.ArrowType(T))
+ @test eltype(ArrowTypes.ToArrow(T[missing])) == Union{Timestamp{:UTC},Missing}
+
+ # Works since `ArrowTypes.default(Any) === nothing` and
+ # `ArrowTypes.toarrow(nothing) === missing`. Defining `toarrow(::Nothing) = nothing`
+ # would break this test by returning `Union{Nothing,Missing}`.
+ @test eltype(ArrowTypes.ToArrow(Any[missing])) == Missing
end
- struct Timestamp{TZ}
- x::Int64
+ @testset "ignore non-missing abstract type" begin
+ x = ArrowTypes.ToArrow(Union{Missing,Array{Int}}[missing])
+ @test x isa ArrowTypes.ToArrow{Missing,Vector{Union{Missing,Array{Int64}}}}
+ @test eltype(x) == Missing
+ @test isequal(x, [missing])
end
-
- ArrowTypes.ArrowType(::Type{DateTimeTZ}) = Timestamp
- ArrowTypes.toarrow(x::DateTimeTZ) = Timestamp{Symbol(x.tz)}(x.instant)
- ArrowTypes.default(::Type{DateTimeTZ}) = DateTimeTZ(0, "UTC")
-
- T = Union{DateTimeTZ,Missing}
- @test !ArrowTypes.concrete_or_concreteunion(ArrowTypes.ArrowType(T))
- @test eltype(ArrowTypes.ToArrow(T[missing])) == Union{Timestamp{:UTC}, Missing}
-
- # Works since `ArrowTypes.default(Any) === nothing` and
- # `ArrowTypes.toarrow(nothing) === missing`. Defining `toarrow(::Nothing) = nothing`
- # would break this test by returning `Union{Nothing,Missing}`.
- @test eltype(ArrowTypes.ToArrow(Any[missing])) == Missing
end
-
- @testset "ignore non-missing abstract type" begin
- x = ArrowTypes.ToArrow(Union{Missing,Array{Int}}[missing])
- @test x isa ArrowTypes.ToArrow{Missing, Vector{Union{Missing, Array{Int64}}}}
- @test eltype(x) == Missing
- @test isequal(x, [missing])
- end
-end
-
end
diff --git a/src/FlatBuffers/FlatBuffers.jl b/src/FlatBuffers/FlatBuffers.jl
index 7445c5f..715c910 100644
--- a/src/FlatBuffers/FlatBuffers.jl
+++ b/src/FlatBuffers/FlatBuffers.jl
@@ -24,7 +24,7 @@ const VOffsetT = UInt16
const VtableMetadataFields = 2
basetype(::Enum) = UInt8
-basetype(::Type{T}) where {T <: Enum{S}} where {S} = S
+basetype(::Type{T}) where {T<:Enum{S}} where {S} = S
function readbuffer(t::AbstractVector{UInt8}, pos::Integer, ::Type{Bool})
@inbounds b = t[pos + 1]
@@ -46,7 +46,12 @@ function Base.show(io::IO, x::TableOrStruct)
if isempty(propertynames(x))
print(io, "()")
else
- show(io, NamedTuple{propertynames(x)}(Tuple(getproperty(x, y) for y in propertynames(x))))
+ show(
+ io,
+ NamedTuple{propertynames(x)}(
+ Tuple(getproperty(x, y) for y in propertynames(x)),
+ ),
+ )
end
end
diff --git a/src/FlatBuffers/builder.jl b/src/FlatBuffers/builder.jl
index 0c65c6f..1ca1198 100644
--- a/src/FlatBuffers/builder.jl
+++ b/src/FlatBuffers/builder.jl
@@ -20,10 +20,8 @@ const fileIdentifierLength = 4
Scalar
A Union of the Julia types `T <: Number` that are allowed in FlatBuffers schema
"""
-const Scalar = Union{Bool,
-Int8, Int16, Int32, Int64,
-UInt8, UInt16, UInt32, UInt64,
-Float32, Float64, Enum}
+const Scalar =
+ Union{Bool,Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64,Float32,Float64,Enum}
"""
Builder is a state machine for creating FlatBuffer objects.
@@ -41,12 +39,22 @@ mutable struct Builder
head::UOffsetT
nested::Bool
finished::Bool
- sharedstrings::Dict{String, UOffsetT}
+ sharedstrings::Dict{String,UOffsetT}
end
bytes(b::Builder) = getfield(b, :bytes)
-Builder(size=0) = Builder(zeros(UInt8, size), 1, UOffsetT[], UOffsetT(0), UOffsetT[], UOffsetT(size), false, false, Dict{String, UOffsetT}())
+Builder(size=0) = Builder(
+ zeros(UInt8, size),
+ 1,
+ UOffsetT[],
+ UOffsetT(0),
+ UOffsetT[],
+ UOffsetT(size),
+ false,
+ false,
+ Dict{String,UOffsetT}(),
+)
function reset!(b::Builder)
empty!(b.bytes)
@@ -60,11 +68,11 @@ function reset!(b::Builder)
return
end
-Base.write(sink::Builder, o, x::Union{Bool,UInt8}) = sink.bytes[o+1] = UInt8(x)
+Base.write(sink::Builder, o, x::Union{Bool,UInt8}) = sink.bytes[o + 1] = UInt8(x)
function Base.write(sink::Builder, off, x::T) where {T}
off += 1
- for (i, ind) = enumerate(off:(off + sizeof(T) - 1))
- sink.bytes[ind] = (x >> ((i-1) * 8)) % UInt8
+ for (i, ind) in enumerate(off:(off + sizeof(T) - 1))
+ sink.bytes[ind] = (x >> ((i - 1) * 8)) % UInt8
end
end
Base.write(b::Builder, o, x::Float32) = write(b, o, reinterpret(UInt32, x))
@@ -124,7 +132,7 @@ function writevtable!(b::Builder)
if i !== nothing
resize!(b.vtable, i)
end
-
+
# Search backwards through existing vtables, because similar vtables
# are likely to have been recently appended. See
# BenchmarkVtableDeduplication for a case in which this heuristic
@@ -208,7 +216,7 @@ end
offset(b::Builder) = UOffsetT(length(b.bytes) - b.head)
-pad!(b::Builder, n) = foreach(x->place!(b, 0x00), 1:n)
+pad!(b::Builder, n) = foreach(x -> place!(b, 0x00), 1:n)
"""
`prep!` prepares to write an element of `size` after `additionalbytes`
@@ -295,7 +303,7 @@ end
"""
`createstring!` writes a null-terminated string as a vector.
"""
-function createstring!(b::Builder, s::Union{AbstractString, AbstractVector{UInt8}})
+function createstring!(b::Builder, s::Union{AbstractString,AbstractVector{UInt8}})
assertnotnested(b)
b.nested = true
s = codeunits(s)
@@ -305,7 +313,7 @@ function createstring!(b::Builder, s::Union{AbstractString, AbstractVector{UInt8
l = sizeof(s)
b.head -= l
- copyto!(b.bytes, b.head+1, s, 1, l)
+ copyto!(b.bytes, b.head + 1, s, 1, l)
return endvector!(b, sizeof(s))
end
@@ -353,7 +361,7 @@ end
If value `x` equals default `d`, then the slot will be set to zero and no
other data will be written.
"""
-function prependslot!(b::Builder, o::Int, x::T, d, sh=false) where {T <: Scalar}
+function prependslot!(b::Builder, o::Int, x::T, d, sh=false) where {T<:Scalar}
if x != T(d)
prepend!(b, x)
slot!(b, o)
@@ -418,13 +426,13 @@ function vtableEqual(a::Vector{UOffsetT}, objectStart, b::AbstractVector{UInt8})
return false
end
- for i = 0:(length(a)-1)
+ for i = 0:(length(a) - 1)
x = read(IOBuffer(view(b, (i * sizeof(VOffsetT) + 1):length(b))), VOffsetT)
# Skip vtable entries that indicate a default value.
- x == 0 && a[i+1] == 0 && continue
+ x == 0 && a[i + 1] == 0 && continue
- y = objectStart - a[i+1]
+ y = objectStart - a[i + 1]
x != y && return false
end
return true
diff --git a/src/FlatBuffers/table.jl b/src/FlatBuffers/table.jl
index 0b610ca..a17d1cd 100644
--- a/src/FlatBuffers/table.jl
+++ b/src/FlatBuffers/table.jl
@@ -27,22 +27,25 @@ The actual values in the table follow `pos` offset and size of the vtable.
abstract type Table end
abstract type Struct end
-const TableOrStruct = Union{Table, Struct}
+const TableOrStruct = Union{Table,Struct}
bytes(x::TableOrStruct) = getfield(x, :bytes)
pos(x::TableOrStruct) = getfield(x, :pos)
-==(a::T, b::T) where {T <: TableOrStruct} = all(getproperty(a, p) == getproperty(b, p) for p in propertynames(a))
+==(a::T, b::T) where {T<:TableOrStruct} =
+ all(getproperty(a, p) == getproperty(b, p) for p in propertynames(a))
-(::Type{T})(b::Builder) where {T <: TableOrStruct} = T(b.bytes[b.head+1:end], get(b, b.head, Int32))
+(::Type{T})(b::Builder) where {T<:TableOrStruct} =
+ T(b.bytes[(b.head + 1):end], get(b, b.head, Int32))
-getrootas(::Type{T}, bytes::Vector{UInt8}, offset) where {T <: Table} = init(T, bytes, offset + readbuffer(bytes, offset, UOffsetT))
-init(::Type{T}, bytes::Vector{UInt8}, pos::Integer) where {T <: TableOrStruct} = T(bytes, pos)
+getrootas(::Type{T}, bytes::Vector{UInt8}, offset) where {T<:Table} =
+ init(T, bytes, offset + readbuffer(bytes, offset, UOffsetT))
+init(::Type{T}, bytes::Vector{UInt8}, pos::Integer) where {T<:TableOrStruct} = T(bytes, pos)
-const TableOrBuilder = Union{Table, Struct, Builder}
+const TableOrBuilder = Union{Table,Struct,Builder}
Base.get(t::TableOrBuilder, pos, ::Type{T}) where {T} = readbuffer(bytes(t), pos, T)
-Base.get(t::TableOrBuilder, pos, ::Type{T}) where {T <: Enum} = T(get(t, pos, basetype(T)))
+Base.get(t::TableOrBuilder, pos, ::Type{T}) where {T<:Enum} = T(get(t, pos, basetype(T)))
"""
`offset` provides access into the Table's vtable.
@@ -51,15 +54,16 @@ Deprecated fields are ignored by checking against the vtable's length.
"""
function offset(t::Table, vtableoffset)
vtable = pos(t) - get(t, pos(t), SOffsetT)
- return vtableoffset < get(t, vtable, VOffsetT) ? get(t, vtable + vtableoffset, VOffsetT) : VOffsetT(0)
+ return vtableoffset < get(t, vtable, VOffsetT) ?
+ get(t, vtable + vtableoffset, VOffsetT) : VOffsetT(0)
end
"`indirect` retrieves the relative offset stored at `offset`."
indirect(t::Table, off) = off + get(t, off, UOffsetT)
getvalue(t, o, ::Type{Nothing}) = nothing
-getvalue(t, o, ::Type{T}) where {T <: Scalar} = get(t, pos(t) + o, T)
-getvalue(t, o, ::Type{T}) where {T <: Enum} = T(get(t, pos(t) + o, enumtype(T)))
+getvalue(t, o, ::Type{T}) where {T<:Scalar} = get(t, pos(t) + o, T)
+getvalue(t, o, ::Type{T}) where {T<:Enum} = T(get(t, pos(t) + o, enumtype(T)))
function Base.String(t::Table, off)
off += get(t, off, UOffsetT)
@@ -96,7 +100,7 @@ function vector(t::Table, off)
return x + sizeof(UOffsetT)
end
-struct Array{T, S, TT} <: AbstractVector{T}
+struct Array{T,S,TT} <: AbstractVector{T}
_tab::TT
pos::Int64
data::Vector{S}
@@ -104,17 +108,17 @@ end
function Array{T}(t::Table, off) where {T}
a = vector(t, off)
- S = T <: Table ? UOffsetT : T <: Struct ? NTuple{structsizeof(T), UInt8} : T
+ S = T <: Table ? UOffsetT : T <: Struct ? NTuple{structsizeof(T),UInt8} : T
ptr = convert(Ptr{S}, pointer(bytes(t), a + 1))
data = unsafe_wrap(Base.Array, ptr, vectorlen(t, off))
- return Array{T, S, typeof(t)}(t, a, data)
+ return Array{T,S,typeof(t)}(t, a, data)
end
function structsizeof end
Base.IndexStyle(::Type{<:Array}) = Base.IndexLinear()
Base.size(x::Array) = size(x.data)
-Base.@propagate_inbounds function Base.getindex(A::Array{T, S}, i::Integer) where {T, S}
+Base.@propagate_inbounds function Base.getindex(A::Array{T,S}, i::Integer) where {T,S}
if T === S
return A.data[i]
elseif T <: Struct
@@ -124,7 +128,7 @@ Base.@propagate_inbounds function Base.getindex(A::Array{T, S}, i::Integer) wher
end
end
-Base.@propagate_inbounds function Base.setindex!(A::Array{T, S}, v, i::Integer) where {T, S}
+Base.@propagate_inbounds function Base.setindex!(A::Array{T,S}, v, i::Integer) where {T,S}
if T === S
return setindex!(A.data, v, i)
else
diff --git a/src/append.jl b/src/append.jl
index db7f1d3..c0c663c 100644
--- a/src/append.jl
+++ b/src/append.jl
@@ -67,21 +67,27 @@ function append(file::String, tbl; kwargs...)
return file
end
-function append(io::IO, tbl;
- metadata=getmetadata(tbl),
- colmetadata=nothing,
- largelists::Bool=false,
- denseunions::Bool=true,
- dictencode::Bool=false,
- dictencodenested::Bool=false,
- alignment::Int=8,
- maxdepth::Int=DEFAULT_MAX_DEPTH,
- ntasks=Inf,
- convert::Bool=true,
- file::Bool=false)
-
+function append(
+ io::IO,
+ tbl;
+ metadata=getmetadata(tbl),
+ colmetadata=nothing,
+ largelists::Bool=false,
+ denseunions::Bool=true,
+ dictencode::Bool=false,
+ dictencodenested::Bool=false,
+ alignment::Int=8,
+ maxdepth::Int=DEFAULT_MAX_DEPTH,
+ ntasks=Inf,
+ convert::Bool=true,
+ file::Bool=false,
+)
if ntasks < 1
- throw(ArgumentError("ntasks keyword argument must be > 0; pass `ntasks=1` to disable multithreaded writing"))
+ throw(
+ ArgumentError(
+ "ntasks keyword argument must be > 0; pass `ntasks=1` to disable multithreaded writing",
+ ),
+ )
end
startpos = position(io)
@@ -90,7 +96,7 @@ function append(io::IO, tbl;
seek(io, startpos) # leave the stream position unchanged
if len == 0 # empty file, not initialized, we can just write to it
- kwargs = Dict{Symbol, Any}(
+ kwargs = Dict{Symbol,Any}(
:largelists => largelists,
:denseunions => denseunions,
:dictencode => dictencode,
@@ -110,36 +116,70 @@ function append(io::IO, tbl;
throw(ArgumentError("append is supported only to files in arrow stream format"))
end
if compress isa Symbol && compress !== :lz4 && compress !== :zstd
- throw(ArgumentError("unsupported compress keyword argument value: $compress. Valid values include `:lz4` or `:zstd`"))
+ throw(
+ ArgumentError(
+ "unsupported compress keyword argument value: $compress. Valid values include `:lz4` or `:zstd`",
+ ),
+ )
end
- append(io, tbl, arrow_schema, compress, largelists, denseunions, dictencode, dictencodenested, alignment, maxdepth, ntasks, metadata, colmetadata)
+ append(
+ io,
+ tbl,
+ arrow_schema,
+ compress,
+ largelists,
+ denseunions,
+ dictencode,
+ dictencodenested,
+ alignment,
+ maxdepth,
+ ntasks,
+ metadata,
+ colmetadata,
+ )
end
return io
end
-function append(io::IO, source, arrow_schema, compress, largelists, denseunions, dictencode, dictencodenested, alignment, maxdepth, ntasks, meta, colmeta)
+function append(
+ io::IO,
+ source,
+ arrow_schema,
+ compress,
+ largelists,
+ denseunions,
+ dictencode,
+ dictencodenested,
+ alignment,
+ maxdepth,
+ ntasks,
+ meta,
+ colmeta,
+)
seekend(io)
skip(io, -8) # overwrite last 8 bytes of last empty message footer
sch = Ref{Tables.Schema}(arrow_schema)
sync = OrderedSynchronizer()
msgs = Channel{Message}(ntasks)
- dictencodings = Dict{Int64, Any}() # Lockable{DictEncoding}
+ dictencodings = Dict{Int64,Any}() # Lockable{DictEncoding}
# build messages
blocks = (Block[], Block[])
# start message writing from channel
threaded = ntasks > 1
- tsk = threaded ? (Threads.@spawn for msg in msgs
- Base.write(io, msg, blocks, sch, alignment)
- end) : (@async for msg in msgs
- Base.write(io, msg, blocks, sch, alignment)
- end)
+ tsk =
+ threaded ? (Threads.@spawn for msg in msgs
+ Base.write(io, msg, blocks, sch, alignment)
+ end) : (@async for msg in msgs
+ Base.write(io, msg, blocks, sch, alignment)
+ end)
anyerror = Threads.Atomic{Bool}(false)
errorref = Ref{Any}()
@sync for (i, tbl) in enumerate(Tables.partitions(source))
if anyerror[]
- @error "error writing arrow data on partition = $(errorref[][3])" exception=(errorref[][1], errorref[][2])
+ @error "error writing arrow data on partition = $(errorref[][3])" exception =
+ (errorref[][1], errorref[][2])
error("fatal error writing arrow data")
end
@debugv 1 "processing table partition i = $i"
@@ -151,13 +191,50 @@ function append(io::IO, source, arrow_schema, compress, largelists, denseunions,
end
if threaded
- Threads.@spawn process_partition(tbl_cols, dictencodings, largelists, compress, denseunions, dictencode, dictencodenested, maxdepth, sync, msgs, alignment, i, sch, errorref, anyerror, meta, colmeta)
+ Threads.@spawn process_partition(
+ tbl_cols,
+ dictencodings,
+ largelists,
+ compress,
+ denseunions,
+ dictencode,
+ dictencodenested,
+ maxdepth,
+ sync,
+ msgs,
+ alignment,
+ i,
+ sch,
+ errorref,
+ anyerror,
+ meta,
+ colmeta,
+ )
else
- @async process_partition(tbl_cols, dictencodings, largelists, compress, denseunions, dictencode, dictencodenested, maxdepth, sync, msgs, alignment, i, sch, errorref, anyerror, meta, colmeta)
+ @async process_partition(
+ tbl_cols,
+ dictencodings,
+ largelists,
+ compress,
+ denseunions,
+ dictencode,
+ dictencodenested,
+ maxdepth,
+ sync,
+ msgs,
+ alignment,
+ i,
+ sch,
+ errorref,
+ anyerror,
+ meta,
+ colmeta,
+ )
end
end
if anyerror[]
- @error "error writing arrow data on partition = $(errorref[][3])" exception=(errorref[][1], errorref[][2])
+ @error "error writing arrow data on partition = $(errorref[][3])" exception =
+ (errorref[][1], errorref[][2])
error("fatal error writing arrow data")
end
# close our message-writing channel, no further put!-ing is allowed
@@ -165,7 +242,13 @@ function append(io::IO, source, arrow_schema, compress, largelists, denseunions,
# now wait for our message-writing task to finish writing
wait(tsk)
- Base.write(io, Message(UInt8[], nothing, 0, true, false, Meta.Schema), blocks, sch, alignment)
+ Base.write(
+ io,
+ Message(UInt8[], nothing, 0, true, false, Meta.Schema),
+ blocks,
+ sch,
+ alignment,
+ )
return io
end
@@ -196,10 +279,11 @@ end
function is_equivalent_schema(sch1::Tables.Schema, sch2::Tables.Schema)
(sch1.names == sch2.names) || (return false)
- for (t1,t2) in zip(sch1.types, sch2.types)
+ for (t1, t2) in zip(sch1.types, sch2.types)
tt1 = Base.nonmissingtype(t1)
tt2 = Base.nonmissingtype(t2)
- if t1 == t2 || (tt1 <: AbstractVector && tt2 <: AbstractVector && eltype(tt1) == eltype(tt2))
+ if t1 == t2 ||
+ (tt1 <: AbstractVector && tt2 <: AbstractVector && eltype(tt1) == eltype(tt2))
continue
else
return false
diff --git a/src/arraytypes/arraytypes.jl b/src/arraytypes/arraytypes.jl
index a3449f1..b417abf 100644
--- a/src/arraytypes/arraytypes.jl
+++ b/src/arraytypes/arraytypes.jl
@@ -24,14 +24,24 @@ subtypes `ArrowVector`. See [`BoolVector`](@ref), [`Primitive`](@ref), [`List`](
"""
abstract type ArrowVector{T} <: AbstractVector{T} end
-Base.IndexStyle(::Type{A}) where {A <: ArrowVector} = Base.IndexLinear()
-Base.similar(::Type{A}, dims::Dims) where {T, A <: ArrowVector{T}} = Vector{T}(undef, dims)
+Base.IndexStyle(::Type{A}) where {A<:ArrowVector} = Base.IndexLinear()
+Base.similar(::Type{A}, dims::Dims) where {T,A<:ArrowVector{T}} = Vector{T}(undef, dims)
validitybitmap(x::ArrowVector) = x.validity
nullcount(x::ArrowVector) = validitybitmap(x).nc
getmetadata(x::ArrowVector) = x.metadata
-Base.deleteat!(x::T, inds) where {T <: ArrowVector} = throw(ArgumentError("`$T` does not support `deleteat!`; arrow data is by nature immutable"))
+Base.deleteat!(x::T, inds) where {T<:ArrowVector} = throw(
+ ArgumentError("`$T` does not support `deleteat!`; arrow data is by nature immutable"),
+)
-function toarrowvector(x, i=1, de=Dict{Int64, Any}(), ded=DictEncoding[], meta=getmetadata(x); compression::Union{Nothing, Symbol, LZ4FrameCompressor, ZstdCompressor}=nothing, kw...)
+function toarrowvector(
+ x,
+ i=1,
+ de=Dict{Int64,Any}(),
+ ded=DictEncoding[],
+ meta=getmetadata(x);
+ compression::Union{Nothing,Symbol,LZ4FrameCompressor,ZstdCompressor}=nothing,
+ kw...,
+)
@debugv 2 "converting top-level column to arrow format: col = $(typeof(x)), compression = $compression, kw = $(values(kw))"
@debugv 3 x
A = arrowvector(x, i, 0, 0, de, ded, meta; compression=compression, kw...)
@@ -55,30 +65,73 @@ function toarrowvector(x, i=1, de=Dict{Int64, Any}(), ded=DictEncoding[], meta=g
return A
end
-function arrowvector(x, i, nl, fi, de, ded, meta; dictencoding::Bool=false, dictencode::Bool=false, maxdepth::Int=DEFAULT_MAX_DEPTH, kw...)
+function arrowvector(
+ x,
+ i,
+ nl,
+ fi,
+ de,
+ ded,
+ meta;
+ dictencoding::Bool=false,
+ dictencode::Bool=false,
+ maxdepth::Int=DEFAULT_MAX_DEPTH,
+ kw...,
+)
if nl > maxdepth
- error("reached nested serialization level ($nl) deeper than provided max depth argument ($(maxdepth)); to increase allowed nesting level, pass `maxdepth=X`")
+ error(
+ "reached nested serialization level ($nl) deeper than provided max depth argument ($(maxdepth)); to increase allowed nesting level, pass `maxdepth=X`",
+ )
end
T = maybemissing(eltype(x))
if !(x isa DictEncode) && !dictencoding && (dictencode || DataAPI.refarray(x) !== x)
x = DictEncode(x, dictencodeid(i, nl, fi))
elseif x isa DictEncoded
- return arrowvector(DictEncodeType, x, i, nl, fi, de, ded, meta; dictencode=dictencode, kw...)
+ return arrowvector(
+ DictEncodeType,
+ x,
+ i,
+ nl,
+ fi,
+ de,
+ ded,
+ meta;
+ dictencode=dictencode,
+ kw...,
+ )
elseif !(x isa DictEncode)
x = ToArrow(x)
end
S = maybemissing(eltype(x))
if ArrowTypes.hasarrowname(T)
- meta = _arrowtypemeta(_normalizemeta(meta), String(ArrowTypes.arrowname(T)), String(ArrowTypes.arrowmetadata(T)))
+ meta = _arrowtypemeta(
+ _normalizemeta(meta),
+ String(ArrowTypes.arrowname(T)),
+ String(ArrowTypes.arrowmetadata(T)),
+ )
end
- return arrowvector(S, x, i, nl, fi, de, ded, meta; dictencode=dictencode, maxdepth=maxdepth, kw...)
+ return arrowvector(
+ S,
+ x,
+ i,
+ nl,
+ fi,
+ de,
+ ded,
+ meta;
+ dictencode=dictencode,
+ maxdepth=maxdepth,
+ kw...,
+ )
end
_normalizemeta(::Nothing) = nothing
_normalizemeta(meta) = toidict(String(k) => String(v) for (k, v) in meta)
_normalizecolmeta(::Nothing) = nothing
-_normalizecolmeta(colmeta) = toidict(Symbol(k) => toidict(String(v1) => String(v2) for (v1, v2) in v) for (k, v) in colmeta)
+_normalizecolmeta(colmeta) = toidict(
+ Symbol(k) => toidict(String(v1) => String(v2) for (v1, v2) in v) for (k, v) in colmeta
+)
function _arrowtypemeta(::Nothing, n, m)
return toidict(("ARROW:extension:name" => n, "ARROW:extension:metadata" => m))
@@ -99,16 +152,26 @@ end
struct NullVector{T} <: ArrowVector{T}
data::MissingVector
- metadata::Union{Nothing, Base.ImmutableDict{String, String}}
+ metadata::Union{Nothing,Base.ImmutableDict{String,String}}
end
Base.size(v::NullVector) = (length(v.data),)
-Base.getindex(v::NullVector{T}, i::Int) where {T} = ArrowTypes.fromarrow(T, getindex(v.data, i))
+Base.getindex(v::NullVector{T}, i::Int) where {T} =
+ ArrowTypes.fromarrow(T, getindex(v.data, i))
-arrowvector(::NullKind, x, i, nl, fi, de, ded, meta; kw...) = NullVector{eltype(x)}(MissingVector(length(x)), isnothing(meta) ? nothing : toidict(meta))
+arrowvector(::NullKind, x, i, nl, fi, de, ded, meta; kw...) = NullVector{eltype(x)}(
+ MissingVector(length(x)),
+ isnothing(meta) ? nothing : toidict(meta),
+)
compress(Z::Meta.CompressionType.T, comp, v::NullVector) =
- Compressed{Z, NullVector}(v, CompressedBuffer[], length(v), length(v), Compressed[])
+ Compressed{Z,NullVector}(v, CompressedBuffer[], length(v), length(v), Compressed[])
-function makenodesbuffers!(col::NullVector, fieldnodes, fieldbuffers, bufferoffset, alignment)
+function makenodesbuffers!(
+ col::NullVector,
+ fieldnodes,
+ fieldbuffers,
+ bufferoffset,
+ alignment,
+)
push!(fieldnodes, FieldNode(length(col), length(col)))
@debugv 1 "made field node: nodeidx = $(length(fieldnodes)), col = $(typeof(col)), len = $(fieldnodes[end].length), nc = $(fieldnodes[end].null_count)"
return bufferoffset
@@ -176,7 +239,7 @@ end
# 2) parent array is also empty, so "all" elements are valid
p.nc == 0 && return true
# translate element index to bitpacked byte index
- a, b = divrem(i-1, 8) .+ (1,1)
+ a, b = divrem(i - 1, 8) .+ (1, 1)
@inbounds byte = p.bytes[p.pos + a - 1]
# check individual bit of byte
return getbit(byte, b)
@@ -195,7 +258,7 @@ function writebitmap(io, col::ArrowVector, alignment)
v = col.validity
@debugv 1 "writing validity bitmap: nc = $(v.nc), n = $(cld(v.ℓ, 8))"
v.nc == 0 && return 0
- n = Base.write(io, view(v.bytes, v.pos:(v.pos + cld(v.ℓ, 8) - 1)))
+ n = Base.write(io, view(v.bytes, (v.pos):(v.pos + cld(v.ℓ, 8) - 1)))
return n + writezeros(io, paddinglength(n, alignment))
end
diff --git a/src/arraytypes/bool.jl b/src/arraytypes/bool.jl
index 3ef44c8..b9e853a 100644
--- a/src/arraytypes/bool.jl
+++ b/src/arraytypes/bool.jl
@@ -25,7 +25,7 @@ struct BoolVector{T} <: ArrowVector{T}
pos::Int
validity::ValidityBitmap
ℓ::Int64
- metadata::Union{Nothing, Base.ImmutableDict{String, String}}
+ metadata::Union{Nothing,Base.ImmutableDict{String,String}}
end
Base.size(p::BoolVector) = (p.ℓ,)
@@ -77,15 +77,21 @@ function arrowvector(::BoolKind, x, i, nl, fi, de, ded, meta; kw...)
return BoolVector{eltype(x)}(bytes, 1, validity, len, meta)
end
-function compress(Z::Meta.CompressionType.T, comp, p::P) where {P <: BoolVector}
+function compress(Z::Meta.CompressionType.T, comp, p::P) where {P<:BoolVector}
len = length(p)
nc = nullcount(p)
validity = compress(Z, comp, p.validity)
- data = compress(Z, comp, view(p.arrow, p.pos:(p.pos + cld(p.ℓ, 8) - 1)))
- return Compressed{Z, P}(p, [validity, data], len, nc, Compressed[])
+ data = compress(Z, comp, view(p.arrow, (p.pos):(p.pos + cld(p.ℓ, 8) - 1)))
+ return Compressed{Z,P}(p, [validity, data], len, nc, Compressed[])
end
-function makenodesbuffers!(col::BoolVector, fieldnodes, fieldbuffers, bufferoffset, alignment)
+function makenodesbuffers!(
+ col::BoolVector,
+ fieldnodes,
+ fieldbuffers,
+ bufferoffset,
+ alignment,
+)
len = length(col)
nc = nullcount(col)
push!(fieldnodes, FieldNode(len, nc))
@@ -106,6 +112,6 @@ function writebuffer(io, col::BoolVector, alignment)
@debugv 1 "writebuffer: col = $(typeof(col))"
@debugv 2 col
writebitmap(io, col, alignment)
- n = Base.write(io, view(col.arrow, col.pos:(col.pos + cld(col.ℓ, 8) - 1)))
+ n = Base.write(io, view(col.arrow, (col.pos):(col.pos + cld(col.ℓ, 8) - 1)))
return n + writezeros(io, paddinglength(n, alignment))
end
diff --git a/src/arraytypes/compressed.jl b/src/arraytypes/compressed.jl
index cc86a1c..7c0366e 100644
--- a/src/arraytypes/compressed.jl
+++ b/src/arraytypes/compressed.jl
@@ -26,7 +26,7 @@ Represents the compressed version of an [`ArrowVector`](@ref).
Holds a reference to the original column. May have `Compressed`
children for nested array types.
"""
-struct Compressed{Z, A}
+struct Compressed{Z,A}
data::A
buffers::Vector{CompressedBuffer}
len::Int64
@@ -35,7 +35,7 @@ struct Compressed{Z, A}
end
Base.length(c::Compressed) = c.len
-Base.eltype(::Type{C}) where {Z, A, C<:Compressed{Z, A}} = eltype(A)
+Base.eltype(::Type{C}) where {Z,A,C<:Compressed{Z,A}} = eltype(A)
getmetadata(x::Compressed) = getmetadata(x.data)
compressiontype(c::Compressed{Z}) where {Z} = Z
@@ -49,9 +49,16 @@ end
compress(Z::Meta.CompressionType.T, comp, x) = compress(Z, comp, convert(Array, x))
compress(Z::Meta.CompressionType.T, comp, v::ValidityBitmap) =
- v.nc == 0 ? CompressedBuffer(UInt8[], 0) : compress(Z, comp, view(v.bytes, v.pos:(v.pos + cld(v.ℓ, 8) - 1)))
+ v.nc == 0 ? CompressedBuffer(UInt8[], 0) :
+ compress(Z, comp, view(v.bytes, (v.pos):(v.pos + cld(v.ℓ, 8) - 1)))
-function makenodesbuffers!(col::Compressed, fieldnodes, fieldbuffers, bufferoffset, alignment)
+function makenodesbuffers!(
+ col::Compressed,
+ fieldnodes,
+ fieldbuffers,
+ bufferoffset,
+ alignment,
+)
push!(fieldnodes, FieldNode(col.len, col.nullcount))
@debugv 1 "made field node: nodeidx = $(length(fieldnodes)), col = $(typeof(col)), len = $(fieldnodes[end].length), nc = $(fieldnodes[end].null_count)"
for buffer in col.buffers
@@ -61,7 +68,8 @@ function makenodesbuffers!(col::Compressed, fieldnodes, fieldbuffers, bufferoffs
bufferoffset += padding(blen, alignment)
end
for child in col.children
- bufferoffset = makenodesbuffers!(child, fieldnodes, fieldbuffers, bufferoffset, alignment)
+ bufferoffset =
+ makenodesbuffers!(child, fieldnodes, fieldbuffers, bufferoffset, alignment)
end
return bufferoffset
end
diff --git a/src/arraytypes/dictencoding.jl b/src/arraytypes/dictencoding.jl
index 7b4784f..9b7f7e0 100644
--- a/src/arraytypes/dictencoding.jl
+++ b/src/arraytypes/dictencoding.jl
@@ -31,15 +31,15 @@ schema exactly. For example, if a non-first record batch dict encoded column
were to cause a DictEncoding pool to overflow on unique values, a fatal error
should be thrown.
"""
-mutable struct DictEncoding{T, S, A} <: ArrowVector{T}
+mutable struct DictEncoding{T,S,A} <: ArrowVector{T}
id::Int64
data::A
isOrdered::Bool
- metadata::Union{Nothing, Base.ImmutableDict{String, String}}
+ metadata::Union{Nothing,Base.ImmutableDict{String,String}}
end
-indextype(::Type{DictEncoding{T, S, A}}) where {T, S, A} = S
-indextype(::T) where {T <: DictEncoding} = indextype(T)
+indextype(::Type{DictEncoding{T,S,A}}) where {T,S,A} = S
+indextype(::T) where {T<:DictEncoding} = indextype(T)
Base.size(d::DictEncoding) = size(d.data)
@@ -61,12 +61,12 @@ to the arrow streaming/file format. An optional `id` number may be provided
to signal that multiple columns should use the same pool when being
dictionary encoded.
"""
-struct DictEncode{T, A} <: AbstractVector{DictEncodeType{T}}
+struct DictEncode{T,A} <: AbstractVector{DictEncodeType{T}}
id::Int64
data::A
end
-DictEncode(x::A, id=-1) where {A} = DictEncode{eltype(A), A}(id, x)
+DictEncode(x::A, id=-1) where {A} = DictEncode{eltype(A),A}(id, x)
Base.IndexStyle(::Type{<:DictEncode}) = Base.IndexLinear()
Base.size(x::DictEncode) = (length(x.data),)
Base.iterate(x::DictEncode, st...) = iterate(x.data, st...)
@@ -86,22 +86,27 @@ either by passing the `dictencode=true` keyword argument to [`Arrow.write`](@ref
(which causes _all_ columns to be dict encoded), or wrapping individual columns/
arrays in [`Arrow.DictEncode(x)`](@ref).
"""
-struct DictEncoded{T, S, A} <: ArrowVector{T}
+struct DictEncoded{T,S,A} <: ArrowVector{T}
arrow::Vector{UInt8} # need to hold a reference to arrow memory blob
validity::ValidityBitmap
indices::Vector{S}
- encoding::DictEncoding{T, S, A}
- metadata::Union{Nothing, Base.ImmutableDict{String, String}}
+ encoding::DictEncoding{T,S,A}
+ metadata::Union{Nothing,Base.ImmutableDict{String,String}}
end
-DictEncoded(b::Vector{UInt8}, v::ValidityBitmap, inds::Vector{S}, encoding::DictEncoding{T, S, A}, meta) where {S, T, A} =
- DictEncoded{T, S, A}(b, v, inds, encoding, meta)
+DictEncoded(
+ b::Vector{UInt8},
+ v::ValidityBitmap,
+ inds::Vector{S},
+ encoding::DictEncoding{T,S,A},
+ meta,
+) where {S,T,A} = DictEncoded{T,S,A}(b, v, inds, encoding, meta)
Base.size(d::DictEncoded) = size(d.indices)
isdictencoded(d::DictEncoded) = true
isdictencoded(x) = false
-isdictencoded(c::Compressed{Z, A}) where {Z, A <: DictEncoded} = true
+isdictencoded(c::Compressed{Z,A}) where {Z,A<:DictEncoded} = true
function signedtype(n::Integer)
typs = (Int8, Int16, Int32, Int64)
@@ -112,17 +117,30 @@ signedtype(::Type{UInt8}) = Int8
signedtype(::Type{UInt16}) = Int16
signedtype(::Type{UInt32}) = Int32
signedtype(::Type{UInt64}) = Int64
-signedtype(::Type{T}) where {T <: Signed} = T
+signedtype(::Type{T}) where {T<:Signed} = T
-indtype(d::DictEncoded{T, S, A}) where {T, S, A} = S
-indtype(c::Compressed{Z, A}) where {Z, A <: DictEncoded} = indtype(c.data)
+indtype(d::DictEncoded{T,S,A}) where {T,S,A} = S
+indtype(c::Compressed{Z,A}) where {Z,A<:DictEncoded} = indtype(c.data)
-dictencodeid(colidx, nestedlevel, fieldid) = (Int64(nestedlevel) << 48) | (Int64(fieldid) << 32) | Int64(colidx)
+dictencodeid(colidx, nestedlevel, fieldid) =
+ (Int64(nestedlevel) << 48) | (Int64(fieldid) << 32) | Int64(colidx)
getid(d::DictEncoded) = d.encoding.id
-getid(c::Compressed{Z, A}) where {Z, A <: DictEncoded} = c.data.encoding.id
+getid(c::Compressed{Z,A}) where {Z,A<:DictEncoded} = c.data.encoding.id
-function arrowvector(::DictEncodedKind, x::DictEncoded, i, nl, fi, de, ded, meta; dictencode::Bool=false, dictencodenested::Bool=false, kw...)
+function arrowvector(
+ ::DictEncodedKind,
+ x::DictEncoded,
+ i,
+ nl,
+ fi,
+ de,
+ ded,
+ meta;
+ dictencode::Bool=false,
+ dictencodenested::Bool=false,
+ kw...,
+)
id = x.encoding.id
if !haskey(de, id)
de[id] = Lockable(x.encoding)
@@ -135,15 +153,42 @@ function arrowvector(::DictEncodedKind, x::DictEncoded, i, nl, fi, de, ded, meta
if !isempty(deltas)
ET = indextype(encoding)
if length(deltas) + length(encoding) > typemax(ET)
- error("fatal error serializing dict encoded column with ref index type of $ET; subsequent record batch unique values resulted in $(length(deltas) + length(encoding)) unique values, which exceeds possible index values in $ET")
+ error(
+ "fatal error serializing dict encoded column with ref index type of $ET; subsequent record batch unique values resulted in $(length(deltas) + length(encoding)) unique values, which exceeds possible index values in $ET",
+ )
end
- data = arrowvector(deltas, i, nl, fi, de, ded, nothing; dictencode=dictencodenested, dictencodenested=dictencodenested, dictencoding=true, kw...)
- push!(ded, DictEncoding{eltype(data), ET, typeof(data)}(id, data, false, getmetadata(data)))
+ data = arrowvector(
+ deltas,
+ i,
+ nl,
+ fi,
+ de,
+ ded,
+ nothing;
+ dictencode=dictencodenested,
+ dictencodenested=dictencodenested,
+ dictencoding=true,
+ kw...,
+ )
+ push!(
+ ded,
+ DictEncoding{eltype(data),ET,typeof(data)}(
+ id,
+ data,
+ false,
+ getmetadata(data),
+ ),
+ )
if typeof(encoding.data) <: ChainedVector
append!(encoding.data, data)
else
data2 = ChainedVector([encoding.data, data])
- encoding = DictEncoding{eltype(data2), ET, typeof(data2)}(id, data2, false, getmetadata(encoding))
+ encoding = DictEncoding{eltype(data2),ET,typeof(data2)}(
+ id,
+ data2,
+ false,
+ getmetadata(encoding),
+ )
de[id] = Lockable(encoding)
end
end
@@ -152,7 +197,19 @@ function arrowvector(::DictEncodedKind, x::DictEncoded, i, nl, fi, de, ded, meta
return x
end
-function arrowvector(::DictEncodedKind, x, i, nl, fi, de, ded, meta; dictencode::Bool=false, dictencodenested::Bool=false, kw...)
+function arrowvector(
+ ::DictEncodedKind,
+ x,
+ i,
+ nl,
+ fi,
+ de,
+ ded,
+ meta;
+ dictencode::Bool=false,
+ dictencodenested::Bool=false,
+ kw...,
+)
@assert x isa DictEncode
id = x.id == -1 ? dictencodeid(i, nl, fi) : x.id
x = x.data
@@ -186,20 +243,39 @@ function arrowvector(::DictEncodedKind, x, i, nl, fi, de, ded, meta; dictencode:
@inbounds inds[i] -= 1
end
end
- data = arrowvector(pool, i, nl, fi, de, ded, nothing; dictencode=dictencodenested, dictencodenested=dictencodenested, dictencoding=true, kw...)
- encoding = DictEncoding{eltype(data), eltype(inds), typeof(data)}(id, data, false, getmetadata(data))
+ data = arrowvector(
+ pool,
+ i,
+ nl,
+ fi,
+ de,
+ ded,
+ nothing;
+ dictencode=dictencodenested,
+ dictencodenested=dictencodenested,
+ dictencoding=true,
+ kw...,
+ )
+ encoding = DictEncoding{eltype(data),eltype(inds),typeof(data)}(
+ id,
+ data,
+ false,
+ getmetadata(data),
+ )
de[id] = Lockable(encoding)
else
# encoding already exists
- # compute inds based on it
- # if value doesn't exist in encoding, push! it
- # also add to deltas updates
+ # compute inds based on it
+ # if value doesn't exist in encoding, push! it
+ # also add to deltas updates
encodinglockable = de[id]
Base.@lock encodinglockable begin
encoding = encodinglockable.value
len = length(x)
ET = indextype(encoding)
- pool = Dict{Union{eltype(encoding), eltype(x)}, ET}(a => (b - 1) for (b, a) in enumerate(encoding))
+ pool = Dict{Union{eltype(encoding),eltype(x)},ET}(
+ a => (b - 1) for (b, a) in enumerate(encoding)
+ )
deltas = eltype(x)[]
inds = Vector{ET}(undef, len)
categorical = typeof(x).name.name == :CategoricalArray
@@ -214,15 +290,42 @@ function arrowvector(::DictEncodedKind, x, i, nl, fi, de, ded, meta; dictencode:
end
if !isempty(deltas)
if length(deltas) + length(encoding) > typemax(ET)
- error("fatal error serializing dict encoded column with ref index type of $ET; subsequent record batch unique values resulted in $(length(deltas) + length(encoding)) unique values, which exceeds possible index values in $ET")
+ error(
+ "fatal error serializing dict encoded column with ref index type of $ET; subsequent record batch unique values resulted in $(length(deltas) + length(encoding)) unique values, which exceeds possible index values in $ET",
+ )
end
- data = arrowvector(deltas, i, nl, fi, de, ded, nothing; dictencode=dictencodenested, dictencodenested=dictencodenested, dictencoding=true, kw...)
- push!(ded, DictEncoding{eltype(data), ET, typeof(data)}(id, data, false, getmetadata(data)))
+ data = arrowvector(
+ deltas,
+ i,
+ nl,
+ fi,
+ de,
+ ded,
+ nothing;
+ dictencode=dictencodenested,
+ dictencodenested=dictencodenested,
+ dictencoding=true,
+ kw...,
+ )
+ push!(
+ ded,
+ DictEncoding{eltype(data),ET,typeof(data)}(
+ id,
+ data,
+ false,
+ getmetadata(data),
+ ),
+ )
if typeof(encoding.data) <: ChainedVector
append!(encoding.data, data)
else
data2 = ChainedVector([encoding.data, data])
- encoding = DictEncoding{eltype(data2), ET, typeof(data2)}(id, data2, false, getmetadata(encoding))
+ encoding = DictEncoding{eltype(data2),ET,typeof(data2)}(
+ id,
+ data2,
+ false,
+ getmetadata(encoding),
+ )
de[id] = Lockable(encoding)
end
end
@@ -260,7 +363,7 @@ end
return v
end
-function Base.copy(x::DictEncoded{T, S}) where {T, S}
+function Base.copy(x::DictEncoded{T,S}) where {T,S}
pool = copy(x.encoding.data)
valid = x.validity
inds = x.indices
@@ -268,15 +371,19 @@ function Base.copy(x::DictEncoded{T, S}) where {T, S}
@inbounds for i = 1:length(inds)
refs[i] = refs[i] + one(S)
end
- return PooledArray(PooledArrays.RefArray(refs), Dict{T, S}(val => i for (i, val) in enumerate(pool)), pool)
+ return PooledArray(
+ PooledArrays.RefArray(refs),
+ Dict{T,S}(val => i for (i, val) in enumerate(pool)),
+ pool,
+ )
end
-function compress(Z::Meta.CompressionType.T, comp, x::A) where {A <: DictEncoded}
+function compress(Z::Meta.CompressionType.T, comp, x::A) where {A<:DictEncoded}
len = length(x)
nc = nullcount(x)
validity = compress(Z, comp, x.validity)
inds = compress(Z, comp, x.indices)
- return Compressed{Z, A}(x, [validity, inds], len, nc, Compressed[])
+ return Compressed{Z,A}(x, [validity, inds], len, nc, Compressed[])
end
function DataAPI.levels(x::DictEncoded)
@@ -285,7 +392,13 @@ function DataAPI.levels(x::DictEncoded)
convert(AbstractArray{nonmissingtype(eltype(rp))}, deleteat!(rp, ismissing.(rp)))
end
-function makenodesbuffers!(col::DictEncoded{T, S}, fieldnodes, fieldbuffers, bufferoffset, alignment) where {T, S}
+function makenodesbuffers!(
+ col::DictEncoded{T,S},
+ fieldnodes,
+ fieldbuffers,
+ bufferoffset,
+ alignment,
+) where {T,S}
len = length(col)
nc = nullcount(col)
push!(fieldnodes, FieldNode(len, nc))
@@ -303,7 +416,7 @@ function makenodesbuffers!(col::DictEncoded{T, S}, fieldnodes, fieldbuffers, buf
return bufferoffset
end
-DataAPI.refarray(x::DictEncoded{T, S}) where {T, S} = x.indices .+ one(S)
+DataAPI.refarray(x::DictEncoded{T,S}) where {T,S} = x.indices .+ one(S)
DataAPI.refpool(x::DictEncoded) = copy(x.encoding.data)
diff --git a/src/arraytypes/fixedsizelist.jl b/src/arraytypes/fixedsizelist.jl
index 6c54f28..e613ec9 100644
--- a/src/arraytypes/fixedsizelist.jl
+++ b/src/arraytypes/fixedsizelist.jl
@@ -19,12 +19,12 @@
An `ArrowVector` where each element is a "fixed size" list of some kind, like a `NTuple{N, T}`.
"""
-struct FixedSizeList{T, A <: AbstractVector} <: ArrowVector{T}
+struct FixedSizeList{T,A<:AbstractVector} <: ArrowVector{T}
arrow::Vector{UInt8} # need to hold a reference to arrow memory blob
validity::ValidityBitmap
data::A
ℓ::Int
- metadata::Union{Nothing, Base.ImmutableDict{String,String}}
+ metadata::Union{Nothing,Base.ImmutableDict{String,String}}
end
Base.size(l::FixedSizeList) = (l.ℓ,)
@@ -40,15 +40,19 @@ Base.size(l::FixedSizeList) = (l.ℓ,)
else
off = (i - 1) * N
if X === T && isbitstype(Y)
- tup = _unsafe_load_tuple(NTuple{N, Y}, l.data, off + 1)
+ tup = _unsafe_load_tuple(NTuple{N,Y}, l.data, off + 1)
else
- tup = ntuple(j->l.data[off + j], N)
+ tup = ntuple(j -> l.data[off + j], N)
end
return ArrowTypes.fromarrow(T, tup)
end
end
-function _unsafe_load_tuple(::Type{NTuple{N,T}}, bytes::Vector{UInt8}, i::Integer) where {N,T}
+function _unsafe_load_tuple(
+ ::Type{NTuple{N,T}},
+ bytes::Vector{UInt8},
+ i::Integer,
+) where {N,T}
x = Ref(bytes, i)
y = Ref{NTuple{N,T}}()
ArrowTypes._unsafe_cast!(y, x, N)
@@ -60,7 +64,9 @@ end
if v === missing
@inbounds l.validity[i] = false
else
- N = ArrowTypes.getsize(ArrowTypes.ArrowKind(ArrowTypes.ArrowType(Base.nonmissingtype(T))))
+ N = ArrowTypes.getsize(
+ ArrowTypes.ArrowKind(ArrowTypes.ArrowType(Base.nonmissingtype(T))),
+ )
off = (i - 1) * N
foreach(1:N) do j
@inbounds l.data[off + j] = v[j]
@@ -70,21 +76,26 @@ end
end
# lazy equal-spaced flattener
-struct ToFixedSizeList{T, N, A} <: AbstractVector{T}
+struct ToFixedSizeList{T,N,A} <: AbstractVector{T}
data::A # A is AbstractVector of (AbstractVector or AbstractString)
end
-origtype(::ToFixedSizeList{T, N, A}) where {T, N, A} = eltype(A)
+origtype(::ToFixedSizeList{T,N,A}) where {T,N,A} = eltype(A)
function ToFixedSizeList(input)
NT = ArrowTypes.ArrowKind(Base.nonmissingtype(eltype(input))) # typically NTuple{N, T}
- return ToFixedSizeList{ArrowTypes.gettype(NT), ArrowTypes.getsize(NT), typeof(input)}(input)
+ return ToFixedSizeList{ArrowTypes.gettype(NT),ArrowTypes.getsize(NT),typeof(input)}(
+ input,
+ )
end
Base.IndexStyle(::Type{<:ToFixedSizeList}) = Base.IndexLinear()
-Base.size(x::ToFixedSizeList{T, N}) where {T, N} = (N * length(x.data),)
+Base.size(x::ToFixedSizeList{T,N}) where {T,N} = (N * length(x.data),)
-Base.@propagate_inbounds function Base.getindex(A::ToFixedSizeList{T, N}, i::Integer) where {T, N}
+Base.@propagate_inbounds function Base.getindex(
+ A::ToFixedSizeList{T,N},
+ i::Integer,
+) where {T,N}
@boundscheck checkbounds(A, i)
a, b = fldmod1(i, N)
@inbounds x = A.data[a]
@@ -92,7 +103,10 @@ Base.@propagate_inbounds function Base.getindex(A::ToFixedSizeList{T, N}, i::Int
end
# efficient iteration
-@inline function Base.iterate(A::ToFixedSizeList{T, N}, (i, chunk, chunk_i, len)=(1, 1, 1, length(A))) where {T, N}
+@inline function Base.iterate(
+ A::ToFixedSizeList{T,N},
+ (i, chunk, chunk_i, len)=(1, 1, 1, length(A)),
+) where {T,N}
i > len && return nothing
@inbounds y = A.data[chunk]
@inbounds x = y === missing ? ArrowTypes.default(T) : y[chunk_i]
@@ -107,7 +121,17 @@ end
arrowvector(::FixedSizeListKind, x::FixedSizeList, i, nl, fi, de, ded, meta; kw...) = x
-function arrowvector(::FixedSizeListKind{N, T}, x, i, nl, fi, de, ded, meta; kw...) where {N, T}
+function arrowvector(
+ ::FixedSizeListKind{N,T},
+ x,
+ i,
+ nl,
+ fi,
+ de,
+ ded,
+ meta;
+ kw...,
+) where {N,T}
len = length(x)
validity = ValidityBitmap(x)
flat = ToFixedSizeList(x)
@@ -116,12 +140,12 @@ function arrowvector(::FixedSizeListKind{N, T}, x, i, nl, fi, de, ded, meta; kw.
S = origtype(flat)
else
data = arrowvector(flat, i, nl + 1, fi, de, ded, nothing; kw...)
- S = withmissing(eltype(x), NTuple{N, eltype(data)})
+ S = withmissing(eltype(x), NTuple{N,eltype(data)})
end
- return FixedSizeList{S, typeof(data)}(UInt8[], validity, data, len, meta)
+ return FixedSizeList{S,typeof(data)}(UInt8[], validity, data, len, meta)
end
-function compress(Z::Meta.CompressionType.T, comp, x::FixedSizeList{T, A}) where {T, A}
+function compress(Z::Meta.CompressionType.T, comp, x::FixedSizeList{T,A}) where {T,A}
len = length(x)
nc = nullcount(x)
validity = compress(Z, comp, x.validity)
@@ -132,10 +156,16 @@ function compress(Z::Meta.CompressionType.T, comp, x::FixedSizeList{T, A}) where
else
push!(children, compress(Z, comp, x.data))
end
- return Compressed{Z, typeof(x)}(x, buffers, len, nc, children)
+ return Compressed{Z,typeof(x)}(x, buffers, len, nc, children)
end
-function makenodesbuffers!(col::FixedSizeList{T, A}, fieldnodes, fieldbuffers, bufferoffset, alignment) where {T, A}
+function makenodesbuffers!(
+ col::FixedSizeList{T,A},
+ fieldnodes,
+ fieldbuffers,
+ bufferoffset,
+ alignment,
+) where {T,A}
len = length(col)
nc = nullcount(col)
push!(fieldnodes, FieldNode(len, nc))
@@ -151,12 +181,13 @@ function makenodesbuffers!(col::FixedSizeList{T, A}, fieldnodes, fieldbuffers, b
@debugv 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
bufferoffset += padding(blen, alignment)
else
- bufferoffset = makenodesbuffers!(col.data, fieldnodes, fieldbuffers, bufferoffset, alignment)
+ bufferoffset =
+ makenodesbuffers!(col.data, fieldnodes, fieldbuffers, bufferoffset, alignment)
end
return bufferoffset
end
-function writebuffer(io, col::FixedSizeList{T, A}, alignment) where {T, A}
+function writebuffer(io, col::FixedSizeList{T,A}, alignment) where {T,A}
@debugv 1 "writebuffer: col = $(typeof(col))"
@debugv 2 col
writebitmap(io, col, alignment)
diff --git a/src/arraytypes/list.jl b/src/arraytypes/list.jl
index 91f693d..8e2a1af 100644
--- a/src/arraytypes/list.jl
+++ b/src/arraytypes/list.jl
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-struct Offsets{T <: Union{Int32, Int64}} <: ArrowVector{Tuple{T, T}}
+struct Offsets{T<:Union{Int32,Int64}} <: ArrowVector{Tuple{T,T}}
arrow::Vector{UInt8} # need to hold a reference to arrow memory blob
offsets::Vector{T}
end
@@ -33,13 +33,13 @@ end
An `ArrowVector` where each element is a variable sized list of some kind, like an `AbstractVector` or `AbstractString`.
"""
-struct List{T, O, A} <: ArrowVector{T}
+struct List{T,O,A} <: ArrowVector{T}
arrow::Vector{UInt8} # need to hold a reference to arrow memory blob
validity::ValidityBitmap
offsets::Offsets{O}
data::A
ℓ::Int
- metadata::Union{Nothing, Base.ImmutableDict{String,String}}
+ metadata::Union{Nothing,Base.ImmutableDict{String,String}}
end
Base.size(l::List) = (l.ℓ,)
@@ -53,9 +53,12 @@ Base.size(l::List) = (l.ℓ,)
if ArrowTypes.isstringtype(K) || S <: Base.CodeUnits
if S !== T
if S <: Base.CodeUnits
- return l.validity[i] ? Base.CodeUnits(unsafe_string(pointer(l.data, lo), hi - lo + 1)) : missing
+ return l.validity[i] ?
+ Base.CodeUnits(unsafe_string(pointer(l.data, lo), hi - lo + 1)) :
+ missing
else
- return l.validity[i] ? ArrowTypes.fromarrow(T, pointer(l.data, lo), hi - lo + 1) : missing
+ return l.validity[i] ?
+ ArrowTypes.fromarrow(T, pointer(l.data, lo), hi - lo + 1) : missing
end
else
if S <: Base.CodeUnits
@@ -83,14 +86,14 @@ _codeunits(x::Base.CodeUnits) = x
# an AbstractVector version of Iterators.flatten
# code based on SentinelArrays.ChainedVector
-struct ToList{T, stringtype, A, I} <: AbstractVector{T}
+struct ToList{T,stringtype,A,I} <: AbstractVector{T}
data::Vector{A} # A is AbstractVector or AbstractString
inds::Vector{I}
end
-origtype(::ToList{T, S, A, I}) where {T, S, A, I} = A
-liststringtype(::Type{ToList{T, S, A, I}}) where {T, S, A, I} = S
-function liststringtype(::List{T, O, A}) where {T, O, A}
+origtype(::ToList{T,S,A,I}) where {T,S,A,I} = A
+liststringtype(::Type{ToList{T,S,A,I}}) where {T,S,A,I} = S
+function liststringtype(::List{T,O,A}) where {T,O,A}
ST = Base.nonmissingtype(T)
K = ArrowTypes.ArrowKind(ST)
return liststringtype(A) || ArrowTypes.isstringtype(K) || ST <: Base.CodeUnits # add the CodeUnits check for ArrowTypes compat for now
@@ -123,7 +126,7 @@ function ToList(input; largelists::Bool=false)
end
push!(inds, totalsize)
end
- return ToList{T, stringtype, AT, I}(data, inds)
+ return ToList{T,stringtype,AT,I}(data, inds)
end
Base.IndexStyle(::Type{<:ToList}) = Base.IndexLinear()
@@ -140,14 +143,21 @@ end
return chunk - 1, i - (@inbounds A.inds[chunk - 1])
end
-Base.@propagate_inbounds function Base.getindex(A::ToList{T, stringtype}, i::Integer) where {T, stringtype}
+Base.@propagate_inbounds function Base.getindex(
+ A::ToList{T,stringtype},
+ i::Integer,
+) where {T,stringtype}
@boundscheck checkbounds(A, i)
chunk, ix = index(A, i)
@inbounds x = A.data[chunk]
return @inbounds stringtype ? _codeunits(x)[ix] : x[ix]
end
-Base.@propagate_inbounds function Base.setindex!(A::ToList{T, stringtype}, v, i::Integer) where {T, stringtype}
+Base.@propagate_inbounds function Base.setindex!(
+ A::ToList{T,stringtype},
+ v,
+ i::Integer,
+) where {T,stringtype}
@boundscheck checkbounds(A, i)
chunk, ix = index(A, i)
@inbounds x = A.data[chunk]
@@ -160,7 +170,7 @@ Base.@propagate_inbounds function Base.setindex!(A::ToList{T, stringtype}, v, i:
end
# efficient iteration
-@inline function Base.iterate(A::ToList{T, stringtype}) where {T, stringtype}
+@inline function Base.iterate(A::ToList{T,stringtype}) where {T,stringtype}
length(A) == 0 && return nothing
i = 1
chunk = 2
@@ -187,7 +197,10 @@ end
return x, (i, chunk, chunk_i, chunk_len, length(A))
end
-@inline function Base.iterate(A::ToList{T, stringtype}, (i, chunk, chunk_i, chunk_len, len)) where {T, stringtype}
+@inline function Base.iterate(
+ A::ToList{T,stringtype},
+ (i, chunk, chunk_i, chunk_len, len),
+) where {T,stringtype}
i > len && return nothing
@inbounds val = A.data[chunk - 1]
@inbounds x = stringtype ? _codeunits(val)[chunk_i] : val[chunk_i]
@@ -217,13 +230,21 @@ function arrowvector(::ListKind, x, i, nl, fi, de, ded, meta; largelists::Bool=f
data = flat
T = origtype(flat)
else
- data = arrowvector(flat, i, nl + 1, fi, de, ded, nothing; largelists=largelists, kw...)
+ data =
+ arrowvector(flat, i, nl + 1, fi, de, ded, nothing; largelists=largelists, kw...)
T = withmissing(eltype(x), Vector{eltype(data)})
end
- return List{T, eltype(flat.inds), typeof(data)}(UInt8[], validity, offsets, data, len, meta)
+ return List{T,eltype(flat.inds),typeof(data)}(
+ UInt8[],
+ validity,
+ offsets,
+ data,
+ len,
+ meta,
+ )
end
-function compress(Z::Meta.CompressionType.T, comp, x::List{T, O, A}) where {T, O, A}
+function compress(Z::Meta.CompressionType.T, comp, x::List{T,O,A}) where {T,O,A}
len = length(x)
nc = nullcount(x)
validity = compress(Z, comp, x.validity)
@@ -235,5 +256,5 @@ function compress(Z::Meta.CompressionType.T, comp, x::List{T, O, A}) where {T, O
else
push!(children, compress(Z, comp, x.data))
end
- return Compressed{Z, typeof(x)}(x, buffers, len, nc, children)
+ return Compressed{Z,typeof(x)}(x, buffers, len, nc, children)
end
diff --git a/src/arraytypes/map.jl b/src/arraytypes/map.jl
index c117a2b..ebd4454 100644
--- a/src/arraytypes/map.jl
+++ b/src/arraytypes/map.jl
@@ -19,12 +19,12 @@
An `ArrowVector` where each element is a "map" of some kind, like a `Dict`.
"""
-struct Map{T, O, A} <: ArrowVector{T}
+struct Map{T,O,A} <: ArrowVector{T}
validity::ValidityBitmap
offsets::Offsets{O}
data::A
ℓ::Int
- metadata::Union{Nothing, Base.ImmutableDict{String,String}}
+ metadata::Union{Nothing,Base.ImmutableDict{String,String}}
end
Base.size(l::Map) = (l.ℓ,)
@@ -33,7 +33,11 @@ Base.size(l::Map) = (l.ℓ,)
@boundscheck checkbounds(l, i)
@inbounds lo, hi = l.offsets[i]
if Base.nonmissingtype(T) !== T
- return l.validity[i] ? ArrowTypes.fromarrow(T, Dict(x.key => x.value for x in view(l.data, lo:hi))) : missing
+ return l.validity[i] ?
+ ArrowTypes.fromarrow(
+ T,
+ Dict(x.key => x.value for x in view(l.data, lo:hi)),
+ ) : missing
else
return ArrowTypes.fromarrow(T, Dict(x.key => x.value for x in view(l.data, lo:hi)))
end
@@ -42,7 +46,7 @@ end
keyvalues(KT, ::Missing) = missing
keyvalues(KT, x::AbstractDict) = [KT(k, v) for (k, v) in pairs(x)]
-keyvaluetypes(::Type{NamedTuple{(:key, :value), Tuple{K, V}}}) where {K, V} = (K, V)
+keyvaluetypes(::Type{NamedTuple{(:key, :value),Tuple{K,V}}}) where {K,V} = (K, V)
arrowvector(::MapKind, x::Map, i, nl, fi, de, ded, meta; kw...) = x
@@ -52,19 +56,33 @@ function arrowvector(::MapKind, x, i, nl, fi, de, ded, meta; largelists::Bool=fa
ET = eltype(x)
DT = Base.nonmissingtype(ET)
KDT, VDT = keytype(DT), valtype(DT)
- ArrowTypes.concrete_or_concreteunion(KDT) || throw(ArgumentError("`keytype(d)` must be concrete to serialize map-like `d`, but `keytype(d) == $KDT`"))
- ArrowTypes.concrete_or_concreteunion(VDT) || throw(ArgumentError("`valtype(d)` must be concrete to serialize map-like `d`, but `valtype(d) == $VDT`"))
+ ArrowTypes.concrete_or_concreteunion(KDT) || throw(
+ ArgumentError(
+ "`keytype(d)` must be concrete to serialize map-like `d`, but `keytype(d) == $KDT`",
+ ),
+ )
+ ArrowTypes.concrete_or_concreteunion(VDT) || throw(
+ ArgumentError(
+ "`valtype(d)` must be concrete to serialize map-like `d`, but `valtype(d) == $VDT`",
+ ),
+ )
KT = KeyValue{KDT,VDT}
VT = Vector{KT}
- T = DT !== ET ? Union{Missing, VT} : VT
+ T = DT !== ET ? Union{Missing,VT} : VT
flat = ToList(T[keyvalues(KT, y) for y in x]; largelists=largelists)
offsets = Offsets(UInt8[], flat.inds)
data = arrowvector(flat, i, nl + 1, fi, de, ded, nothing; largelists=largelists, kw...)
K, V = keyvaluetypes(eltype(data))
- return Map{withmissing(ET, Dict{K, V}), eltype(flat.inds), typeof(data)}(validity, offsets, data, len, meta)
+ return Map{withmissing(ET, Dict{K,V}),eltype(flat.inds),typeof(data)}(
+ validity,
+ offsets,
+ data,
+ len,
+ meta,
+ )
end
-function compress(Z::Meta.CompressionType.T, comp, x::A) where {A <: Map}
+function compress(Z::Meta.CompressionType.T, comp, x::A) where {A<:Map}
len = length(x)
nc = nullcount(x)
validity = compress(Z, comp, x.validity)
@@ -72,10 +90,16 @@ function compress(Z::Meta.CompressionType.T, comp, x::A) where {A <: Map}
buffers = [validity, offsets]
children = Compressed[]
push!(children, compress(Z, comp, x.data))
- return Compressed{Z, A}(x, buffers, len, nc, children)
+ return Compressed{Z,A}(x, buffers, len, nc, children)
end
-function makenodesbuffers!(col::Union{Map{T, O, A}, List{T, O, A}}, fieldnodes, fieldbuffers, bufferoffset, alignment) where {T, O, A}
+function makenodesbuffers!(
+ col::Union{Map{T,O,A},List{T,O,A}},
+ fieldnodes,
+ fieldbuffers,
+ bufferoffset,
+ alignment,
+) where {T,O,A}
len = length(col)
nc = nullcount(col)
push!(fieldnodes, FieldNode(len, nc))
@@ -96,12 +120,13 @@ function makenodesbuffers!(col::Union{Map{T, O, A}, List{T, O, A}}, fieldnodes,
@debugv 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
bufferoffset += padding(blen, alignment)
else
- bufferoffset = makenodesbuffers!(col.data, fieldnodes, fieldbuffers, bufferoffset, alignment)
+ bufferoffset =
+ makenodesbuffers!(col.data, fieldnodes, fieldbuffers, bufferoffset, alignment)
end
return bufferoffset
end
-function writebuffer(io, col::Union{Map{T, O, A}, List{T, O, A}}, alignment) where {T, O, A}
+function writebuffer(io, col::Union{Map{T,O,A},List{T,O,A}}, alignment) where {T,O,A}
@debugv 1 "writebuffer: col = $(typeof(col))"
@debugv 2 col
writebitmap(io, col, alignment)
diff --git a/src/arraytypes/primitive.jl b/src/arraytypes/primitive.jl
index 7d9f61b..c187c77 100644
--- a/src/arraytypes/primitive.jl
+++ b/src/arraytypes/primitive.jl
@@ -19,20 +19,20 @@
An `ArrowVector` where each element is a "fixed size" scalar of some kind, like an integer, float, decimal, or time type.
"""
-struct Primitive{T, A} <: ArrowVector{T}
+struct Primitive{T,A} <: ArrowVector{T}
arrow::Vector{UInt8} # need to hold a reference to arrow memory blob
validity::ValidityBitmap
data::A
ℓ::Int64
- metadata::Union{Nothing, Base.ImmutableDict{String,String}}
+ metadata::Union{Nothing,Base.ImmutableDict{String,String}}
end
-Primitive(::Type{T}, b::Vector{UInt8}, v::ValidityBitmap, data::A, l, meta) where {T, A} =
- Primitive{T, A}(b, v, data, l, meta)
+Primitive(::Type{T}, b::Vector{UInt8}, v::ValidityBitmap, data::A, l, meta) where {T,A} =
+ Primitive{T,A}(b, v, data, l, meta)
Base.size(p::Primitive) = (p.ℓ,)
-function Base.copy(p::Primitive{T, A}) where {T, A}
+function Base.copy(p::Primitive{T,A}) where {T,A}
if nullcount(p) == 0 && T === eltype(A)
return copy(p.data)
else
@@ -70,15 +70,21 @@ function arrowvector(::PrimitiveKind, x, i, nl, fi, de, ded, meta; kw...)
return Primitive(eltype(x), UInt8[], validity, x, length(x), meta)
end
-function compress(Z::Meta.CompressionType.T, comp, p::P) where {P <: Primitive}
+function compress(Z::Meta.CompressionType.T, comp, p::P) where {P<:Primitive}
len = length(p)
nc = nullcount(p)
validity = compress(Z, comp, p.validity)
data = compress(Z, comp, p.data)
- return Compressed{Z, P}(p, [validity, data], len, nc, Compressed[])
+ return Compressed{Z,P}(p, [validity, data], len, nc, Compressed[])
end
-function makenodesbuffers!(col::Primitive{T}, fieldnodes, fieldbuffers, bufferoffset, alignment) where {T}
+function makenodesbuffers!(
+ col::Primitive{T},
+ fieldnodes,
+ fieldbuffers,
+ bufferoffset,
+ alignment,
+) where {T}
len = length(col)
nc = nullcount(col)
push!(fieldnodes, FieldNode(len, nc))
diff --git a/src/arraytypes/struct.jl b/src/arraytypes/struct.jl
index 2b38847..4ad9752 100644
--- a/src/arraytypes/struct.jl
+++ b/src/arraytypes/struct.jl
@@ -19,11 +19,11 @@
An `ArrowVector` where each element is a "struct" of some kind with ordered, named fields, like a `NamedTuple{names, types}` or regular julia `struct`.
"""
-struct Struct{T, S} <: ArrowVector{T}
+struct Struct{T,S} <: ArrowVector{T}
validity::ValidityBitmap
data::S # Tuple of ArrowVector
ℓ::Int
- metadata::Union{Nothing, Base.ImmutableDict{String,String}}
+ metadata::Union{Nothing,Base.ImmutableDict{String,String}}
end
Base.size(s::Struct) = (s.ℓ,)
@@ -38,13 +38,15 @@ istuple(T) = false
NT = Base.nonmissingtype(T)
if isnamedtuple(NT) || istuple(NT)
if NT !== T
- return s.validity[i] ? NT(ntuple(j->s.data[j][i], fieldcount(S))) : missing
+ return s.validity[i] ? NT(ntuple(j -> s.data[j][i], fieldcount(S))) : missing
else
- return NT(ntuple(j->s.data[j][i], fieldcount(S)))
+ return NT(ntuple(j -> s.data[j][i], fieldcount(S)))
end
else
if NT !== T
- return s.validity[i] ? ArrowTypes.fromarrow(NT, (s.data[j][i] for j = 1:fieldcount(S))...) : missing
+ return s.validity[i] ?
+ ArrowTypes.fromarrow(NT, (s.data[j][i] for j = 1:fieldcount(S))...) :
+ missing
else
return ArrowTypes.fromarrow(NT, (s.data[j][i] for j = 1:fieldcount(S))...)
end
@@ -65,16 +67,17 @@ end
# return v
# end
-struct ToStruct{T, i, A} <: AbstractVector{T}
+struct ToStruct{T,i,A} <: AbstractVector{T}
data::A # eltype is NamedTuple or some struct
end
-ToStruct(x::A, j::Integer) where {A} = ToStruct{fieldtype(Base.nonmissingtype(eltype(A)), j), j, A}(x)
+ToStruct(x::A, j::Integer) where {A} =
+ ToStruct{fieldtype(Base.nonmissingtype(eltype(A)), j),j,A}(x)
Base.IndexStyle(::Type{<:ToStruct}) = Base.IndexLinear()
Base.size(x::ToStruct) = (length(x.data),)
-Base.@propagate_inbounds function Base.getindex(A::ToStruct{T, j}, i::Integer) where {T, j}
+Base.@propagate_inbounds function Base.getindex(A::ToStruct{T,j}, i::Integer) where {T,j}
@boundscheck checkbounds(A, i)
@inbounds x = A.data[i]
return x === missing ? ArrowTypes.default(T) : getfield(x, j)
@@ -82,19 +85,30 @@ end
arrowvector(::StructKind, x::Struct, i, nl, fi, de, ded, meta; kw...) = x
-namedtupletype(::Type{NamedTuple{names, types}}, data) where {names, types} = NamedTuple{names, Tuple{(eltype(x) for x in data)...}}
-namedtupletype(::Type{T}, data) where {T} = NamedTuple{fieldnames(T), Tuple{(eltype(x) for x in data)...}}
-namedtupletype(::Type{T}, data) where {T <: Tuple} = NamedTuple{map(Symbol, fieldnames(T)), Tuple{(eltype(x) for x in data)...}}
+namedtupletype(::Type{NamedTuple{names,types}}, data) where {names,types} =
+ NamedTuple{names,Tuple{(eltype(x) for x in data)...}}
+namedtupletype(::Type{T}, data) where {T} =
+ NamedTuple{fieldnames(T),Tuple{(eltype(x) for x in data)...}}
+namedtupletype(::Type{T}, data) where {T<:Tuple} =
+ NamedTuple{map(Symbol, fieldnames(T)),Tuple{(eltype(x) for x in data)...}}
function arrowvector(::StructKind, x, i, nl, fi, de, ded, meta; kw...)
len = length(x)
validity = ValidityBitmap(x)
T = Base.nonmissingtype(eltype(x))
- data = Tuple(arrowvector(ToStruct(x, j), i, nl + 1, j, de, ded, nothing; kw...) for j = 1:fieldcount(T))
- return Struct{withmissing(eltype(x), namedtupletype(T, data)), typeof(data)}(validity, data, len, meta)
+ data = Tuple(
+ arrowvector(ToStruct(x, j), i, nl + 1, j, de, ded, nothing; kw...) for
+ j = 1:fieldcount(T)
+ )
+ return Struct{withmissing(eltype(x), namedtupletype(T, data)),typeof(data)}(
+ validity,
+ data,
+ len,
+ meta,
+ )
end
-function compress(Z::Meta.CompressionType.T, comp, x::A) where {A <: Struct}
+function compress(Z::Meta.CompressionType.T, comp, x::A) where {A<:Struct}
len = length(x)
nc = nullcount(x)
validity = compress(Z, comp, x.validity)
@@ -103,10 +117,16 @@ function compress(Z::Meta.CompressionType.T, comp, x::A) where {A <: Struct}
for y in x.data
push!(children, compress(Z, comp, y))
end
- return Compressed{Z, A}(x, buffers, len, nc, children)
+ return Compressed{Z,A}(x, buffers, len, nc, children)
end
-function makenodesbuffers!(col::Struct{T}, fieldnodes, fieldbuffers, bufferoffset, alignment) where {T}
+function makenodesbuffers!(
+ col::Struct{T},
+ fieldnodes,
+ fieldbuffers,
+ bufferoffset,
+ alignment,
+) where {T}
len = length(col)
nc = nullcount(col)
push!(fieldnodes, FieldNode(len, nc))
@@ -117,7 +137,8 @@ function makenodesbuffers!(col::Struct{T}, fieldnodes, fieldbuffers, bufferoffse
@debugv 1 "made field buffer: bufferidx = $(length(fieldbuffers)), offset = $(fieldbuffers[end].offset), len = $(fieldbuffers[end].length), padded = $(padding(fieldbuffers[end].length, alignment))"
bufferoffset += blen
for child in col.data
- bufferoffset = makenodesbuffers!(child, fieldnodes, fieldbuffers, bufferoffset, alignment)
+ bufferoffset =
+ makenodesbuffers!(child, fieldnodes, fieldbuffers, bufferoffset, alignment)
end
return bufferoffset
end
diff --git a/src/arraytypes/unions.jl b/src/arraytypes/unions.jl
index d759fbf..a688055 100644
--- a/src/arraytypes/unions.jl
+++ b/src/arraytypes/unions.jl
@@ -20,13 +20,13 @@
# here, T is Meta.UnionMode.Dense or Meta.UnionMode.Sparse,
# typeIds is a NTuple{N, Int32}, and U is a Tuple{...} of the
# unioned types
-struct UnionT{T, typeIds, U}
-end
+struct UnionT{T,typeIds,U} end
-unionmode(::Type{UnionT{T, typeIds, U}}) where {T, typeIds, U} = T
-typeids(::Type{UnionT{T, typeIds, U}}) where {T, typeIds, U} = typeIds
-Base.eltype(::Type{UnionT{T, typeIds, U}}) where {T, typeIds, U} = U
-uniontypewith(::Type{UnionT{T, typeIds, U}}, ::Type{U2}) where {T, typeIds, U, U2 <: Tuple} = UnionT{T, typeIds, U2}
+unionmode(::Type{UnionT{T,typeIds,U}}) where {T,typeIds,U} = T
+typeids(::Type{UnionT{T,typeIds,U}}) where {T,typeIds,U} = typeIds
+Base.eltype(::Type{UnionT{T,typeIds,U}}) where {T,typeIds,U} = U
+uniontypewith(::Type{UnionT{T,typeIds,U}}, ::Type{U2}) where {T,typeIds,U,U2<:Tuple} =
+ UnionT{T,typeIds,U2}
ArrowTypes.ArrowKind(::Type{<:UnionT}) = ArrowTypes.UnionKind()
@@ -46,8 +46,9 @@ function eachunion(T, elems)
end
# produce typeIds, offsets, data tuple for DenseUnion
-isatypeid(x::T, ::Type{types}) where {T, types} = isatypeid(x, fieldtype(types, 1), types, 1)
-isatypeid(x::T, ::Type{S}, ::Type{types}, i) where {T, S, types} = x isa S ? i : isatypeid(x, fieldtype(types, i + 1), types, i + 1)
+isatypeid(x::T, ::Type{types}) where {T,types} = isatypeid(x, fieldtype(types, 1), types, 1)
+isatypeid(x::T, ::Type{S}, ::Type{types}, i) where {T,S,types} =
+ x isa S ? i : isatypeid(x, fieldtype(types, i + 1), types, i + 1)
"""
Arrow.DenseUnion
@@ -57,19 +58,22 @@ An `Arrow.DenseUnion`, in comparison to `Arrow.SparseUnion`, stores elements in
array, where each offset element is the index into one of the typed arrays. This allows a sort of "compression", where no extra space is
used/allocated to store all the elements.
"""
-struct DenseUnion{T, U, S} <: ArrowVector{T}
+struct DenseUnion{T,U,S} <: ArrowVector{T}
arrow::Vector{UInt8} # need to hold a reference to arrow memory blob
arrow2::Vector{UInt8} # if arrow blob is compressed, need a 2nd reference for uncompressed offsets bytes
typeIds::Vector{UInt8}
offsets::Vector{Int32}
data::S # Tuple of ArrowVector
- metadata::Union{Nothing, Base.ImmutableDict{String,String}}
+ metadata::Union{Nothing,Base.ImmutableDict{String,String}}
end
Base.size(s::DenseUnion) = size(s.typeIds)
nullcount(x::DenseUnion) = 0 # DenseUnion has no validity bitmap; only children do
-@propagate_inbounds function Base.getindex(s::DenseUnion{T, UnionT{M, typeIds, U}}, i::Integer) where {T, M, typeIds, U}
+@propagate_inbounds function Base.getindex(
+ s::DenseUnion{T,UnionT{M,typeIds,U}},
+ i::Integer,
+) where {T,M,typeIds,U}
@boundscheck checkbounds(s, i)
@inbounds typeId = s.typeIds[i]
@inbounds off = s.offsets[i]
@@ -93,22 +97,25 @@ end
# convenience wrappers for signaling that an array should be written
# as with dense/sparse union arrow buffers
-struct DenseUnionVector{T, U} <: AbstractVector{UnionT{Meta.UnionMode.Dense, nothing, U}}
+struct DenseUnionVector{T,U} <: AbstractVector{UnionT{Meta.UnionMode.Dense,nothing,U}}
itr::T
end
-DenseUnionVector(x::T) where {T} = DenseUnionVector{T, Tuple{eachunion(eltype(x))...}}(x)
+DenseUnionVector(x::T) where {T} = DenseUnionVector{T,Tuple{eachunion(eltype(x))...}}(x)
Base.IndexStyle(::Type{<:DenseUnionVector}) = Base.IndexLinear()
Base.size(x::DenseUnionVector) = (length(x.itr),)
Base.iterate(x::DenseUnionVector, st...) = iterate(x.itr, st...)
Base.getindex(x::DenseUnionVector, i::Int) = getindex(x.itr, i)
-function todense(::Type{UnionT{T, typeIds, U}}, x) where {T, typeIds, U}
+function todense(::Type{UnionT{T,typeIds,U}}, x) where {T,typeIds,U}
typeids = typeIds === nothing ? (0:(fieldcount(U) - 1)) : typeIds
len = length(x)
types = Vector{UInt8}(undef, len)
offsets = Vector{Int32}(undef, len)
- data = Tuple(Vector{i == 1 ? Union{Missing, fieldtype(U, i)} : fieldtype(U, i)}(undef, 0) for i = 1:fieldcount(U))
+ data = Tuple(
+ Vector{i == 1 ? Union{Missing,fieldtype(U, i)} : fieldtype(U, i)}(undef, 0) for
+ i = 1:fieldcount(U)
+ )
for (i, y) in enumerate(x)
typeid = y === missing ? 0x00 : UInt8(typeids[isatypeid(y, U)])
@inbounds types[i] = typeid
@@ -118,11 +125,11 @@ function todense(::Type{UnionT{T, typeIds, U}}, x) where {T, typeIds, U}
return types, offsets, data
end
-struct SparseUnionVector{T, U} <: AbstractVector{UnionT{Meta.UnionMode.Sparse, nothing, U}}
+struct SparseUnionVector{T,U} <: AbstractVector{UnionT{Meta.UnionMode.Sparse,nothing,U}}
itr::T
end
-SparseUnionVector(x::T) where {T} = SparseUnionVector{T, Tuple{eachunion(eltype(x))...}}(x)
+SparseUnionVector(x::T) where {T} = SparseUnionVector{T,Tuple{eachunion(eltype(x))...}}(x)
Base.IndexStyle(::Type{<:SparseUnionVector}) = Base.IndexLinear()
Base.size(x::SparseUnionVector) = (length(x.itr),)
Base.iterate(x::SparseUnionVector, st...) = iterate(x.itr, st...)
@@ -134,7 +141,7 @@ Base.getindex(x::SparseUnionVector, i::Int) = getindex(x.itr, i)
# but with one child array per unioned type; each child
# should include the elements from parent of its type
# and other elements can be missing/default
-function sparsetypeids(::Type{UnionT{T, typeIds, U}}, x) where {T, typeIds, U}
+function sparsetypeids(::Type{UnionT{T,typeIds,U}}, x) where {T,typeIds,U}
typeids = typeIds === nothing ? (0:(fieldcount(U) - 1)) : typeIds
len = length(x)
types = Vector{UInt8}(undef, len)
@@ -145,11 +152,11 @@ function sparsetypeids(::Type{UnionT{T, typeIds, U}}, x) where {T, typeIds, U}
return types
end
-struct ToSparseUnion{T, A} <: AbstractVector{T}
+struct ToSparseUnion{T,A} <: AbstractVector{T}
data::A
end
-ToSparseUnion(::Type{T}, data::A) where {T, A} = ToSparseUnion{T, A}(data)
+ToSparseUnion(::Type{T}, data::A) where {T,A} = ToSparseUnion{T,A}(data)
Base.IndexStyle(::Type{<:ToSparseUnion}) = Base.IndexLinear()
Base.size(x::ToSparseUnion) = (length(x.data),)
@@ -160,7 +167,7 @@ Base.@propagate_inbounds function Base.getindex(A::ToSparseUnion{T}, i::Integer)
return @inbounds x isa T ? x : ArrowTypes.default(T)
end
-function compress(Z::Meta.CompressionType.T, comp, x::A) where {A <: DenseUnion}
+function compress(Z::Meta.CompressionType.T, comp, x::A) where {A<:DenseUnion}
len = length(x)
nc = nullcount(x)
typeIds = compress(Z, comp, x.typeIds)
@@ -170,7 +177,7 @@ function compress(Z::Meta.CompressionType.T, comp, x::A) where {A <: DenseUnion}
for y in x.data
push!(children, compress(Z, comp, y))
end
- return Compressed{Z, A}(x, buffers, len, nc, children)
+ return Compressed{Z,A}(x, buffers, len, nc, children)
end
"""
@@ -181,17 +188,20 @@ An `Arrow.SparseUnion`, in comparison to `Arrow.DenseUnion`, stores elements in
array has the same length as the full array. This ends up with "wasted" space, since only one slot among the typed arrays is valid per full
array element, but can allow for certain optimizations when each typed array has the same length.
"""
-struct SparseUnion{T, U, S} <: ArrowVector{T}
+struct SparseUnion{T,U,S} <: ArrowVector{T}
arrow::Vector{UInt8} # need to hold a reference to arrow memory blob
typeIds::Vector{UInt8}
data::S # Tuple of ArrowVector
- metadata::Union{Nothing, Base.ImmutableDict{String,String}}
+ metadata::Union{Nothing,Base.ImmutableDict{String,String}}
end
Base.size(s::SparseUnion) = size(s.typeIds)
nullcount(x::SparseUnion) = 0
-@propagate_inbounds function Base.getindex(s::SparseUnion{T, UnionT{M, typeIds, U}}, i::Integer) where {T, M, typeIds, U}
+@propagate_inbounds function Base.getindex(
+ s::SparseUnion{T,UnionT{M,typeIds,U}},
+ i::Integer,
+) where {T,M,typeIds,U}
@boundscheck checkbounds(s, i)
@inbounds typeId = s.typeIds[i]
@inbounds x = s.data[typeId + 1][i]
@@ -208,28 +218,74 @@ end
# end
arrowvector(U::Union, x, i, nl, fi, de, ded, meta; denseunions::Bool=true, kw...) =
- arrowvector(denseunions ? DenseUnionVector(x) : SparseUnionVector(x), i, nl, fi, de, ded, meta; denseunions=denseunions, kw...)
-
-arrowvector(::UnionKind, x::Union{DenseUnion, SparseUnion}, i, nl, fi, de, ded, meta; kw...) = x
+ arrowvector(
+ denseunions ? DenseUnionVector(x) : SparseUnionVector(x),
+ i,
+ nl,
+ fi,
+ de,
+ ded,
+ meta;
+ denseunions=denseunions,
+ kw...,
+ )
+
+arrowvector(
+ ::UnionKind,
+ x::Union{DenseUnion,SparseUnion},
+ i,
+ nl,
+ fi,
+ de,
+ ded,
+ meta;
+ kw...,
+) = x
function arrowvector(::UnionKind, x, i, nl, fi, de, ded, meta; kw...)
UT = eltype(x)
if unionmode(UT) == Meta.UnionMode.Dense
x = x isa DenseUnionVector ? x.itr : x
typeids, offsets, data = todense(UT, x)
- data2 = map(y -> arrowvector(y[2], i, nl + 1, y[1], de, ded, nothing; kw...), enumerate(data))
+ data2 = map(
+ y -> arrowvector(y[2], i, nl + 1, y[1], de, ded, nothing; kw...),
+ enumerate(data),
+ )
UT2 = uniontypewith(UT, Tuple{(eltype(x) for x in data2)...})
- return DenseUnion{Union{(eltype(x) for x in data2)...}, UT2, typeof(data2)}(UInt8[], UInt8[], typeids, offsets, data2, meta)
+ return DenseUnion{Union{(eltype(x) for x in data2)...},UT2,typeof(data2)}(
+ UInt8[],
+ UInt8[],
+ typeids,
+ offsets,
+ data2,
+ meta,
+ )
else
x = x isa SparseUnionVector ? x.itr : x
typeids = sparsetypeids(UT, x)
- data3 = Tuple(arrowvector(ToSparseUnion(fieldtype(eltype(UT), j), x), i, nl + 1, j, de, ded, nothing; kw...) for j = 1:fieldcount(eltype(UT)))
+ data3 = Tuple(
+ arrowvector(
+ ToSparseUnion(fieldtype(eltype(UT), j), x),
+ i,
+ nl + 1,
+ j,
+ de,
+ ded,
+ nothing;
+ kw...,
+ ) for j = 1:fieldcount(eltype(UT))
+ )
UT2 = uniontypewith(UT, Tuple{(eltype(x) for x in data3)...})
- return SparseUnion{Union{(eltype(x) for x in data3)...}, UT2, typeof(data3)}(UInt8[], typeids, data3, meta)
+ return SparseUnion{Union{(eltype(x) for x in data3)...},UT2,typeof(data3)}(
+ UInt8[],
+ typeids,
+ data3,
+ meta,
+ )
end
end
-function compress(Z::Meta.CompressionType.T, comp, x::A) where {A <: SparseUnion}
+function compress(Z::Meta.CompressionType.T, comp, x::A) where {A<:SparseUnion}
len = length(x)
nc = nullcount(x)
typeIds = compress(Z, comp, x.typeIds)
@@ -238,10 +294,16 @@ function compress(Z::Meta.CompressionType.T, comp, x::A) where {A <: SparseUnion
for y in x.data
push!(children, compress(Z, comp, y))
end
- return Compressed{Z, A}(x, buffers, len, nc, children)
+ return Compressed{Z,A}(x, buffers, len, nc, children)
end
-function makenodesbuffers!(col::Union{DenseUnion, SparseUnion}, fieldnodes, fieldbuffers, bufferoffset, alignment)
+function makenodesbuffers!(
+ col::Union{DenseUnion,SparseUnion},
+ fieldnodes,
+ fieldbuffers,
+ bufferoffset,
+ alignment,
+)
len = length(col)
nc = nullcount(col)
push!(fieldnodes, FieldNode(len, nc))
@@ -258,12 +320,13 @@ function makenodesbuffers!(col::Union{DenseUnion, SparseUnion}, fieldnodes, fiel
bufferoffset += padding(blen, alignment)
end
for child in col.data
- bufferoffset = makenodesbuffers!(child, fieldnodes, fieldbuffers, bufferoffset, alignment)
+ bufferoffset =
+ makenodesbuffers!(child, fieldnodes, fieldbuffers, bufferoffset, alignment)
end
return bufferoffset
end
-function writebuffer(io, col::Union{DenseUnion, SparseUnion}, alignment)
+function writebuffer(io, col::Union{DenseUnion,SparseUnion}, alignment)
@debugv 1 "writebuffer: col = $(typeof(col))"
@debugv 2 col
# typeIds buffer
diff --git a/src/eltypes.jl b/src/eltypes.jl
index 8fae026..ffc53c0 100644
--- a/src/eltypes.jl
+++ b/src/eltypes.jl
@@ -22,7 +22,7 @@ function juliaeltype end
finaljuliatype(T) = T
finaljuliatype(::Type{Missing}) = Missing
-finaljuliatype(::Type{Union{T, Missing}}) where {T} = Union{Missing, finaljuliatype(T)}
+finaljuliatype(::Type{Union{T,Missing}}) where {T} = Union{Missing,finaljuliatype(T)}
"""
Given a FlatBuffers.Builder and a Julia column or column eltype,
@@ -39,7 +39,7 @@ function juliaeltype(f::Meta.Field, ::Nothing, convert::Bool)
return convert ? finaljuliatype(T) : T
end
-function juliaeltype(f::Meta.Field, meta::AbstractDict{String, String}, convert::Bool)
+function juliaeltype(f::Meta.Field, meta::AbstractDict{String,String}, convert::Bool)
TT = juliaeltype(f, convert)
!convert && return TT
T = finaljuliatype(TT)
@@ -48,9 +48,10 @@ function juliaeltype(f::Meta.Field, meta::AbstractDict{String, String}, convert:
metadata = get(meta, "ARROW:extension:metadata", "")
JT = ArrowTypes.JuliaType(Val(Symbol(typename)), maybemissing(TT), metadata)
if JT !== nothing
- return f.nullable ? Union{JT, Missing} : JT
+ return f.nullable ? Union{JT,Missing} : JT
else
- @warn "unsupported ARROW:extension:name type: \"$typename\", arrow type = $TT" maxlog=1 _id=hash((:juliaeltype, typename, TT))
+ @warn "unsupported ARROW:extension:name type: \"$typename\", arrow type = $TT" maxlog =
+ 1 _id = hash((:juliaeltype, typename, TT))
end
end
return something(TT, T)
@@ -58,7 +59,7 @@ end
function juliaeltype(f::Meta.Field, convert::Bool)
T = juliaeltype(f, f.type, convert)
- return f.nullable ? Union{T, Missing} : T
+ return f.nullable ? Union{T,Missing} : T
end
juliaeltype(f::Meta.Field, ::Meta.Null, convert) = Missing
@@ -100,7 +101,7 @@ function juliaeltype(f::Meta.Field, int::Meta.Int, convert)
end
end
-function arrowtype(b, ::Type{T}) where {T <: Integer}
+function arrowtype(b, ::Type{T}) where {T<:Integer}
Meta.intStart(b)
Meta.intAddBitWidth(b, Int32(8 * sizeof(T)))
Meta.intAddIsSigned(b, T <: Signed)
@@ -118,23 +119,28 @@ function juliaeltype(f::Meta.Field, fp::Meta.FloatingPoint, convert)
end
end
-function arrowtype(b, ::Type{T}) where {T <: AbstractFloat}
+function arrowtype(b, ::Type{T}) where {T<:AbstractFloat}
Meta.floatingPointStart(b)
- Meta.floatingPointAddPrecision(b, T === Float16 ? Meta.Precision.HALF : T === Float32 ? Meta.Precision.SINGLE : Meta.Precision.DOUBLE)
+ Meta.floatingPointAddPrecision(
+ b,
+ T === Float16 ? Meta.Precision.HALF :
+ T === Float32 ? Meta.Precision.SINGLE : Meta.Precision.DOUBLE,
+ )
return Meta.FloatingPoint, Meta.floatingPointEnd(b), nothing
end
-juliaeltype(f::Meta.Field, b::Union{Meta.Utf8, Meta.LargeUtf8}, convert) = String
+juliaeltype(f::Meta.Field, b::Union{Meta.Utf8,Meta.LargeUtf8}, convert) = String
datasizeof(x) = sizeof(x)
datasizeof(x::AbstractVector) = sum(datasizeof, x)
-juliaeltype(f::Meta.Field, b::Union{Meta.Binary, Meta.LargeBinary}, convert) = Base.CodeUnits
+juliaeltype(f::Meta.Field, b::Union{Meta.Binary,Meta.LargeBinary}, convert) = Base.CodeUnits
-juliaeltype(f::Meta.Field, x::Meta.FixedSizeBinary, convert) = NTuple{Int(x.byteWidth), UInt8}
+juliaeltype(f::Meta.Field, x::Meta.FixedSizeBinary, convert) =
+ NTuple{Int(x.byteWidth),UInt8}
# arggh!
-Base.write(io::IO, x::NTuple{N, T}) where {N, T} = sum(y -> Base.write(io, y), x)
+Base.write(io::IO, x::NTuple{N,T}) where {N,T} = sum(y -> Base.write(io, y), x)
juliaeltype(f::Meta.Field, x::Meta.Bool, convert) = Bool
@@ -143,21 +149,21 @@ function arrowtype(b, ::Type{Bool})
return Meta.Bool, Meta.boolEnd(b), nothing
end
-struct Decimal{P, S, T}
+struct Decimal{P,S,T}
value::T # only Int128 or Int256
end
-Base.zero(::Type{Decimal{P, S, T}}) where {P, S, T} = Decimal{P, S, T}(T(0))
-==(a::Decimal{P, S, T}, b::Decimal{P, S, T}) where {P, S, T} = ==(a.value, b.value)
-Base.isequal(a::Decimal{P, S, T}, b::Decimal{P, S, T}) where {P, S, T} = isequal(a.value, b.value)
+Base.zero(::Type{Decimal{P,S,T}}) where {P,S,T} = Decimal{P,S,T}(T(0))
+==(a::Decimal{P,S,T}, b::Decimal{P,S,T}) where {P,S,T} = ==(a.value, b.value)
+Base.isequal(a::Decimal{P,S,T}, b::Decimal{P,S,T}) where {P,S,T} = isequal(a.value, b.value)
function juliaeltype(f::Meta.Field, x::Meta.Decimal, convert)
- return Decimal{x.precision, x.scale, x.bitWidth == 256 ? Int256 : Int128}
+ return Decimal{x.precision,x.scale,x.bitWidth == 256 ? Int256 : Int128}
end
ArrowTypes.ArrowKind(::Type{<:Decimal}) = PrimitiveKind()
-function arrowtype(b, ::Type{Decimal{P, S, T}}) where {P, S, T}
+function arrowtype(b, ::Type{Decimal{P,S,T}}) where {P,S,T}
Meta.decimalStart(b)
Meta.decimalAddPrecision(b, Int32(P))
Meta.decimalAddScale(b, Int32(S))
@@ -171,24 +177,26 @@ abstract type ArrowTimeType end
Base.write(io::IO, x::ArrowTimeType) = Base.write(io, x.x)
ArrowTypes.ArrowKind(::Type{<:ArrowTimeType}) = PrimitiveKind()
-struct Date{U, T} <: ArrowTimeType
+struct Date{U,T} <: ArrowTimeType
x::T
end
-const DATE = Date{Meta.DateUnit.DAY, Int32}
-Base.zero(::Type{Date{U, T}}) where {U, T} = Date{U, T}(T(0))
-storagetype(::Type{Date{U, T}}) where {U, T} = T
+const DATE = Date{Meta.DateUnit.DAY,Int32}
+Base.zero(::Type{Date{U,T}}) where {U,T} = Date{U,T}(T(0))
+storagetype(::Type{Date{U,T}}) where {U,T} = T
bitwidth(x::Meta.DateUnit.T) = x == Meta.DateUnit.DAY ? Int32 : Int64
Date{Meta.DateUnit.DAY}(days) = DATE(Int32(days))
-Date{Meta.DateUnit.MILLISECOND}(ms) = Date{Meta.DateUnit.MILLISECOND, Int64}(Int64(ms))
+Date{Meta.DateUnit.MILLISECOND}(ms) = Date{Meta.DateUnit.MILLISECOND,Int64}(Int64(ms))
-juliaeltype(f::Meta.Field, x::Meta.Date, convert) = Date{x.unit, bitwidth(x.unit)}
+juliaeltype(f::Meta.Field, x::Meta.Date, convert) = Date{x.unit,bitwidth(x.unit)}
finaljuliatype(::Type{DATE}) = Dates.Date
-Base.convert(::Type{Dates.Date}, x::DATE) = Dates.Date(Dates.UTD(Int64(x.x + UNIX_EPOCH_DATE)))
-finaljuliatype(::Type{Date{Meta.DateUnit.MILLISECOND, Int64}}) = Dates.DateTime
-Base.convert(::Type{Dates.DateTime}, x::Date{Meta.DateUnit.MILLISECOND, Int64}) = Dates.DateTime(Dates.UTM(Int64(x.x + UNIX_EPOCH_DATETIME)))
+Base.convert(::Type{Dates.Date}, x::DATE) =
+ Dates.Date(Dates.UTD(Int64(x.x + UNIX_EPOCH_DATE)))
+finaljuliatype(::Type{Date{Meta.DateUnit.MILLISECOND,Int64}}) = Dates.DateTime
+Base.convert(::Type{Dates.DateTime}, x::Date{Meta.DateUnit.MILLISECOND,Int64}) =
+ Dates.DateTime(Dates.UTM(Int64(x.x + UNIX_EPOCH_DATETIME)))
-function arrowtype(b, ::Type{Date{U, T}}) where {U, T}
+function arrowtype(b, ::Type{Date{U,T}}) where {U,T}
Meta.dateStart(b)
Meta.dateAddUnit(b, U)
return Meta.Date, Meta.dateEnd(b), nothing
@@ -198,7 +206,8 @@ const UNIX_EPOCH_DATE = Dates.value(Dates.Date(1970))
Base.convert(::Type{DATE}, x::Dates.Date) = DATE(Int32(Dates.value(x) - UNIX_EPOCH_DATE))
const UNIX_EPOCH_DATETIME = Dates.value(Dates.DateTime(1970))
-Base.convert(::Type{Date{Meta.DateUnit.MILLISECOND, Int64}}, x::Dates.DateTime) = Date{Meta.DateUnit.MILLISECOND, Int64}(Int64(Dates.value(x) - UNIX_EPOCH_DATETIME))
+Base.convert(::Type{Date{Meta.DateUnit.MILLISECOND,Int64}}, x::Dates.DateTime) =
+ Date{Meta.DateUnit.MILLISECOND,Int64}(Int64(Dates.value(x) - UNIX_EPOCH_DATETIME))
ArrowTypes.ArrowType(::Type{Dates.Date}) = DATE
ArrowTypes.toarrow(x::Dates.Date) = convert(DATE, x)
@@ -206,26 +215,29 @@ const DATE_SYMBOL = Symbol("JuliaLang.Date")
ArrowTypes.arrowname(::Type{Dates.Date}) = DATE_SYMBOL
ArrowTypes.JuliaType(::Val{DATE_SYMBOL}, S) = Dates.Date
ArrowTypes.fromarrow(::Type{Dates.Date}, x::DATE) = convert(Dates.Date, x)
-ArrowTypes.default(::Type{Dates.Date}) = Dates.Date(1,1,1)
+ArrowTypes.default(::Type{Dates.Date}) = Dates.Date(1, 1, 1)
-struct Time{U, T} <: ArrowTimeType
+struct Time{U,T} <: ArrowTimeType
x::T
end
-Base.zero(::Type{Time{U, T}}) where {U, T} = Time{U, T}(T(0))
-const TIME = Time{Meta.TimeUnit.NANOSECOND, Int64}
+Base.zero(::Type{Time{U,T}}) where {U,T} = Time{U,T}(T(0))
+const TIME = Time{Meta.TimeUnit.NANOSECOND,Int64}
-bitwidth(x::Meta.TimeUnit.T) = x == Meta.TimeUnit.SECOND || x == Meta.TimeUnit.MILLISECOND ? Int32 : Int64
-Time{U}(x) where {U <: Meta.TimeUnit.T} = Time{U, bitwidth(U)}(bitwidth(U)(x))
-storagetype(::Type{Time{U, T}}) where {U, T} = T
-juliaeltype(f::Meta.Field, x::Meta.Time, convert) = Time{x.unit, bitwidth(x.unit)}
+bitwidth(x::Meta.TimeUnit.T) =
+ x == Meta.TimeUnit.SECOND || x == Meta.TimeUnit.MILLISECOND ? Int32 : Int64
+Time{U}(x) where {U<:Meta.TimeUnit.T} = Time{U,bitwidth(U)}(bitwidth(U)(x))
+storagetype(::Type{Time{U,T}}) where {U,T} = T
+juliaeltype(f::Meta.Field, x::Meta.Time, convert) = Time{x.unit,bitwidth(x.unit)}
finaljuliatype(::Type{<:Time}) = Dates.Time
-periodtype(U::Meta.TimeUnit.T) = U === Meta.TimeUnit.SECOND ? Dates.Second :
- U === Meta.TimeUnit.MILLISECOND ? Dates.Millisecond :
- U === Meta.TimeUnit.MICROSECOND ? Dates.Microsecond : Dates.Nanosecond
-Base.convert(::Type{Dates.Time}, x::Time{U, T}) where {U, T} = Dates.Time(Dates.Nanosecond(Dates.tons(periodtype(U)(x.x))))
-
-function arrowtype(b, ::Type{Time{U, T}}) where {U, T}
+periodtype(U::Meta.TimeUnit.T) =
+ U === Meta.TimeUnit.SECOND ? Dates.Second :
+ U === Meta.TimeUnit.MILLISECOND ? Dates.Millisecond :
+ U === Meta.TimeUnit.MICROSECOND ? Dates.Microsecond : Dates.Nanosecond
+Base.convert(::Type{Dates.Time}, x::Time{U,T}) where {U,T} =
+ Dates.Time(Dates.Nanosecond(Dates.tons(periodtype(U)(x.x))))
+
+function arrowtype(b, ::Type{Time{U,T}}) where {U,T}
Meta.timeStart(b)
Meta.timeAddUnit(b, U)
Meta.timeAddBitWidth(b, Int32(8 * sizeof(T)))
@@ -240,42 +252,57 @@ const TIME_SYMBOL = Symbol("JuliaLang.Time")
ArrowTypes.arrowname(::Type{Dates.Time}) = TIME_SYMBOL
ArrowTypes.JuliaType(::Val{TIME_SYMBOL}, S) = Dates.Time
ArrowTypes.fromarrow(::Type{Dates.Time}, x::TIME) = convert(Dates.Time, x)
-ArrowTypes.default(::Type{Dates.Time}) = Dates.Time(1,1,1)
+ArrowTypes.default(::Type{Dates.Time}) = Dates.Time(1, 1, 1)
-struct Timestamp{U, TZ} <: ArrowTimeType
+struct Timestamp{U,TZ} <: ArrowTimeType
x::Int64
end
-Base.zero(::Type{Timestamp{U, T}}) where {U, T} = Timestamp{U, T}(Int64(0))
+Base.zero(::Type{Timestamp{U,T}}) where {U,T} = Timestamp{U,T}(Int64(0))
function juliaeltype(f::Meta.Field, x::Meta.Timestamp, convert)
- return Timestamp{x.unit, x.timezone === nothing ? nothing : Symbol(x.timezone)}
+ return Timestamp{x.unit,x.timezone === nothing ? nothing : Symbol(x.timezone)}
end
-const DATETIME = Timestamp{Meta.TimeUnit.MILLISECOND, nothing}
+const DATETIME = Timestamp{Meta.TimeUnit.MILLISECOND,nothing}
-finaljuliatype(::Type{Timestamp{U, TZ}}) where {U, TZ} = ZonedDateTime
-finaljuliatype(::Type{Timestamp{U, nothing}}) where {U} = DateTime
+finaljuliatype(::Type{Timestamp{U,TZ}}) where {U,TZ} = ZonedDateTime
+finaljuliatype(::Type{Timestamp{U,nothing}}) where {U} = DateTime
@noinline warntimestamp(U, T) =
- @warn "automatically converting Arrow.Timestamp with precision = $U to `$T` which only supports millisecond precision; conversion may be lossy; to avoid converting, pass `Arrow.Table(source; convert=false)" maxlog=1 _id=hash((:warntimestamp, U, T))
-
-function Base.convert(::Type{ZonedDateTime}, x::Timestamp{U, TZ}) where {U, TZ}
- (U === Meta.TimeUnit.MICROSECOND || U == Meta.TimeUnit.NANOSECOND) && warntimestamp(U, ZonedDateTime)
- return ZonedDateTime(Dates.DateTime(Dates.UTM(Int64(Dates.toms(periodtype(U)(x.x)) + UNIX_EPOCH_DATETIME))), TimeZone(String(TZ)); from_utc=true)
-end
-
-function Base.convert(::Type{DateTime}, x::Timestamp{U, nothing}) where {U}
- (U === Meta.TimeUnit.MICROSECOND || U == Meta.TimeUnit.NANOSECOND) && warntimestamp(U, DateTime)
- return Dates.DateTime(Dates.UTM(Int64(Dates.toms(periodtype(U)(x.x)) + UNIX_EPOCH_DATETIME)))
-end
-
-Base.convert(::Type{Timestamp{Meta.TimeUnit.MILLISECOND, TZ}}, x::ZonedDateTime) where {TZ} =
- Timestamp{Meta.TimeUnit.MILLISECOND, TZ}(Int64(Dates.value(DateTime(x, UTC)) - UNIX_EPOCH_DATETIME))
-Base.convert(::Type{Timestamp{Meta.TimeUnit.MILLISECOND, nothing}}, x::DateTime) =
- Timestamp{Meta.TimeUnit.MILLISECOND, nothing}(Int64(Dates.value(x) - UNIX_EPOCH_DATETIME))
-
-function arrowtype(b, ::Type{Timestamp{U, TZ}}) where {U, TZ}
+ @warn "automatically converting Arrow.Timestamp with precision = $U to `$T` which only supports millisecond precision; conversion may be lossy; to avoid converting, pass `Arrow.Table(source; convert=false)" maxlog =
+ 1 _id = hash((:warntimestamp, U, T))
+
+function Base.convert(::Type{ZonedDateTime}, x::Timestamp{U,TZ}) where {U,TZ}
+ (U === Meta.TimeUnit.MICROSECOND || U == Meta.TimeUnit.NANOSECOND) &&
+ warntimestamp(U, ZonedDateTime)
+ return ZonedDateTime(
+ Dates.DateTime(
+ Dates.UTM(Int64(Dates.toms(periodtype(U)(x.x)) + UNIX_EPOCH_DATETIME)),
+ ),
+ TimeZone(String(TZ));
+ from_utc=true,
+ )
+end
+
+function Base.convert(::Type{DateTime}, x::Timestamp{U,nothing}) where {U}
+ (U === Meta.TimeUnit.MICROSECOND || U == Meta.TimeUnit.NANOSECOND) &&
+ warntimestamp(U, DateTime)
+ return Dates.DateTime(
+ Dates.UTM(Int64(Dates.toms(periodtype(U)(x.x)) + UNIX_EPOCH_DATETIME)),
+ )
+end
+
+Base.convert(::Type{Timestamp{Meta.TimeUnit.MILLISECOND,TZ}}, x::ZonedDateTime) where {TZ} =
+ Timestamp{Meta.TimeUnit.MILLISECOND,TZ}(
+ Int64(Dates.value(DateTime(x, UTC)) - UNIX_EPOCH_DATETIME),
+ )
+Base.convert(::Type{Timestamp{Meta.TimeUnit.MILLISECOND,nothing}}, x::DateTime) =
+ Timestamp{Meta.TimeUnit.MILLISECOND,nothing}(
+ Int64(Dates.value(x) - UNIX_EPOCH_DATETIME),
+ )
+
+function arrowtype(b, ::Type{Timestamp{U,TZ}}) where {U,TZ}
tz = TZ !== nothing ? FlatBuffers.createstring!(b, String(TZ)) : FlatBuffers.UOffsetT(0)
Meta.timestampStart(b)
Meta.timestampAddUnit(b, U)
@@ -289,28 +316,36 @@ const DATETIME_SYMBOL = Symbol("JuliaLang.DateTime")
ArrowTypes.arrowname(::Type{Dates.DateTime}) = DATETIME_SYMBOL
ArrowTypes.JuliaType(::Val{DATETIME_SYMBOL}, S) = Dates.DateTime
ArrowTypes.fromarrow(::Type{Dates.DateTime}, x::Timestamp) = convert(Dates.DateTime, x)
-ArrowTypes.fromarrow(::Type{Dates.DateTime}, x::Date{Meta.DateUnit.MILLISECOND, Int64}) = convert(Dates.DateTime, x)
-ArrowTypes.default(::Type{Dates.DateTime}) = Dates.DateTime(1,1,1,1,1,1)
+ArrowTypes.fromarrow(::Type{Dates.DateTime}, x::Date{Meta.DateUnit.MILLISECOND,Int64}) =
+ convert(Dates.DateTime, x)
+ArrowTypes.default(::Type{Dates.DateTime}) = Dates.DateTime(1, 1, 1, 1, 1, 1)
ArrowTypes.ArrowType(::Type{ZonedDateTime}) = Timestamp
-ArrowTypes.toarrow(x::ZonedDateTime) = convert(Timestamp{Meta.TimeUnit.MILLISECOND, Symbol(x.timezone)}, x)
+ArrowTypes.toarrow(x::ZonedDateTime) =
+ convert(Timestamp{Meta.TimeUnit.MILLISECOND,Symbol(x.timezone)}, x)
const ZONEDDATETIME_SYMBOL = Symbol("JuliaLang.ZonedDateTime-UTC")
ArrowTypes.arrowname(::Type{ZonedDateTime}) = ZONEDDATETIME_SYMBOL
ArrowTypes.JuliaType(::Val{ZONEDDATETIME_SYMBOL}, S) = ZonedDateTime
ArrowTypes.fromarrow(::Type{ZonedDateTime}, x::Timestamp) = convert(ZonedDateTime, x)
-ArrowTypes.default(::Type{TimeZones.ZonedDateTime}) = TimeZones.ZonedDateTime(1,1,1,1,1,1,TimeZones.tz"UTC")
+ArrowTypes.default(::Type{TimeZones.ZonedDateTime}) =
+ TimeZones.ZonedDateTime(1, 1, 1, 1, 1, 1, TimeZones.tz"UTC")
# Backwards compatibility: older versions of Arrow saved ZonedDateTime's with this metadata:
const OLD_ZONEDDATETIME_SYMBOL = Symbol("JuliaLang.ZonedDateTime")
# and stored the local time instead of the UTC time.
struct LocalZonedDateTime end
ArrowTypes.JuliaType(::Val{OLD_ZONEDDATETIME_SYMBOL}, S) = LocalZonedDateTime
-function ArrowTypes.fromarrow(::Type{LocalZonedDateTime}, x::Timestamp{U, TZ}) where {U, TZ}
- (U === Meta.TimeUnit.MICROSECOND || U == Meta.TimeUnit.NANOSECOND) && warntimestamp(U, ZonedDateTime)
- return ZonedDateTime(Dates.DateTime(Dates.UTM(Int64(Dates.toms(periodtype(U)(x.x)) + UNIX_EPOCH_DATETIME))), TimeZone(String(TZ)))
+function ArrowTypes.fromarrow(::Type{LocalZonedDateTime}, x::Timestamp{U,TZ}) where {U,TZ}
+ (U === Meta.TimeUnit.MICROSECOND || U == Meta.TimeUnit.NANOSECOND) &&
+ warntimestamp(U, ZonedDateTime)
+ return ZonedDateTime(
+ Dates.DateTime(
+ Dates.UTM(Int64(Dates.toms(periodtype(U)(x.x)) + UNIX_EPOCH_DATETIME)),
+ ),
+ TimeZone(String(TZ)),
+ )
end
-
"""
Arrow.ToTimestamp(x::AbstractVector{ZonedDateTime})
@@ -321,31 +356,36 @@ scan each element to check each timezone. `Arrow.ToTimestamp` provides a "bypass
first element of the `AbstractVector{ZonedDateTime}`, which in turn allows `Arrow.write` to avoid costly checking/conversion and
can encode the `ZonedDateTime` as `Arrow.Timestamp` directly.
"""
-struct ToTimestamp{A, TZ} <: AbstractVector{Timestamp{Meta.TimeUnit.MILLISECOND, TZ}}
+struct ToTimestamp{A,TZ} <: AbstractVector{Timestamp{Meta.TimeUnit.MILLISECOND,TZ}}
data::A # AbstractVector{ZonedDateTime}
end
-ToTimestamp(x::A) where {A <: AbstractVector{ZonedDateTime}} = ToTimestamp{A, Symbol(x[1].timezone)}(x)
+ToTimestamp(x::A) where {A<:AbstractVector{ZonedDateTime}} =
+ ToTimestamp{A,Symbol(x[1].timezone)}(x)
Base.IndexStyle(::Type{<:ToTimestamp}) = Base.IndexLinear()
Base.size(x::ToTimestamp) = (length(x.data),)
-Base.eltype(::Type{ToTimestamp{A, TZ}}) where {A, TZ} = Timestamp{Meta.TimeUnit.MILLISECOND, TZ}
-Base.getindex(x::ToTimestamp{A, TZ}, i::Integer) where {A, TZ} = convert(Timestamp{Meta.TimeUnit.MILLISECOND, TZ}, getindex(x.data, i))
+Base.eltype(::Type{ToTimestamp{A,TZ}}) where {A,TZ} =
+ Timestamp{Meta.TimeUnit.MILLISECOND,TZ}
+Base.getindex(x::ToTimestamp{A,TZ}, i::Integer) where {A,TZ} =
+ convert(Timestamp{Meta.TimeUnit.MILLISECOND,TZ}, getindex(x.data, i))
-struct Interval{U, T} <: ArrowTimeType
+struct Interval{U,T} <: ArrowTimeType
x::T
end
-Base.zero(::Type{Interval{U, T}}) where {U, T} = Interval{U, T}(T(0))
+Base.zero(::Type{Interval{U,T}}) where {U,T} = Interval{U,T}(T(0))
bitwidth(x::Meta.IntervalUnit.T) = x == Meta.IntervalUnit.YEAR_MONTH ? Int32 : Int64
-Interval{Meta.IntervalUnit.YEAR_MONTH}(x) = Interval{Meta.IntervalUnit.YEAR_MONTH, Int32}(Int32(x))
-Interval{Meta.IntervalUnit.DAY_TIME}(x) = Interval{Meta.IntervalUnit.DAY_TIME, Int64}(Int64(x))
+Interval{Meta.IntervalUnit.YEAR_MONTH}(x) =
+ Interval{Meta.IntervalUnit.YEAR_MONTH,Int32}(Int32(x))
+Interval{Meta.IntervalUnit.DAY_TIME}(x) =
+ Interval{Meta.IntervalUnit.DAY_TIME,Int64}(Int64(x))
function juliaeltype(f::Meta.Field, x::Meta.Interval, convert)
- return Interval{x.unit, bitwidth(x.unit)}
+ return Interval{x.unit,bitwidth(x.unit)}
end
-function arrowtype(b, ::Type{Interval{U, T}}) where {U, T}
+function arrowtype(b, ::Type{Interval{U,T}}) where {U,T}
Meta.intervalStart(b)
Meta.intervalAddUnit(b, U)
return Meta.Interval, Meta.intervalEnd(b), nothing
@@ -362,7 +402,7 @@ function juliaeltype(f::Meta.Field, x::Meta.Duration, convert)
end
finaljuliatype(::Type{Duration{U}}) where {U} = periodtype(U)
-Base.convert(::Type{P}, x::Duration{U}) where {P <: Dates.Period, U} = P(periodtype(U)(x.x))
+Base.convert(::Type{P}, x::Duration{U}) where {P<:Dates.Period,U} = P(periodtype(U)(x.x))
function arrowtype(b, ::Type{Duration{U}}) where {U}
Meta.durationStart(b)
@@ -370,31 +410,32 @@ function arrowtype(b, ::Type{Duration{U}}) where {U}
return Meta.Duration, Meta.durationEnd(b), nothing
end
-arrowtype(b, ::Type{P}) where {P <: Dates.Period} = arrowtype(b, Duration{arrowperiodtype(P)})
+arrowtype(b, ::Type{P}) where {P<:Dates.Period} = arrowtype(b, Duration{arrowperiodtype(P)})
arrowperiodtype(P) = Meta.TimeUnit.SECOND
arrowperiodtype(::Type{Dates.Millisecond}) = Meta.TimeUnit.MILLISECOND
arrowperiodtype(::Type{Dates.Microsecond}) = Meta.TimeUnit.MICROSECOND
arrowperiodtype(::Type{Dates.Nanosecond}) = Meta.TimeUnit.NANOSECOND
-Base.convert(::Type{Duration{U}}, x::Dates.Period) where {U} = Duration{U}(Dates.value(periodtype(U)(x)))
+Base.convert(::Type{Duration{U}}, x::Dates.Period) where {U} =
+ Duration{U}(Dates.value(periodtype(U)(x)))
-ArrowTypes.ArrowType(::Type{P}) where {P <: Dates.Period} = Duration{arrowperiodtype(P)}
-ArrowTypes.toarrow(x::P) where {P <: Dates.Period} = convert(Duration{arrowperiodtype(P)}, x)
+ArrowTypes.ArrowType(::Type{P}) where {P<:Dates.Period} = Duration{arrowperiodtype(P)}
+ArrowTypes.toarrow(x::P) where {P<:Dates.Period} = convert(Duration{arrowperiodtype(P)}, x)
const PERIOD_SYMBOL = Symbol("JuliaLang.Dates.Period")
-ArrowTypes.arrowname(::Type{P}) where {P <: Dates.Period} = PERIOD_SYMBOL
+ArrowTypes.arrowname(::Type{P}) where {P<:Dates.Period} = PERIOD_SYMBOL
ArrowTypes.JuliaType(::Val{PERIOD_SYMBOL}, ::Type{Duration{U}}) where {U} = periodtype(U)
-ArrowTypes.fromarrow(::Type{P}, x::Duration{U}) where {P <: Dates.Period, U} = convert(P, x)
+ArrowTypes.fromarrow(::Type{P}, x::Duration{U}) where {P<:Dates.Period,U} = convert(P, x)
# nested types; call juliaeltype recursively on nested children
-function juliaeltype(f::Meta.Field, list::Union{Meta.List, Meta.LargeList}, convert)
+function juliaeltype(f::Meta.Field, list::Union{Meta.List,Meta.LargeList}, convert)
return Vector{juliaeltype(f.children[1], buildmetadata(f.children[1]), convert)}
end
# arrowtype will call fieldoffset recursively for children
-function arrowtype(b, x::List{T, O, A}) where {T, O, A}
+function arrowtype(b, x::List{T,O,A}) where {T,O,A}
if liststringtype(x)
- if T <: AbstractString || T <: Union{AbstractString, Missing}
+ if T <: AbstractString || T <: Union{AbstractString,Missing}
if O == Int32
Meta.utf8Start(b)
return Meta.Utf8, Meta.utf8End(b), nothing
@@ -425,11 +466,13 @@ end
function juliaeltype(f::Meta.Field, list::Meta.FixedSizeList, convert)
type = juliaeltype(f.children[1], buildmetadata(f.children[1]), convert)
- return NTuple{Int(list.listSize), type}
+ return NTuple{Int(list.listSize),type}
end
-function arrowtype(b, x::FixedSizeList{T, A}) where {T, A}
- N = ArrowTypes.getsize(ArrowTypes.ArrowKind(ArrowTypes.ArrowType(Base.nonmissingtype(T))))
+function arrowtype(b, x::FixedSizeList{T,A}) where {T,A}
+ N = ArrowTypes.getsize(
+ ArrowTypes.ArrowKind(ArrowTypes.ArrowType(Base.nonmissingtype(T))),
+ )
if eltype(A) == UInt8
Meta.fixedSizeBinaryStart(b)
Meta.fixedSizeBinaryAddByteWidth(b, Int32(N))
@@ -443,9 +486,17 @@ function arrowtype(b, x::FixedSizeList{T, A}) where {T, A}
end
function juliaeltype(f::Meta.Field, map::Meta.Map, convert)
- K = juliaeltype(f.children[1].children[1], buildmetadata(f.children[1].children[1]), convert)
- V = juliaeltype(f.children[1].children[2], buildmetadata(f.children[1].children[2]), convert)
- return Dict{K, V}
+ K = juliaeltype(
+ f.children[1].children[1],
+ buildmetadata(f.children[1].children[1]),
+ convert,
+ )
+ V = juliaeltype(
+ f.children[1].children[2],
+ buildmetadata(f.children[1].children[2]),
+ convert,
+ )
+ return Dict{K,V}
end
function arrowtype(b, x::Map)
@@ -454,17 +505,17 @@ function arrowtype(b, x::Map)
return Meta.Map, Meta.mapEnd(b), children
end
-struct KeyValue{K, V}
+struct KeyValue{K,V}
key::K
value::V
end
-keyvalueK(::Type{KeyValue{K, V}}) where {K, V} = K
-keyvalueV(::Type{KeyValue{K, V}}) where {K, V} = V
+keyvalueK(::Type{KeyValue{K,V}}) where {K,V} = K
+keyvalueV(::Type{KeyValue{K,V}}) where {K,V} = V
Base.length(kv::KeyValue) = 1
Base.iterate(kv::KeyValue, st=1) = st === nothing ? nothing : (kv, nothing)
-ArrowTypes.default(::Type{KeyValue{K, V}}) where {K, V} = KeyValue(default(K), default(V))
+ArrowTypes.default(::Type{KeyValue{K,V}}) where {K,V} = KeyValue(default(K), default(V))
-function arrowtype(b, ::Type{KeyValue{K, V}}) where {K, V}
+function arrowtype(b, ::Type{KeyValue{K,V}}) where {K,V}
children = [fieldoffset(b, "key", K), fieldoffset(b, "value", V)]
Meta.structStart(b)
return Meta.Struct, Meta.structEnd(b), children
@@ -473,10 +524,10 @@ end
function juliaeltype(f::Meta.Field, list::Meta.Struct, convert)
names = Tuple(Symbol(x.name) for x in f.children)
types = Tuple(juliaeltype(x, buildmetadata(x), convert) for x in f.children)
- return NamedTuple{names, Tuple{types...}}
+ return NamedTuple{names,Tuple{types...}}
end
-function arrowtype(b, x::Struct{T, S}) where {T, S}
+function arrowtype(b, x::Struct{T,S}) where {T,S}
names = fieldnames(Base.nonmissingtype(T))
children = [fieldoffset(b, names[i], x.data[i]) for i = 1:length(names)]
Meta.structStart(b)
@@ -486,13 +537,21 @@ end
# Unions
function UnionT(f::Meta.Field, convert)
typeids = f.type.typeIds === nothing ? nothing : Tuple(Int(x) for x in f.type.typeIds)
- UT = UnionT{f.type.mode, typeids, Tuple{(juliaeltype(x, buildmetadata(x), convert) for x in f.children)...}}
+ UT = UnionT{
+ f.type.mode,
+ typeids,
+ Tuple{(juliaeltype(x, buildmetadata(x), convert) for x in f.children)...},
+ }
return UT
end
-juliaeltype(f::Meta.Field, u::Meta.Union, convert) = Union{(juliaeltype(x, buildmetadata(x), convert) for x in f.children)...}
+juliaeltype(f::Meta.Field, u::Meta.Union, convert) =
+ Union{(juliaeltype(x, buildmetadata(x), convert) for x in f.children)...}
-function arrowtype(b, x::Union{DenseUnion{S, UnionT{T, typeIds, U}}, SparseUnion{S, UnionT{T, typeIds, U}}}) where {S, T, typeIds, U}
+function arrowtype(
+ b,
+ x::Union{DenseUnion{S,UnionT{T,typeIds,U}},SparseUnion{S,UnionT{T,typeIds,U}}},
+) where {S,T,typeIds,U}
if typeIds !== nothing
Meta.unionStartTypeIdsVector(b, length(typeIds))
for id in Iterators.reverse(typeIds)
diff --git a/src/metadata/File.jl b/src/metadata/File.jl
index 301a23c..184833c 100644
--- a/src/metadata/File.jl
+++ b/src/metadata/File.jl
@@ -19,7 +19,8 @@ struct Footer <: FlatBuffers.Table
pos::Base.Int
end
-Base.propertynames(x::Footer) = (:version, :schema, :dictionaries, :recordBatches, :custom_metadata)
+Base.propertynames(x::Footer) =
+ (:version, :schema, :dictionaries, :recordBatches, :custom_metadata)
function Base.getproperty(x::Footer, field::Symbol)
if field === :version
@@ -52,12 +53,18 @@ function Base.getproperty(x::Footer, field::Symbol)
end
footerStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 4)
-footerAddVersion(b::FlatBuffers.Builder, version::MetadataVersion.T) = FlatBuffers.prependslot!(b, 0, version, 0)
-footerAddSchema(b::FlatBuffers.Builder, schema::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 1, schema, 0)
-footerAddDictionaries(b::FlatBuffers.Builder, dictionaries::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 2, dictionaries, 0)
-footerStartDictionariesVector(b::FlatBuffers.Builder, numelems) = FlatBuffers.startvector!(b, 24, numelems, 8)
-footerAddRecordBatches(b::FlatBuffers.Builder, recordbatches::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 3, recordbatches, 0)
-footerStartRecordBatchesVector(b::FlatBuffers.Builder, numelems) = FlatBuffers.startvector!(b, 24, numelems, 8)
+footerAddVersion(b::FlatBuffers.Builder, version::MetadataVersion.T) =
+ FlatBuffers.prependslot!(b, 0, version, 0)
+footerAddSchema(b::FlatBuffers.Builder, schema::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 1, schema, 0)
+footerAddDictionaries(b::FlatBuffers.Builder, dictionaries::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 2, dictionaries, 0)
+footerStartDictionariesVector(b::FlatBuffers.Builder, numelems) =
+ FlatBuffers.startvector!(b, 24, numelems, 8)
+footerAddRecordBatches(b::FlatBuffers.Builder, recordbatches::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 3, recordbatches, 0)
+footerStartRecordBatchesVector(b::FlatBuffers.Builder, numelems) =
+ FlatBuffers.startvector!(b, 24, numelems, 8)
footerEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
struct Block <: FlatBuffers.Struct
@@ -80,7 +87,12 @@ function Base.getproperty(x::Block, field::Symbol)
return nothing
end
-function createBlock(b::FlatBuffers.Builder, offset::Int64, metadatalength::Int32, bodylength::Int64)
+function createBlock(
+ b::FlatBuffers.Builder,
+ offset::Int64,
+ metadatalength::Int32,
+ bodylength::Int64,
+)
FlatBuffers.prep!(b, 8, 24)
prepend!(b, bodylength)
FlatBuffers.pad!(b, 4)
diff --git a/src/metadata/Flatbuf.jl b/src/metadata/Flatbuf.jl
index 127e91b..9e9f2e4 100644
--- a/src/metadata/Flatbuf.jl
+++ b/src/metadata/Flatbuf.jl
@@ -23,4 +23,4 @@ include("Schema.jl")
include("File.jl")
include("Message.jl")
-end # module
\ No newline at end of file
+end # module
diff --git a/src/metadata/Message.jl b/src/metadata/Message.jl
index 95c4675..139793d 100644
--- a/src/metadata/Message.jl
+++ b/src/metadata/Message.jl
@@ -64,8 +64,10 @@ function Base.getproperty(x::BodyCompression, field::Symbol)
end
bodyCompressionStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 2)
-bodyCompressionAddCodec(b::FlatBuffers.Builder, codec::CompressionType.T) = FlatBuffers.prependslot!(b, 0, codec, 0)
-bodyCompressionAddMethod(b::FlatBuffers.Builder, method::BodyCompressionMethod.T) = FlatBuffers.prependslot!(b, 1, method, 0)
+bodyCompressionAddCodec(b::FlatBuffers.Builder, codec::CompressionType.T) =
+ FlatBuffers.prependslot!(b, 0, codec, 0)
+bodyCompressionAddMethod(b::FlatBuffers.Builder, method::BodyCompressionMethod.T) =
+ FlatBuffers.prependslot!(b, 1, method, 0)
bodyCompressionEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
struct RecordBatch <: FlatBuffers.Table
@@ -100,12 +102,18 @@ function Base.getproperty(x::RecordBatch, field::Symbol)
end
recordBatchStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 4)
-recordBatchAddLength(b::FlatBuffers.Builder, length::Int64) = FlatBuffers.prependslot!(b, 0, length, 0)
-recordBatchAddNodes(b::FlatBuffers.Builder, nodes::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 1, nodes, 0)
-recordBatchStartNodesVector(b::FlatBuffers.Builder, numelems) = FlatBuffers.startvector!(b, 16, numelems, 8)
-recordBatchAddBuffers(b::FlatBuffers.Builder, buffers::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 2, buffers, 0)
-recordBatchStartBuffersVector(b::FlatBuffers.Builder, numelems) = FlatBuffers.startvector!(b, 16, numelems, 8)
-recordBatchAddCompression(b::FlatBuffers.Builder, c::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 3, c, 0)
+recordBatchAddLength(b::FlatBuffers.Builder, length::Int64) =
+ FlatBuffers.prependslot!(b, 0, length, 0)
+recordBatchAddNodes(b::FlatBuffers.Builder, nodes::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 1, nodes, 0)
+recordBatchStartNodesVector(b::FlatBuffers.Builder, numelems) =
+ FlatBuffers.startvector!(b, 16, numelems, 8)
+recordBatchAddBuffers(b::FlatBuffers.Builder, buffers::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 2, buffers, 0)
+recordBatchStartBuffersVector(b::FlatBuffers.Builder, numelems) =
+ FlatBuffers.startvector!(b, 16, numelems, 8)
+recordBatchAddCompression(b::FlatBuffers.Builder, c::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 3, c, 0)
recordBatchEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
struct DictionaryBatch <: FlatBuffers.Table
@@ -135,9 +143,12 @@ function Base.getproperty(x::DictionaryBatch, field::Symbol)
end
dictionaryBatchStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 3)
-dictionaryBatchAddId(b::FlatBuffers.Builder, id::Int64) = FlatBuffers.prependslot!(b, 0, id, 0)
-dictionaryBatchAddData(b::FlatBuffers.Builder, data::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 1, data, 0)
-dictionaryBatchAddIsDelta(b::FlatBuffers.Builder, isdelta::Base.Bool) = FlatBuffers.prependslot!(b, 2, isdelta, false)
+dictionaryBatchAddId(b::FlatBuffers.Builder, id::Int64) =
+ FlatBuffers.prependslot!(b, 0, id, 0)
+dictionaryBatchAddData(b::FlatBuffers.Builder, data::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 1, data, 0)
+dictionaryBatchAddIsDelta(b::FlatBuffers.Builder, isdelta::Base.Bool) =
+ FlatBuffers.prependslot!(b, 2, isdelta, false)
dictionaryBatchEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
function MessageHeader(b::UInt8)
@@ -193,10 +204,16 @@ function Base.getproperty(x::Message, field::Symbol)
end
messageStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 5)
-messageAddVersion(b::FlatBuffers.Builder, version::MetadataVersion.T) = FlatBuffers.prependslot!(b, 0, version, 0)
-messageAddHeaderType(b::FlatBuffers.Builder, ::Core.Type{T}) where {T} = FlatBuffers.prependslot!(b, 1, MessageHeader(T), 0)
-messageAddHeader(b::FlatBuffers.Builder, header::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 2, header, 0)
-messageAddBodyLength(b::FlatBuffers.Builder, bodyLength::Int64) = FlatBuffers.prependslot!(b, 3, bodyLength, 0)
-messageAddCustomMetadata(b::FlatBuffers.Builder, meta::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 4, meta, 0)
-messageStartCustomMetadataVector(b::FlatBuffers.Builder, numelems) = FlatBuffers.startvector!(b, 4, numelems, 4)
+messageAddVersion(b::FlatBuffers.Builder, version::MetadataVersion.T) =
+ FlatBuffers.prependslot!(b, 0, version, 0)
+messageAddHeaderType(b::FlatBuffers.Builder, ::Core.Type{T}) where {T} =
+ FlatBuffers.prependslot!(b, 1, MessageHeader(T), 0)
+messageAddHeader(b::FlatBuffers.Builder, header::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 2, header, 0)
+messageAddBodyLength(b::FlatBuffers.Builder, bodyLength::Int64) =
+ FlatBuffers.prependslot!(b, 3, bodyLength, 0)
+messageAddCustomMetadata(b::FlatBuffers.Builder, meta::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 4, meta, 0)
+messageStartCustomMetadataVector(b::FlatBuffers.Builder, numelems) =
+ FlatBuffers.startvector!(b, 4, numelems, 4)
messageEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
diff --git a/src/metadata/Schema.jl b/src/metadata/Schema.jl
index b8f321a..d4cfc82 100644
--- a/src/metadata/Schema.jl
+++ b/src/metadata/Schema.jl
@@ -73,7 +73,8 @@ function Base.getproperty(x::FixedSizeList, field::Symbol)
end
fixedSizeListStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 1)
-fixedSizeListAddListSize(b::FlatBuffers.Builder, listSize::Int32) = FlatBuffers.prependslot!(b, 0, listSize, 0)
+fixedSizeListAddListSize(b::FlatBuffers.Builder, listSize::Int32) =
+ FlatBuffers.prependslot!(b, 0, listSize, 0)
fixedSizeListEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
struct Map <: FlatBuffers.Table
@@ -92,7 +93,8 @@ function Base.getproperty(x::Map, field::Symbol)
end
mapStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 1)
-mapAddKeysSorted(b::FlatBuffers.Builder, keyssorted::Base.Bool) = FlatBuffers.prependslot!(b, 0, keyssorted, 0)
+mapAddKeysSorted(b::FlatBuffers.Builder, keyssorted::Base.Bool) =
+ FlatBuffers.prependslot!(b, 0, keyssorted, 0)
mapEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
@enumx UnionMode::Int16 Sparse Dense
@@ -117,9 +119,12 @@ function Base.getproperty(x::Union, field::Symbol)
end
unionStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 2)
-unionAddMode(b::FlatBuffers.Builder, mode::UnionMode.T) = FlatBuffers.prependslot!(b, 0, mode, 0)
-unionAddTypeIds(b::FlatBuffers.Builder, typeIds::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 1, typeIds, 0)
-unionStartTypeIdsVector(b::FlatBuffers.Builder, numelems) = FlatBuffers.startvector!(b, 4, numelems, 4)
+unionAddMode(b::FlatBuffers.Builder, mode::UnionMode.T) =
+ FlatBuffers.prependslot!(b, 0, mode, 0)
+unionAddTypeIds(b::FlatBuffers.Builder, typeIds::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 1, typeIds, 0)
+unionStartTypeIdsVector(b::FlatBuffers.Builder, numelems) =
+ FlatBuffers.startvector!(b, 4, numelems, 4)
unionEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
struct Int <: FlatBuffers.Table
@@ -142,8 +147,10 @@ function Base.getproperty(x::Int, field::Symbol)
end
intStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 2)
-intAddBitWidth(b::FlatBuffers.Builder, bitwidth::Int32) = FlatBuffers.prependslot!(b, 0, bitwidth, 0)
-intAddIsSigned(b::FlatBuffers.Builder, issigned::Base.Bool) = FlatBuffers.prependslot!(b, 1, issigned, 0)
+intAddBitWidth(b::FlatBuffers.Builder, bitwidth::Int32) =
+ FlatBuffers.prependslot!(b, 0, bitwidth, 0)
+intAddIsSigned(b::FlatBuffers.Builder, issigned::Base.Bool) =
+ FlatBuffers.prependslot!(b, 1, issigned, 0)
intEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
@enumx Precision::Int16 HALF SINGLE DOUBLE
@@ -165,7 +172,8 @@ function Base.getproperty(x::FloatingPoint, field::Symbol)
end
floatingPointStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 1)
-floatingPointAddPrecision(b::FlatBuffers.Builder, precision::Precision.T) = FlatBuffers.prependslot!(b, 0, precision, 0)
+floatingPointAddPrecision(b::FlatBuffers.Builder, precision::Precision.T) =
+ FlatBuffers.prependslot!(b, 0, precision, 0)
floatingPointEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
struct Utf8 <: FlatBuffers.Table
@@ -224,7 +232,8 @@ function Base.getproperty(x::FixedSizeBinary, field::Symbol)
end
fixedSizeBinaryStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 1)
-fixedSizeBinaryAddByteWidth(b::FlatBuffers.Builder, bytewidth::Int32) = FlatBuffers.prependslot!(b, 0, bytewidth, 0)
+fixedSizeBinaryAddByteWidth(b::FlatBuffers.Builder, bytewidth::Int32) =
+ FlatBuffers.prependslot!(b, 0, bytewidth, 0)
fixedSizeBinaryEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
struct Bool <: FlatBuffers.Table
@@ -262,9 +271,12 @@ function Base.getproperty(x::Decimal, field::Symbol)
end
decimalStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 3)
-decimalAddPrecision(b::FlatBuffers.Builder, precision::Int32) = FlatBuffers.prependslot!(b, 0, precision, 0)
-decimalAddScale(b::FlatBuffers.Builder, scale::Int32) = FlatBuffers.prependslot!(b, 1, scale, 0)
-decimalAddBitWidth(b::FlatBuffers.Builder, bitWidth::Int32) = FlatBuffers.prependslot!(b, 2, bitWidth, Int32(128))
+decimalAddPrecision(b::FlatBuffers.Builder, precision::Int32) =
+ FlatBuffers.prependslot!(b, 0, precision, 0)
+decimalAddScale(b::FlatBuffers.Builder, scale::Int32) =
+ FlatBuffers.prependslot!(b, 1, scale, 0)
+decimalAddBitWidth(b::FlatBuffers.Builder, bitWidth::Int32) =
+ FlatBuffers.prependslot!(b, 2, bitWidth, Int32(128))
decimalEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
@enumx DateUnit::Int16 DAY MILLISECOND
@@ -286,7 +298,8 @@ function Base.getproperty(x::Date, field::Symbol)
end
dateStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 1)
-dateAddUnit(b::FlatBuffers.Builder, unit::DateUnit.T) = FlatBuffers.prependslot!(b, 0, unit, 1)
+dateAddUnit(b::FlatBuffers.Builder, unit::DateUnit.T) =
+ FlatBuffers.prependslot!(b, 0, unit, 1)
dateEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
@enumx TimeUnit::Int16 SECOND MILLISECOND MICROSECOND NANOSECOND
@@ -312,8 +325,10 @@ function Base.getproperty(x::Time, field::Symbol)
end
timeStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 2)
-timeAddUnit(b::FlatBuffers.Builder, unit::TimeUnit.T) = FlatBuffers.prependslot!(b, 0, unit, 1)
-timeAddBitWidth(b::FlatBuffers.Builder, bitwidth::Int32) = FlatBuffers.prependslot!(b, 1, bitwidth, 32)
+timeAddUnit(b::FlatBuffers.Builder, unit::TimeUnit.T) =
+ FlatBuffers.prependslot!(b, 0, unit, 1)
+timeAddBitWidth(b::FlatBuffers.Builder, bitwidth::Int32) =
+ FlatBuffers.prependslot!(b, 1, bitwidth, 32)
timeEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
struct Timestamp <: FlatBuffers.Table
@@ -336,8 +351,10 @@ function Base.getproperty(x::Timestamp, field::Symbol)
end
timestampStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 2)
-timestampAddUnit(b::FlatBuffers.Builder, unit::TimeUnit.T) = FlatBuffers.prependslot!(b, 0, unit, 0)
-timestampAddTimezone(b::FlatBuffers.Builder, timezone::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 1, timezone, 0)
+timestampAddUnit(b::FlatBuffers.Builder, unit::TimeUnit.T) =
+ FlatBuffers.prependslot!(b, 0, unit, 0)
+timestampAddTimezone(b::FlatBuffers.Builder, timezone::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 1, timezone, 0)
timestampEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
@enumx IntervalUnit::Int16 YEAR_MONTH DAY_TIME
@@ -359,7 +376,8 @@ function Base.getproperty(x::Interval, field::Symbol)
end
intervalStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 1)
-intervalAddUnit(b::FlatBuffers.Builder, unit::IntervalUnit.T) = FlatBuffers.prependslot!(b, 0, unit, 0)
+intervalAddUnit(b::FlatBuffers.Builder, unit::IntervalUnit.T) =
+ FlatBuffers.prependslot!(b, 0, unit, 0)
intervalEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
struct Duration <: FlatBuffers.Table
@@ -379,7 +397,8 @@ function Base.getproperty(x::Duration, field::Symbol)
end
durationStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 1)
-durationAddUnit(b::FlatBuffers.Builder, unit::TimeUnit.T) = FlatBuffers.prependslot!(b, 0, unit, 1)
+durationAddUnit(b::FlatBuffers.Builder, unit::TimeUnit.T) =
+ FlatBuffers.prependslot!(b, 0, unit, 1)
durationEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
function Type(b::UInt8)
@@ -451,8 +470,10 @@ function Base.getproperty(x::KeyValue, field::Symbol)
end
keyValueStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 2)
-keyValueAddKey(b::FlatBuffers.Builder, key::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 0, key, 0)
-keyValueAddValue(b::FlatBuffers.Builder, value::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 1, value, 0)
+keyValueAddKey(b::FlatBuffers.Builder, key::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 0, key, 0)
+keyValueAddValue(b::FlatBuffers.Builder, value::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 1, value, 0)
keyValueEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
@enumx DictionaryKind::Int16 DenseArray
@@ -487,9 +508,12 @@ function Base.getproperty(x::DictionaryEncoding, field::Symbol)
end
dictionaryEncodingStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 3)
-dictionaryEncodingAddId(b::FlatBuffers.Builder, id::Int64) = FlatBuffers.prependslot!(b, 0, id, 0)
-dictionaryEncodingAddIndexType(b::FlatBuffers.Builder, indextype::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 1, indextype, 0)
-dictionaryEncodingAddIsOrdered(b::FlatBuffers.Builder, isordered::Base.Bool) = FlatBuffers.prependslot!(b, 1, isordered, 0)
+dictionaryEncodingAddId(b::FlatBuffers.Builder, id::Int64) =
+ FlatBuffers.prependslot!(b, 0, id, 0)
+dictionaryEncodingAddIndexType(b::FlatBuffers.Builder, indextype::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 1, indextype, 0)
+dictionaryEncodingAddIsOrdered(b::FlatBuffers.Builder, isordered::Base.Bool) =
+ FlatBuffers.prependslot!(b, 1, isordered, 0)
dictionaryEncodingEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
struct Field <: FlatBuffers.Table
@@ -497,7 +521,8 @@ struct Field <: FlatBuffers.Table
pos::Base.Int
end
-Base.propertynames(x::Field) = (:name, :nullable, :type, :dictionary, :children, :custom_metadata)
+Base.propertynames(x::Field) =
+ (:name, :nullable, :type, :dictionary, :children, :custom_metadata)
function Base.getproperty(x::Field, field::Symbol)
if field === :name
@@ -538,15 +563,24 @@ function Base.getproperty(x::Field, field::Symbol)
end
fieldStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 7)
-fieldAddName(b::FlatBuffers.Builder, name::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 0, name, 0)
-fieldAddNullable(b::FlatBuffers.Builder, nullable::Base.Bool) = FlatBuffers.prependslot!(b, 1, nullable, false)
-fieldAddTypeType(b::FlatBuffers.Builder, ::Core.Type{T}) where {T} = FlatBuffers.prependslot!(b, 2, Type(T), 0)
-fieldAddType(b::FlatBuffers.Builder, type::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 3, type, 0)
-fieldAddDictionary(b::FlatBuffers.Builder, dictionary::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 4, dictionary, 0)
-fieldAddChildren(b::FlatBuffers.Builder, children::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 5, children, 0)
-fieldStartChildrenVector(b::FlatBuffers.Builder, numelems) = FlatBuffers.startvector!(b, 4, numelems, 4)
-fieldAddCustomMetadata(b::FlatBuffers.Builder, custommetadata::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 6, custommetadata, 0)
-fieldStartCustomMetadataVector(b::FlatBuffers.Builder, numelems) = FlatBuffers.startvector!(b, 4, numelems, 4)
+fieldAddName(b::FlatBuffers.Builder, name::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 0, name, 0)
+fieldAddNullable(b::FlatBuffers.Builder, nullable::Base.Bool) =
+ FlatBuffers.prependslot!(b, 1, nullable, false)
+fieldAddTypeType(b::FlatBuffers.Builder, ::Core.Type{T}) where {T} =
+ FlatBuffers.prependslot!(b, 2, Type(T), 0)
+fieldAddType(b::FlatBuffers.Builder, type::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 3, type, 0)
+fieldAddDictionary(b::FlatBuffers.Builder, dictionary::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 4, dictionary, 0)
+fieldAddChildren(b::FlatBuffers.Builder, children::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 5, children, 0)
+fieldStartChildrenVector(b::FlatBuffers.Builder, numelems) =
+ FlatBuffers.startvector!(b, 4, numelems, 4)
+fieldAddCustomMetadata(b::FlatBuffers.Builder, custommetadata::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 6, custommetadata, 0)
+fieldStartCustomMetadataVector(b::FlatBuffers.Builder, numelems) =
+ FlatBuffers.startvector!(b, 4, numelems, 4)
fieldEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
@enumx Endianness::Int16 Little Big
@@ -602,9 +636,14 @@ function Base.getproperty(x::Schema, field::Symbol)
end
schemaStart(b::FlatBuffers.Builder) = FlatBuffers.startobject!(b, 3)
-schemaAddEndianness(b::FlatBuffers.Builder, endianness::Endianness.T) = FlatBuffers.prependslot!(b, 0, endianness, 0)
-schemaAddFields(b::FlatBuffers.Builder, fields::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 1, fields, 0)
-schemaStartFieldsVector(b::FlatBuffers.Builder, numelems) = FlatBuffers.startvector!(b, 4, numelems, 4)
-schemaAddCustomMetadata(b::FlatBuffers.Builder, custommetadata::FlatBuffers.UOffsetT) = FlatBuffers.prependoffsetslot!(b, 2, custommetadata, 0)
-schemaStartCustomMetadataVector(b::FlatBuffers.Builder, numelems) = FlatBuffers.startvector!(b, 4, numelems, 4)
+schemaAddEndianness(b::FlatBuffers.Builder, endianness::Endianness.T) =
+ FlatBuffers.prependslot!(b, 0, endianness, 0)
+schemaAddFields(b::FlatBuffers.Builder, fields::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 1, fields, 0)
+schemaStartFieldsVector(b::FlatBuffers.Builder, numelems) =
+ FlatBuffers.startvector!(b, 4, numelems, 4)
+schemaAddCustomMetadata(b::FlatBuffers.Builder, custommetadata::FlatBuffers.UOffsetT) =
+ FlatBuffers.prependoffsetslot!(b, 2, custommetadata, 0)
+schemaStartCustomMetadataVector(b::FlatBuffers.Builder, numelems) =
+ FlatBuffers.startvector!(b, 4, numelems, 4)
schemaEnd(b::FlatBuffers.Builder) = FlatBuffers.endobject!(b)
diff --git a/src/show.jl b/src/show.jl
index 2aea3a5..3a9b556 100644
--- a/src/show.jl
+++ b/src/show.jl
@@ -38,16 +38,27 @@ function Base.show(io::IO, mime::MIME"text/plain", table::Table)
display_rows -= 1 # decrement for metadata header line
display_rows -= min(length(meta), 2) # decrement so we can show at least 2 lines of metadata
end
- print(io, "$(typeof(table)) with $(Tables.rowcount(table)) rows, $(ncols) columns, and ")
+ print(
+ io,
+ "$(typeof(table)) with $(Tables.rowcount(table)) rows, $(ncols) columns, and ",
+ )
sch = Tables.schema(table)
print(io, "schema:\n")
- schema_context = IOContext(io, :print_schema_header => false, :displaysize => (max(display_rows, 3), display_cols))
+ schema_context = IOContext(
+ io,
+ :print_schema_header => false,
+ :displaysize => (max(display_rows, 3), display_cols),
+ )
schema_str = sprint(show, mime, sch; context=schema_context)
print(io, schema_str)
display_rows -= (count("\n", schema_str) + 1) # decrement for number of lines printed
if meta !== nothing
print(io, "\n\nwith metadata given by a ")
- show(IOContext(io, :displaysize => (max(display_rows, 5), display_cols)), mime, meta)
+ show(
+ IOContext(io, :displaysize => (max(display_rows, 5), display_cols)),
+ mime,
+ meta,
+ )
end
return nothing
end
diff --git a/src/table.jl b/src/table.jl
index e61445c..bbb3825 100644
--- a/src/table.jl
+++ b/src/table.jl
@@ -20,7 +20,8 @@ struct ArrowBlob
len::Int
end
-ArrowBlob(bytes::Vector{UInt8}, pos::Int, len::Nothing) = ArrowBlob(bytes, pos, length(bytes))
+ArrowBlob(bytes::Vector{UInt8}, pos::Int, len::Nothing) =
+ ArrowBlob(bytes, pos, length(bytes))
tobytes(bytes::Vector{UInt8}) = bytes
tobytes(io::IO) = Base.read(io)
@@ -69,8 +70,8 @@ mutable struct Stream
names::Vector{Symbol}
types::Vector{Type}
schema::Union{Nothing,Meta.Schema}
- dictencodings::Dict{Int64, DictEncoding} # dictionary id => DictEncoding
- dictencoded::Dict{Int64, Meta.Field} # dictionary id => field
+ dictencodings::Dict{Int64,DictEncoding} # dictionary id => DictEncoding
+ dictencoded::Dict{Int64,Meta.Field} # dictionary id => field
convert::Bool
compression::Ref{Union{Symbol,Nothing}}
end
@@ -81,10 +82,21 @@ function Stream(inputs::Vector{ArrowBlob}; convert::Bool=true)
names = Symbol[]
types = Type[]
schema = nothing
- dictencodings = Dict{Int64, DictEncoding}()
- dictencoded = Dict{Int64, Meta.Field}()
+ dictencodings = Dict{Int64,DictEncoding}()
+ dictencoded = Dict{Int64,Meta.Field}()
compression = Ref{Union{Symbol,Nothing}}(nothing)
- Stream(inputs, inputindex, batchiterator, names, types, schema, dictencodings, dictencoded, convert, compression)
+ Stream(
+ inputs,
+ inputindex,
+ batchiterator,
+ names,
+ types,
+ schema,
+ dictencodings,
+ dictencoded,
+ convert,
+ compression,
+ )
end
function Stream(input, pos::Integer=1, len=nothing; kw...)
@@ -176,13 +188,20 @@ function Base.iterate(x::Stream, (pos, id)=(1, 0))
# store custom_metadata?
for (i, field) in enumerate(x.schema.fields)
push!(x.names, Symbol(field.name))
- push!(x.types, juliaeltype(field, buildmetadata(field.custom_metadata), x.convert))
+ push!(
+ x.types,
+ juliaeltype(field, buildmetadata(field.custom_metadata), x.convert),
+ )
# recursively find any dictionaries for any fields
getdictionaries!(x.dictencoded, field)
@debugv 1 "parsed column from schema: field = $field"
end
elseif header != x.schema
- throw(ArgumentError("mismatched schemas between different arrow batches: $(x.schema) != $header"))
+ throw(
+ ArgumentError(
+ "mismatched schemas between different arrow batches: $(x.schema) != $header",
+ ),
+ )
end
elseif header isa Meta.DictionaryBatch
id = header.id
@@ -194,17 +213,42 @@ function Base.iterate(x::Stream, (pos, id)=(1, 0))
if haskey(x.dictencodings, id) && header.isDelta
# delta
field = x.dictencoded[id]
- values, _, _ = build(field, field.type, batch, recordbatch, x.dictencodings, Int64(1), Int64(1), x.convert)
+ values, _, _ = build(
+ field,
+ field.type,
+ batch,
+ recordbatch,
+ x.dictencodings,
+ Int64(1),
+ Int64(1),
+ x.convert,
+ )
dictencoding = x.dictencodings[id]
append!(dictencoding.data, values)
continue
end
# new dictencoding or replace
field = x.dictencoded[id]
- values, _, _ = build(field, field.type, batch, recordbatch, x.dictencodings, Int64(1), Int64(1), x.convert)
+ values, _, _ = build(
+ field,
+ field.type,
+ batch,
+ recordbatch,
+ x.dictencodings,
+ Int64(1),
+ Int64(1),
+ x.convert,
+ )
A = ChainedVector([values])
- S = field.dictionary.indexType === nothing ? Int32 : juliaeltype(field, field.dictionary.indexType, false)
- x.dictencodings[id] = DictEncoding{eltype(A), S, typeof(A)}(id, A, field.dictionary.isOrdered, values.metadata)
+ S =
+ field.dictionary.indexType === nothing ? Int32 :
+ juliaeltype(field, field.dictionary.indexType, false)
+ x.dictencodings[id] = DictEncoding{eltype(A),S,typeof(A)}(
+ id,
+ A,
+ field.dictionary.isOrdered,
+ values.metadata,
+ )
@debugv 1 "parsed dictionary batch message: id=$id, data=$values\n"
elseif header isa Meta.RecordBatch
@debugv 1 "parsing record batch message: compression = $(header.compression)"
@@ -230,7 +274,7 @@ function Base.iterate(x::Stream, (pos, id)=(1, 0))
end
end
- lookup = Dict{Symbol, AbstractVector}()
+ lookup = Dict{Symbol,AbstractVector}()
types = Type[]
for (nm, col) in zip(x.names, columns)
lookup[nm] = col
@@ -267,16 +311,30 @@ struct Table <: Tables.AbstractColumns
names::Vector{Symbol}
types::Vector{Type}
columns::Vector{AbstractVector}
- lookup::Dict{Symbol, AbstractVector}
+ lookup::Dict{Symbol,AbstractVector}
schema::Ref{Meta.Schema}
metadata::Ref{Union{Nothing,Base.ImmutableDict{String,String}}}
end
-Table() = Table(Symbol[], Type[], AbstractVector[], Dict{Symbol, AbstractVector}(), Ref{Meta.Schema}(), Ref{Union{Nothing,Base.ImmutableDict{String,String}}}(nothing))
+Table() = Table(
+ Symbol[],
+ Type[],
+ AbstractVector[],
+ Dict{Symbol,AbstractVector}(),
+ Ref{Meta.Schema}(),
+ Ref{Union{Nothing,Base.ImmutableDict{String,String}}}(nothing),
+)
function Table(names, types, columns, lookup, schema)
m = isassigned(schema) ? buildmetadata(schema[]) : nothing
- return Table(names, types, columns, lookup, schema, Ref{Union{Nothing,Base.ImmutableDict{String,String}}}(m))
+ return Table(
+ names,
+ types,
+ columns,
+ lookup,
+ schema,
+ Ref{Union{Nothing,Base.ImmutableDict{String,String}}}(m),
+ )
end
names(t::Table) = getfield(t, :names)
@@ -331,14 +389,14 @@ function Base.iterate(tp::TablePartitions, i=1)
i > tp.npartitions && return nothing
tp.npartitions == 1 && return tp.table, i + 1
cols = columns(tp.table)
- newcols = AbstractVector[cols[j].arrays[i] for j in 1:length(cols)]
+ newcols = AbstractVector[cols[j].arrays[i] for j = 1:length(cols)]
nms = names(tp.table)
tbl = Table(
nms,
types(tp.table),
newcols,
- Dict{Symbol, AbstractVector}(nms[i] => newcols[i] for i in 1:length(nms)),
- schema(tp.table)
+ Dict{Symbol,AbstractVector}(nms[i] => newcols[i] for i = 1:length(nms)),
+ schema(tp.table),
)
return tbl, i + 1
end
@@ -346,16 +404,19 @@ end
Tables.partitions(t::Table) = TablePartitions(t)
# high-level user API functions
-Table(input, pos::Integer=1, len=nothing; kw...) = Table([ArrowBlob(tobytes(input), pos, len)]; kw...)
-Table(input::Vector{UInt8}, pos::Integer=1, len=nothing; kw...) = Table([ArrowBlob(tobytes(input), pos, len)]; kw...)
-Table(inputs::Vector; kw...) = Table([ArrowBlob(tobytes(x), 1, nothing) for x in inputs]; kw...)
+Table(input, pos::Integer=1, len=nothing; kw...) =
+ Table([ArrowBlob(tobytes(input), pos, len)]; kw...)
+Table(input::Vector{UInt8}, pos::Integer=1, len=nothing; kw...) =
+ Table([ArrowBlob(tobytes(input), pos, len)]; kw...)
+Table(inputs::Vector; kw...) =
+ Table([ArrowBlob(tobytes(x), 1, nothing) for x in inputs]; kw...)
# will detect whether we're reading a Table from a file or stream
function Table(blobs::Vector{ArrowBlob}; convert::Bool=true)
t = Table()
sch = nothing
- dictencodings = Dict{Int64, DictEncoding}() # dictionary id => DictEncoding
- dictencoded = Dict{Int64, Meta.Field}() # dictionary id => field
+ dictencodings = Dict{Int64,DictEncoding}() # dictionary id => DictEncoding
+ dictencoded = Dict{Int64,Meta.Field}() # dictionary id => field
sync = OrderedSynchronizer()
tsks = Channel{Any}(Inf)
tsk = Threads.@spawn begin
@@ -395,7 +456,11 @@ function Table(blobs::Vector{ArrowBlob}; convert::Bool=true)
sch = header
schema(t)[] = sch
elseif sch != header
- throw(ArgumentError("mismatched schemas between different arrow batches: $sch != $header"))
+ throw(
+ ArgumentError(
+ "mismatched schemas between different arrow batches: $sch != $header",
+ ),
+ )
end
elseif header isa Meta.DictionaryBatch
id = header.id
@@ -404,23 +469,55 @@ function Table(blobs::Vector{ArrowBlob}; convert::Bool=true)
if haskey(dictencodings, id) && header.isDelta
# delta
field = dictencoded[id]
- values, _, _ = build(field, field.type, batch, recordbatch, dictencodings, Int64(1), Int64(1), convert)
+ values, _, _ = build(
+ field,
+ field.type,
+ batch,
+ recordbatch,
+ dictencodings,
+ Int64(1),
+ Int64(1),
+ convert,
+ )
dictencoding = dictencodings[id]
if typeof(dictencoding.data) <: ChainedVector
append!(dictencoding.data, values)
else
A = ChainedVector([dictencoding.data, values])
- S = field.dictionary.indexType === nothing ? Int32 : juliaeltype(field, field.dictionary.indexType, false)
- dictencodings[id] = DictEncoding{eltype(A), S, typeof(A)}(id, A, field.dictionary.isOrdered, values.metadata)
+ S =
+ field.dictionary.indexType === nothing ? Int32 :
+ juliaeltype(field, field.dictionary.indexType, false)
+ dictencodings[id] = DictEncoding{eltype(A),S,typeof(A)}(
+ id,
+ A,
+ field.dictionary.isOrdered,
+ values.metadata,
+ )
end
continue
end
# new dictencoding or replace
field = dictencoded[id]
- values, _, _ = build(field, field.type, batch, recordbatch, dictencodings, Int64(1), Int64(1), convert)
+ values, _, _ = build(
+ field,
+ field.type,
+ batch,
+ recordbatch,
+ dictencodings,
+ Int64(1),
+ Int64(1),
+ convert,
+ )
A = values
- S = field.dictionary.indexType === nothing ? Int32 : juliaeltype(field, field.dictionary.indexType, false)
- dictencodings[id] = DictEncoding{eltype(A), S, typeof(A)}(id, A, field.dictionary.isOrdered, values.metadata)
+ S =
+ field.dictionary.indexType === nothing ? Int32 :
+ juliaeltype(field, field.dictionary.indexType, false)
+ dictencodings[id] = DictEncoding{eltype(A),S,typeof(A)}(
+ id,
+ A,
+ field.dictionary.isOrdered,
+ values.metadata,
+ )
@debugv 1 "parsed dictionary batch message: id=$id, data=$values\n"
elseif header isa Meta.RecordBatch
anyrecordbatches = true
@@ -499,7 +596,7 @@ function Base.iterate(x::BatchIterator, (pos, id)=(x.startpos, 0))
@debugv 1 "not enough bytes left to read Meta.Message"
return nothing
end
- msg = FlatBuffers.getrootas(Meta.Message, x.bytes, pos-1)
+ msg = FlatBuffers.getrootas(Meta.Message, x.bytes, pos - 1)
pos += msglen
# pos now points to message body
@debugv 1 "parsing message: pos = $pos, msglen = $msglen, bodyLength = $(msg.bodyLength)"
@@ -513,7 +610,7 @@ end
struct VectorIterator
schema::Meta.Schema
batch::Batch # batch.msg.header MUST BE RecordBatch
- dictencodings::Dict{Int64, DictEncoding}
+ dictencodings::Dict{Int64,DictEncoding}
convert::Bool
end
@@ -522,11 +619,22 @@ buildmetadata(meta) = toidict(String(kv.key) => String(kv.value) for kv in meta)
buildmetadata(::Nothing) = nothing
buildmetadata(x::AbstractDict) = x
-function Base.iterate(x::VectorIterator, (columnidx, nodeidx, bufferidx)=(Int64(1), Int64(1), Int64(1)))
+function Base.iterate(
+ x::VectorIterator,
+ (columnidx, nodeidx, bufferidx)=(Int64(1), Int64(1), Int64(1)),
+)
columnidx > length(x.schema.fields) && return nothing
field = x.schema.fields[columnidx]
@debugv 2 "building top-level column: field = $(field), columnidx = $columnidx, nodeidx = $nodeidx, bufferidx = $bufferidx"
- A, nodeidx, bufferidx = build(field, x.batch, x.batch.msg.header, x.dictencodings, nodeidx, bufferidx, x.convert)
+ A, nodeidx, bufferidx = build(
+ field,
+ x.batch,
+ x.batch.msg.header,
+ x.dictencodings,
+ nodeidx,
+ bufferidx,
+ x.convert,
+ )
@debugv 2 "built top-level column: A = $(typeof(A)), columnidx = $columnidx, nodeidx = $nodeidx, bufferidx = $bufferidx"
@debugv 3 A
return A, (columnidx + 1, nodeidx, bufferidx)
@@ -534,8 +642,9 @@ end
Base.length(x::VectorIterator) = length(x.schema.fields)
-const ListTypes = Union{Meta.Utf8, Meta.LargeUtf8, Meta.Binary, Meta.LargeBinary, Meta.List, Meta.LargeList}
-const LargeLists = Union{Meta.LargeUtf8, Meta.LargeBinary, Meta.LargeList}
+const ListTypes =
+ Union{Meta.Utf8,Meta.LargeUtf8,Meta.Binary,Meta.LargeBinary,Meta.List,Meta.LargeList}
+const LargeLists = Union{Meta.LargeUtf8,Meta.LargeBinary,Meta.LargeList}
function build(field::Meta.Field, batch, rb, de, nodeidx, bufferidx, convert)
d = field.dictionary
@@ -546,11 +655,18 @@ function build(field::Meta.Field, batch, rb, de, nodeidx, bufferidx, convert)
S = d.indexType === nothing ? Int32 : juliaeltype(field, d.indexType, false)
bytes, indices = reinterp(S, batch, buffer, rb.compression)
encoding = de[d.id]
- A = DictEncoded(bytes, validity, indices, encoding, buildmetadata(field.custom_metadata))
+ A = DictEncoded(
+ bytes,
+ validity,
+ indices,
+ encoding,
+ buildmetadata(field.custom_metadata),
+ )
nodeidx += 1
bufferidx += 1
else
- A, nodeidx, bufferidx = build(field, field.type, batch, rb, de, nodeidx, bufferidx, convert)
+ A, nodeidx, bufferidx =
+ build(field, field.type, batch, rb, de, nodeidx, bufferidx, convert)
end
return A, nodeidx, bufferidx
end
@@ -593,7 +709,9 @@ function uncompress(ptr::Ptr{UInt8}, buffer, compression)
transcode(comp[], encodedbytes, decodedbytes)
end
else
- error("unsupported compression type when reading arrow buffers: $(typeof(compression.codec))")
+ error(
+ "unsupported compression type when reading arrow buffers: $(typeof(compression.codec))",
+ )
end
return len, decodedbytes
end
@@ -624,7 +742,7 @@ function reinterp(::Type{T}, batch, buf, compression) where {T}
end
end
-const SubVector{T, P} = SubArray{T, 1, P, Tuple{UnitRange{Int64}}, true}
+const SubVector{T,P} = SubArray{T,1,P,Tuple{UnitRange{Int64}},true}
function build(f::Meta.Field, L::ListTypes, batch, rb, de, nodeidx, bufferidx, convert)
@debugv 2 "building array: L = $L"
@@ -640,24 +758,37 @@ function build(f::Meta.Field, L::ListTypes, batch, rb, de, nodeidx, bufferidx, c
nodeidx += 1
meta = buildmetadata(f.custom_metadata)
T = juliaeltype(f, meta, convert)
- if L isa Meta.Utf8 || L isa Meta.LargeUtf8 || L isa Meta.Binary || L isa Meta.LargeBinary
+ if L isa Meta.Utf8 ||
+ L isa Meta.LargeUtf8 ||
+ L isa Meta.Binary ||
+ L isa Meta.LargeBinary
buffer = rb.buffers[bufferidx]
bytes, A = reinterp(UInt8, batch, buffer, rb.compression)
bufferidx += 1
else
bytes = UInt8[]
- A, nodeidx, bufferidx = build(f.children[1], batch, rb, de, nodeidx, bufferidx, convert)
+ A, nodeidx, bufferidx =
+ build(f.children[1], batch, rb, de, nodeidx, bufferidx, convert)
# juliaeltype returns Vector for List, translate to SubArray
S = Base.nonmissingtype(T)
if S <: Vector
- ST = SubVector{eltype(A), typeof(A)}
- T = S == T ? ST : Union{Missing, ST}
+ ST = SubVector{eltype(A),typeof(A)}
+ T = S == T ? ST : Union{Missing,ST}
end
end
- return List{T, OT, typeof(A)}(bytes, validity, offsets, A, len, meta), nodeidx, bufferidx
+ return List{T,OT,typeof(A)}(bytes, validity, offsets, A, len, meta), nodeidx, bufferidx
end
-function build(f::Meta.Field, L::Union{Meta.FixedSizeBinary, Meta.FixedSizeList}, batch, rb, de, nodeidx, bufferidx, convert)
+function build(
+ f::Meta.Field,
+ L::Union{Meta.FixedSizeBinary,Meta.FixedSizeList},
+ batch,
+ rb,
+ de,
+ nodeidx,
+ bufferidx,
+ convert,
+)
@debugv 2 "building array: L = $L"
validity = buildbitmap(batch, rb, nodeidx, bufferidx)
bufferidx += 1
@@ -669,11 +800,12 @@ function build(f::Meta.Field, L::Union{Meta.FixedSizeBinary, Meta.FixedSizeList}
bufferidx += 1
else
bytes = UInt8[]
- A, nodeidx, bufferidx = build(f.children[1], batch, rb, de, nodeidx, bufferidx, convert)
+ A, nodeidx, bufferidx =
+ build(f.children[1], batch, rb, de, nodeidx, bufferidx, convert)
end
meta = buildmetadata(f.custom_metadata)
T = juliaeltype(f, meta, convert)
- return FixedSizeList{T, typeof(A)}(bytes, validity, A, len, meta), nodeidx, bufferidx
+ return FixedSizeList{T,typeof(A)}(bytes, validity, A, len, meta), nodeidx, bufferidx
end
function build(f::Meta.Field, L::Meta.Map, batch, rb, de, nodeidx, bufferidx, convert)
@@ -691,7 +823,7 @@ function build(f::Meta.Field, L::Meta.Map, batch, rb, de, nodeidx, bufferidx, co
A, nodeidx, bufferidx = build(f.children[1], batch, rb, de, nodeidx, bufferidx, convert)
meta = buildmetadata(f.custom_metadata)
T = juliaeltype(f, meta, convert)
- return Map{T, OT, typeof(A)}(validity, offsets, A, len, meta), nodeidx, bufferidx
+ return Map{T,OT,typeof(A)}(validity, offsets, A, len, meta), nodeidx, bufferidx
end
function build(f::Meta.Field, L::Meta.Struct, batch, rb, de, nodeidx, bufferidx, convert)
@@ -708,7 +840,7 @@ function build(f::Meta.Field, L::Meta.Struct, batch, rb, de, nodeidx, bufferidx,
data = Tuple(vecs)
meta = buildmetadata(f.custom_metadata)
T = juliaeltype(f, meta, convert)
- return Struct{T, typeof(data)}(validity, data, len, meta), nodeidx, bufferidx
+ return Struct{T,typeof(data)}(validity, data, len, meta), nodeidx, bufferidx
end
function build(f::Meta.Field, L::Meta.Union, batch, rb, de, nodeidx, bufferidx, convert)
@@ -732,9 +864,9 @@ function build(f::Meta.Field, L::Meta.Union, batch, rb, de, nodeidx, bufferidx,
T = juliaeltype(f, meta, convert)
UT = UnionT(f, convert)
if L.mode == Meta.UnionMode.Dense
- B = DenseUnion{T, UT, typeof(data)}(bytes, bytes2, typeIds, offsets, data, meta)
+ B = DenseUnion{T,UT,typeof(data)}(bytes, bytes2, typeIds, offsets, data, meta)
else
- B = SparseUnion{T, UT, typeof(data)}(bytes, typeIds, data, meta)
+ B = SparseUnion{T,UT,typeof(data)}(bytes, typeIds, data, meta)
end
return B, nodeidx, bufferidx
end
@@ -743,7 +875,9 @@ function build(f::Meta.Field, L::Meta.Null, batch, rb, de, nodeidx, bufferidx, c
@debugv 2 "building array: L = $L"
meta = buildmetadata(f.custom_metadata)
T = juliaeltype(f, meta, convert)
- return NullVector{maybemissing(T)}(MissingVector(rb.nodes[nodeidx].length), meta), nodeidx + 1, bufferidx
+ return NullVector{maybemissing(T)}(MissingVector(rb.nodes[nodeidx].length), meta),
+ nodeidx + 1,
+ bufferidx
end
# primitives
diff --git a/src/utils.jl b/src/utils.jl
index f7fb23b..419bd39 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -34,7 +34,9 @@ writearray(io, col) = writearray(io, maybemissing(eltype(col)), col)
function writearray(io::IO, ::Type{T}, col) where {T}
if col isa Vector{T}
n = Base.write(io, col)
- elseif isbitstype(T) && (col isa Vector{Union{T, Missing}} || col isa SentinelVector{T, T, Missing, Vector{T}})
+ elseif isbitstype(T) && (
+ col isa Vector{Union{T,Missing}} || col isa SentinelVector{T,T,Missing,Vector{T}}
+ )
# need to write the non-selector bytes of isbits Union Arrays
n = Base.unsafe_write(io, pointer(col), sizeof(T) * length(col))
elseif col isa ChainedVector
@@ -92,21 +94,23 @@ function readbuffer(t::AbstractVector{UInt8}, pos::Integer, ::Type{T}) where {T}
end
# given a number of unique values; what dict encoding _index_ type is most appropriate
-encodingtype(n) = n < div(typemax(Int8), 2) ? Int8 : n < div(typemax(Int16), 2) ? Int16 : n < div(typemax(Int32), 2) ? Int32 : Int64
+encodingtype(n) =
+ n < div(typemax(Int8), 2) ? Int8 :
+ n < div(typemax(Int16), 2) ? Int16 : n < div(typemax(Int32), 2) ? Int32 : Int64
maybemissing(::Type{T}) where {T} = T === Missing ? Missing : Base.nonmissingtype(T)
-withmissing(U::Union, S) = U >: Missing ? Union{Missing, S} : S
-withmissing(T, S) = T === Missing ? Union{Missing, S} : S
+withmissing(U::Union, S) = U >: Missing ? Union{Missing,S} : S
+withmissing(T, S) = T === Missing ? Union{Missing,S} : S
function getfooter(filebytes)
len = readbuffer(filebytes, length(filebytes) - 9, Int32)
- FlatBuffers.getrootas(Meta.Footer, filebytes[end-(9 + len):end-10], 0)
+ FlatBuffers.getrootas(Meta.Footer, filebytes[(end - (9 + len)):(end - 10)], 0)
end
function getrb(filebytes)
f = getfooter(filebytes)
rb = f.recordBatches[1]
- return filebytes[rb.offset+1:(rb.offset+1+rb.metaDataLength)]
+ return filebytes[(rb.offset + 1):(rb.offset + 1 + rb.metaDataLength)]
# FlatBuffers.getrootas(Meta.Message, filebytes, rb.offset)
end
@@ -128,7 +132,7 @@ toidict(x::Base.ImmutableDict) = x
# ref https://github.com/apache/arrow-julia/pull/238#issuecomment-919415809
function toidict(pairs)
- isempty(pairs) && return Base.ImmutableDict{String, String}()
+ isempty(pairs) && return Base.ImmutableDict{String,String}()
dict = Base.ImmutableDict(first(pairs))
for pair in Iterators.drop(pairs, 1)
dict = Base.ImmutableDict(dict, pair)
diff --git a/src/write.jl b/src/write.jl
index a6bd40b..1376ee3 100644
--- a/src/write.jl
+++ b/src/write.jl
@@ -61,12 +61,12 @@ function write(file_path, tbl; kwargs...)
end
struct Message
- msgflatbuf
- columns
- bodylen
+ msgflatbuf::Any
+ columns::Any
+ bodylen::Any
isrecordbatch::Bool
blockmsg::Bool
- headerType
+ headerType::Any
end
struct Block
@@ -135,9 +135,28 @@ mutable struct Writer{T<:IO}
isclosed::Bool
end
-function Base.open(::Type{Writer}, io::T, compress::Union{Nothing,Symbol,LZ4FrameCompressor,ZstdCompressor}, writetofile::Bool, largelists::Bool, denseunions::Bool, dictencode::Bool, dictencodenested::Bool, alignment::Integer, maxdepth::Integer, ntasks::Integer, meta::Union{Nothing,Any}, colmeta::Union{Nothing,Any}, closeio::Bool) where {T<:IO}
+function Base.open(
+ ::Type{Writer},
+ io::T,
+ compress::Union{Nothing,Symbol,LZ4FrameCompressor,ZstdCompressor},
+ writetofile::Bool,
+ largelists::Bool,
+ denseunions::Bool,
+ dictencode::Bool,
+ dictencodenested::Bool,
+ alignment::Integer,
+ maxdepth::Integer,
+ ntasks::Integer,
+ meta::Union{Nothing,Any},
+ colmeta::Union{Nothing,Any},
+ closeio::Bool,
+) where {T<:IO}
if compress isa Symbol && compress !== :lz4 && compress !== :zstd
- throw(ArgumentError("unsupported compress keyword argument value: $compress. Valid values include `:lz4` or `:zstd`"))
+ throw(
+ ArgumentError(
+ "unsupported compress keyword argument value: $compress. Valid values include `:lz4` or `:zstd`",
+ ),
+ )
end
sync = OrderedSynchronizer(2)
msgs = Channel{Message}(ntasks)
@@ -147,28 +166,86 @@ function Base.open(::Type{Writer}, io::T, compress::Union{Nothing,Symbol,LZ4Fram
blocks = (Block[], Block[])
# start message writing from channel
threaded = Threads.nthreads() > 1
- task = threaded ? (Threads.@spawn for msg in msgs
- Base.write(io, msg, blocks, schema, alignment)
- end) : (@async for msg in msgs
- Base.write(io, msg, blocks, schema, alignment)
- end)
+ task =
+ threaded ? (Threads.@spawn for msg in msgs
+ Base.write(io, msg, blocks, schema, alignment)
+ end) : (@async for msg in msgs
+ Base.write(io, msg, blocks, schema, alignment)
+ end)
anyerror = Threads.Atomic{Bool}(false)
errorref = Ref{Any}()
meta = _normalizemeta(meta)
colmeta = _normalizecolmeta(colmeta)
- return Writer{T}(io, closeio, compress, writetofile, largelists, denseunions, dictencode, dictencodenested, threaded, alignment, maxdepth, meta, colmeta, sync, msgs, schema, firstcols, dictencodings, blocks, task, anyerror, errorref, 1, false)
+ return Writer{T}(
+ io,
+ closeio,
+ compress,
+ writetofile,
+ largelists,
+ denseunions,
+ dictencode,
+ dictencodenested,
+ threaded,
+ alignment,
+ maxdepth,
+ meta,
+ colmeta,
+ sync,
+ msgs,
+ schema,
+ firstcols,
+ dictencodings,
+ blocks,
+ task,
+ anyerror,
+ errorref,
+ 1,
+ false,
+ )
end
-function Base.open(::Type{Writer}, io::IO; compress::Union{Nothing,Symbol,LZ4FrameCompressor,ZstdCompressor}=nothing, file::Bool=true, largelists::Bool=false, denseunions::Bool=true, dictencode::Bool=false, dictencodenested::Bool=false, alignment::Integer=8, maxdepth::Integer=DEFAULT_MAX_DEPTH, ntasks::Integer=typemax(Int32), metadata::Union{Nothing,Any}=nothing, colmetadata::Union{Nothing,Any}=nothing, closeio::Bool=false)
- open(Writer, io, compress, file, largelists, denseunions, dictencode, dictencodenested, alignment, maxdepth, ntasks, metadata, colmetadata, closeio)
+function Base.open(
+ ::Type{Writer},
+ io::IO;
+ compress::Union{Nothing,Symbol,LZ4FrameCompressor,ZstdCompressor}=nothing,
+ file::Bool=true,
+ largelists::Bool=false,
+ denseunions::Bool=true,
+ dictencode::Bool=false,
+ dictencodenested::Bool=false,
+ alignment::Integer=8,
+ maxdepth::Integer=DEFAULT_MAX_DEPTH,
+ ntasks::Integer=typemax(Int32),
+ metadata::Union{Nothing,Any}=nothing,
+ colmetadata::Union{Nothing,Any}=nothing,
+ closeio::Bool=false,
+)
+ open(
+ Writer,
+ io,
+ compress,
+ file,
+ largelists,
+ denseunions,
+ dictencode,
+ dictencodenested,
+ alignment,
+ maxdepth,
+ ntasks,
+ metadata,
+ colmetadata,
+ closeio,
+ )
end
-Base.open(::Type{Writer}, file_path; kwargs...) = open(Writer, open(file_path, "w"); kwargs..., closeio=true)
+Base.open(::Type{Writer}, file_path; kwargs...) =
+ open(Writer, open(file_path, "w"); kwargs..., closeio=true)
function check_errors(writer::Writer)
if writer.anyerror[]
errorref = writer.errorref[]
- @error "error writing arrow data on partition = $(errorref[3])" exception = (errorref[1], errorref[2])
+ @error "error writing arrow data on partition = $(errorref[3])" exception =
+ (errorref[1], errorref[2])
error("fatal error writing arrow data")
end
end
@@ -184,7 +261,18 @@ function write(writer::Writer, source)
Base.write(writer.io, FILE_FORMAT_MAGIC_BYTES, b"\0\0")
end
meta = isnothing(writer.meta) ? getmetadata(source) : writer.meta
- cols = toarrowtable(tblcols, writer.dictencodings, writer.largelists, writer.compress, writer.denseunions, writer.dictencode, writer.dictencodenested, writer.maxdepth, meta, writer.colmeta)
+ cols = toarrowtable(
+ tblcols,
+ writer.dictencodings,
+ writer.largelists,
+ writer.compress,
+ writer.denseunions,
+ writer.dictencode,
+ writer.dictencodenested,
+ writer.maxdepth,
+ meta,
+ writer.colmeta,
+ )
writer.schema[] = Tables.schema(cols)
writer.firstcols[] = cols
put!(writer.msgs, makeschemamsg(writer.schema[], cols))
@@ -194,7 +282,13 @@ function write(writer::Writer, source)
# assign dict encoding ids
de = delock.value
dictsch = Tables.Schema((:col,), (eltype(de.data),))
- dictbatchmsg = makedictionarybatchmsg(dictsch, (col=de.data,), id, false, writer.alignment)
+ dictbatchmsg = makedictionarybatchmsg(
+ dictsch,
+ (col=de.data,),
+ id,
+ false,
+ writer.alignment,
+ )
put!(writer.msgs, dictbatchmsg)
end
end
@@ -202,9 +296,45 @@ function write(writer::Writer, source)
put!(writer.msgs, recbatchmsg)
else
if writer.threaded
- Threads.@spawn process_partition(tblcols, writer.dictencodings, writer.largelists, writer.compress, writer.denseunions, writer.dictencode, writer.dictencodenested, writer.maxdepth, writer.sync, writer.msgs, writer.alignment, $(writer.partition_count), writer.schema, writer.errorref, writer.anyerror, writer.meta, writer.colmeta)
+ Threads.@spawn process_partition(
+ tblcols,
+ writer.dictencodings,
+ writer.largelists,
+ writer.compress,
+ writer.denseunions,
+ writer.dictencode,
+ writer.dictencodenested,
+ writer.maxdepth,
+ writer.sync,
+ writer.msgs,
+ writer.alignment,
+ $(writer.partition_count),
+ writer.schema,
+ writer.errorref,
+ writer.anyerror,
+ writer.meta,
+ writer.colmeta,
+ )
else
- @async process_partition(tblcols, writer.dictencodings, writer.largelists, writer.compress, writer.denseunions, writer.dictencode, writer.dictencodenested, writer.maxdepth, writer.sync, writer.msgs, writer.alignment, $(writer.partition_count), writer.schema, writer.errorref, writer.anyerror, writer.meta, writer.colmeta)
+ @async process_partition(
+ tblcols,
+ writer.dictencodings,
+ writer.largelists,
+ writer.compress,
+ writer.denseunions,
+ writer.dictencode,
+ writer.dictencodenested,
+ writer.maxdepth,
+ writer.sync,
+ writer.msgs,
+ writer.alignment,
+ $(writer.partition_count),
+ writer.schema,
+ writer.errorref,
+ writer.anyerror,
+ writer.meta,
+ writer.colmeta,
+ )
end
end
writer.partition_count += 1
@@ -277,22 +407,82 @@ function write(io::IO, tbl; kwargs...)
io
end
-function write(io, source, writetofile, largelists, compress, denseunions, dictencode, dictencodenested, alignment, maxdepth, ntasks, meta, colmeta)
- open(Writer, io, compress, writetofile, largelists, denseunions, dictencode, dictencodenested, alignment, maxdepth, ntasks, meta, colmeta) do writer
+function write(
+ io,
+ source,
+ writetofile,
+ largelists,
+ compress,
+ denseunions,
+ dictencode,
+ dictencodenested,
+ alignment,
+ maxdepth,
+ ntasks,
+ meta,
+ colmeta,
+)
+ open(
+ Writer,
+ io,
+ compress,
+ writetofile,
+ largelists,
+ denseunions,
+ dictencode,
+ dictencodenested,
+ alignment,
+ maxdepth,
+ ntasks,
+ meta,
+ colmeta,
+ ) do writer
write(writer, source)
end
io
end
-function process_partition(cols, dictencodings, largelists, compress, denseunions, dictencode, dictencodenested, maxdepth, sync, msgs, alignment, i, sch, errorref, anyerror, meta, colmeta)
+function process_partition(
+ cols,
+ dictencodings,
+ largelists,
+ compress,
+ denseunions,
+ dictencode,
+ dictencodenested,
+ maxdepth,
+ sync,
+ msgs,
+ alignment,
+ i,
+ sch,
+ errorref,
+ anyerror,
+ meta,
+ colmeta,
+)
try
- cols = toarrowtable(cols, dictencodings, largelists, compress, denseunions, dictencode, dictencodenested, maxdepth, meta, colmeta)
+ cols = toarrowtable(
+ cols,
+ dictencodings,
+ largelists,
+ compress,
+ denseunions,
+ dictencode,
+ dictencodenested,
+ maxdepth,
+ meta,
+ colmeta,
+ )
dictmsgs = nothing
if !isempty(cols.dictencodingdeltas)
dictmsgs = []
for de in cols.dictencodingdeltas
dictsch = Tables.Schema((:col,), (eltype(de.data),))
- push!(dictmsgs, makedictionarybatchmsg(dictsch, (col=de.data,), de.id, true, alignment))
+ push!(
+ dictmsgs,
+ makedictionarybatchmsg(dictsch, (col=de.data,), de.id, true, alignment),
+ )
end
end
put!(sync, i) do
@@ -315,7 +505,18 @@ struct ToArrowTable
dictencodingdeltas::Vector{DictEncoding}
end
-function toarrowtable(cols, dictencodings, largelists, compress, denseunions, dictencode, dictencodenested, maxdepth, meta, colmeta)
+function toarrowtable(
+ cols,
+ dictencodings,
+ largelists,
+ compress,
+ denseunions,
+ dictencode,
+ dictencodenested,
+ maxdepth,
+ meta,
+ colmeta,
+)
@debugv 1 "converting input table to arrow formatted columns"
sch = Tables.schema(cols)
types = collect(sch.types)
@@ -326,14 +527,32 @@ function toarrowtable(cols, dictencodings, largelists, compress, denseunions, di
Tables.eachcolumn(sch, cols) do col, i, nm
oldcolmeta = getmetadata(col)
newcolmeta = isnothing(colmeta) ? oldcolmeta : get(colmeta, nm, oldcolmeta)
- newcol = toarrowvector(col, i, dictencodings, dictencodingdeltas, newcolmeta; compression=compress, largelists=largelists, denseunions=denseunions, dictencode=dictencode, dictencodenested=dictencodenested, maxdepth=maxdepth)
+ newcol = toarrowvector(
+ col,
+ i,
+ dictencodings,
+ dictencodingdeltas,
+ newcolmeta;
+ compression=compress,
+ largelists=largelists,
+ denseunions=denseunions,
+ dictencode=dictencode,
+ dictencodenested=dictencodenested,
+ maxdepth=maxdepth,
+ )
newtypes[i] = eltype(newcol)
newcols[i] = newcol
end
minlen, maxlen = isempty(newcols) ? (0, 0) : extrema(length, newcols)
- minlen == maxlen || throw(ArgumentError("columns with unequal lengths detected: $minlen < $maxlen"))
+ minlen == maxlen ||
+ throw(ArgumentError("columns with unequal lengths detected: $minlen < $maxlen"))
meta = _normalizemeta(meta)
- return ToArrowTable(Tables.Schema(sch.names, newtypes), newcols, meta, dictencodingdeltas)
+ return ToArrowTable(
+ Tables.Schema(sch.names, newtypes),
+ newcols,
+ meta,
+ dictencodingdeltas,
+ )
end
Tables.columns(x::ToArrowTable) = x
@@ -346,7 +565,10 @@ function Base.write(io::IO, msg::Message, blocks, sch, alignment)
metalen = padding(length(msg.msgflatbuf), alignment)
@debugv 1 "writing message: metalen = $metalen, bodylen = $(msg.bodylen), isrecordbatch = $(msg.isrecordbatch), headerType = $(msg.headerType)"
if msg.blockmsg
- push!(blocks[msg.isrecordbatch ? 1 : 2], Block(position(io), metalen + 8, msg.bodylen))
+ push!(
+ blocks[msg.isrecordbatch ? 1 : 2],
+ Block(position(io), metalen + 8, msg.bodylen),
+ )
end
# now write the final message spec out
# continuation byte
@@ -377,7 +599,14 @@ function makemessage(b, headerType, header, columns=nothing, bodylen=0)
# Meta.messageStartCustomMetadataVector(b, num_meta_elems)
msg = Meta.messageEnd(b)
FlatBuffers.finish!(b, msg)
- return Message(FlatBuffers.finishedbytes(b), columns, bodylen, headerType == Meta.RecordBatch, headerType == Meta.RecordBatch || headerType == Meta.DictionaryBatch, headerType)
+ return Message(
+ FlatBuffers.finishedbytes(b),
+ columns,
+ bodylen,
+ headerType == Meta.RecordBatch,
+ headerType == Meta.RecordBatch || headerType == Meta.DictionaryBatch,
+ headerType,
+ )
end
function makeschema(b, sch::Tables.Schema, columns)
@@ -499,13 +728,22 @@ struct Buffer
length::Int64
end
-function makerecordbatchmsg(sch::Tables.Schema{names,types}, columns, alignment) where {names,types}
+function makerecordbatchmsg(
+ sch::Tables.Schema{names,types},
+ columns,
+ alignment,
+) where {names,types}
b = FlatBuffers.Builder(1024)
recordbatch, bodylen = makerecordbatch(b, sch, columns, alignment)
return makemessage(b, Meta.RecordBatch, recordbatch, columns, bodylen)
end
-function makerecordbatch(b, sch::Tables.Schema{names,types}, columns, alignment) where {names,types}
+function makerecordbatch(
+ b,
+ sch::Tables.Schema{names,types},
+ columns,
+ alignment,
+) where {names,types}
nrows = Tables.rowcount(columns)
compress = nothing
@@ -516,7 +754,8 @@ function makerecordbatch(b, sch::Tables.Schema{names,types}, columns, alignment)
if col isa Compressed
compress = compressiontype(col)
end
- bufferoffset = makenodesbuffers!(col, fieldnodes, fieldbuffers, bufferoffset, alignment)
+ bufferoffset =
+ makenodesbuffers!(col, fieldnodes, fieldbuffers, bufferoffset, alignment)
end
@debugv 1 "building record batch message: nrows = $nrows, sch = $sch, compress = $compress"
diff --git a/test/arrowjson.jl b/test/arrowjson.jl
index 7cd7f45..a7eee75 100644
--- a/test/arrowjson.jl
+++ b/test/arrowjson.jl
@@ -36,10 +36,12 @@ mutable struct Int <: Type
end
Int() = Int("", 0, true)
-Type(::Base.Type{T}) where {T <: Integer} = Int("int", 8 * sizeof(T), T <: Signed)
+Type(::Base.Type{T}) where {T<:Integer} = Int("int", 8 * sizeof(T), T <: Signed)
StructTypes.StructType(::Base.Type{Int}) = StructTypes.Mutable()
function juliatype(f, x::Int)
- T = x.bitWidth == 8 ? Int8 : x.bitWidth == 16 ? Int16 :
+ T =
+ x.bitWidth == 8 ? Int8 :
+ x.bitWidth == 16 ? Int16 :
x.bitWidth == 32 ? Int32 : x.bitWidth == 64 ? Int64 : Int128
return x.isSigned ? T : unsigned(T)
end
@@ -49,19 +51,23 @@ struct FloatingPoint <: Type
precision::String
end
-Type(::Base.Type{T}) where {T <: AbstractFloat} = FloatingPoint("floatingpoint", T == Float16 ? "HALF" : T == Float32 ? "SINGLE" : "DOUBLE")
+Type(::Base.Type{T}) where {T<:AbstractFloat} = FloatingPoint(
+ "floatingpoint",
+ T == Float16 ? "HALF" : T == Float32 ? "SINGLE" : "DOUBLE",
+)
StructTypes.StructType(::Base.Type{FloatingPoint}) = StructTypes.Struct()
-juliatype(f, x::FloatingPoint) = x.precision == "HALF" ? Float16 : x.precision == "SINGLE" ? Float32 : Float64
+juliatype(f, x::FloatingPoint) =
+ x.precision == "HALF" ? Float16 : x.precision == "SINGLE" ? Float32 : Float64
struct FixedSizeBinary <: Type
name::String
byteWidth::Int64
end
-Type(::Base.Type{NTuple{N, UInt8}}) where {N} = FixedSizeBinary("fixedsizebinary", N)
-children(::Base.Type{NTuple{N, UInt8}}) where {N} = Field[]
+Type(::Base.Type{NTuple{N,UInt8}}) where {N} = FixedSizeBinary("fixedsizebinary", N)
+children(::Base.Type{NTuple{N,UInt8}}) where {N} = Field[]
StructTypes.StructType(::Base.Type{FixedSizeBinary}) = StructTypes.Struct()
-juliatype(f, x::FixedSizeBinary) = NTuple{x.byteWidth, UInt8}
+juliatype(f, x::FixedSizeBinary) = NTuple{x.byteWidth,UInt8}
struct Decimal <: Type
name::String
@@ -69,26 +75,30 @@ struct Decimal <: Type
scale::Int32
end
-Type(::Base.Type{Arrow.Decimal{P, S, T}}) where {P, S, T} = Decimal("decimal", P, S)
+Type(::Base.Type{Arrow.Decimal{P,S,T}}) where {P,S,T} = Decimal("decimal", P, S)
StructTypes.StructType(::Base.Type{Decimal}) = StructTypes.Struct()
-juliatype(f, x::Decimal) = Arrow.Decimal{x.precision, x.scale, Int128}
+juliatype(f, x::Decimal) = Arrow.Decimal{x.precision,x.scale,Int128}
mutable struct Timestamp <: Type
name::String
unit::String
- timezone::Union{Nothing ,String}
+ timezone::Union{Nothing,String}
end
Timestamp() = Timestamp("", "", nothing)
-unit(U) = U == Arrow.Meta.TimeUnit.SECOND ? "SECOND" :
- U == Arrow.Meta.TimeUnit.MILLISECOND ? "MILLISECOND" :
- U == Arrow.Meta.TimeUnit.MICROSECOND ? "MICROSECOND" : "NANOSECOND"
-Type(::Base.Type{Arrow.Timestamp{U, TZ}}) where {U, TZ} = Timestamp("timestamp", unit(U), TZ === nothing ? nothing : String(TZ))
+unit(U) =
+ U == Arrow.Meta.TimeUnit.SECOND ? "SECOND" :
+ U == Arrow.Meta.TimeUnit.MILLISECOND ? "MILLISECOND" :
+ U == Arrow.Meta.TimeUnit.MICROSECOND ? "MICROSECOND" : "NANOSECOND"
+Type(::Base.Type{Arrow.Timestamp{U,TZ}}) where {U,TZ} =
+ Timestamp("timestamp", unit(U), TZ === nothing ? nothing : String(TZ))
StructTypes.StructType(::Base.Type{Timestamp}) = StructTypes.Mutable()
-unitT(u) = u == "SECOND" ? Arrow.Meta.TimeUnit.SECOND :
- u == "MILLISECOND" ? Arrow.Meta.TimeUnit.MILLISECOND :
- u == "MICROSECOND" ? Arrow.Meta.TimeUnit.MICROSECOND : Arrow.Meta.TimeUnit.NANOSECOND
-juliatype(f, x::Timestamp) = Arrow.Timestamp{unitT(x.unit), x.timezone === nothing ? nothing : Symbol(x.timezone)}
+unitT(u) =
+ u == "SECOND" ? Arrow.Meta.TimeUnit.SECOND :
+ u == "MILLISECOND" ? Arrow.Meta.TimeUnit.MILLISECOND :
+ u == "MICROSECOND" ? Arrow.Meta.TimeUnit.MICROSECOND : Arrow.Meta.TimeUnit.NANOSECOND
+juliatype(f, x::Timestamp) =
+ Arrow.Timestamp{unitT(x.unit),x.timezone === nothing ? nothing : Symbol(x.timezone)}
struct Duration <: Type
name::String
@@ -97,16 +107,20 @@ end
Type(::Base.Type{Arrow.Duration{U}}) where {U} = Duration("duration", unit(U))
StructTypes.StructType(::Base.Type{Duration}) = StructTypes.Struct()
-juliatype(f, x::Duration) = Arrow.Duration{unit%(x.unit)}
+juliatype(f, x::Duration) = Arrow.Duration{unit % (x.unit)}
struct Date <: Type
name::String
unit::String
end
-Type(::Base.Type{Arrow.Date{U, T}}) where {U, T} = Date("date", U == Arrow.Meta.DateUnit.DAY ? "DAY" : "MILLISECOND")
+Type(::Base.Type{Arrow.Date{U,T}}) where {U,T} =
+ Date("date", U == Arrow.Meta.DateUnit.DAY ? "DAY" : "MILLISECOND")
StructTypes.StructType(::Base.Type{Date}) = StructTypes.Struct()
-juliatype(f, x::Date) = Arrow.Date{x.unit == "DAY" ? Arrow.Meta.DateUnit.DAY : Arrow.Meta.DateUnit.MILLISECOND, x.unit == "DAY" ? Int32 : Int64}
+juliatype(f, x::Date) = Arrow.Date{
+ x.unit == "DAY" ? Arrow.Meta.DateUnit.DAY : Arrow.Meta.DateUnit.MILLISECOND,
+ x.unit == "DAY" ? Int32 : Int64,
+}
struct Time <: Type
name::String
@@ -114,18 +128,26 @@ struct Time <: Type
bitWidth::Int64
end
-Type(::Base.Type{Arrow.Time{U, T}}) where {U, T} = Time("time", unit(U), 8 * sizeof(T))
+Type(::Base.Type{Arrow.Time{U,T}}) where {U,T} = Time("time", unit(U), 8 * sizeof(T))
StructTypes.StructType(::Base.Type{Time}) = StructTypes.Struct()
-juliatype(f, x::Time) = Arrow.Time{unitT(x.unit), x.unit == "SECOND" || x.unit == "MILLISECOND" ? Int32 : Int64}
+juliatype(f, x::Time) =
+ Arrow.Time{unitT(x.unit),x.unit == "SECOND" || x.unit == "MILLISECOND" ? Int32 : Int64}
struct Interval <: Type
name::String
unit::String
end
-Type(::Base.Type{Arrow.Interval{U, T}}) where {U, T} = Interval("interval", U == Arrow.Meta.IntervalUnit.YEAR_MONTH ? "YEAR_MONTH" : "DAY_TIME")
+Type(::Base.Type{Arrow.Interval{U,T}}) where {U,T} = Interval(
+ "interval",
+ U == Arrow.Meta.IntervalUnit.YEAR_MONTH ? "YEAR_MONTH" : "DAY_TIME",
+)
StructTypes.StructType(::Base.Type{Interval}) = StructTypes.Struct()
-juliatype(f, x::Interval) = Arrow.Interval{x.unit == "YEAR_MONTH" ? Arrow.Meta.IntervalUnit.YEAR_MONTH : Arrow.Meta.IntervalUnit.DAY_TIME, x.unit == "YEAR_MONTH" ? Int32 : Int64}
+juliatype(f, x::Interval) = Arrow.Interval{
+ x.unit == "YEAR_MONTH" ? Arrow.Meta.IntervalUnit.YEAR_MONTH :
+ Arrow.Meta.IntervalUnit.DAY_TIME,
+ x.unit == "YEAR_MONTH" ? Int32 : Int64,
+}
struct UnionT <: Type
name::String
@@ -133,10 +155,16 @@ struct UnionT <: Type
typIds::Vector{Int64}
end
-Type(::Base.Type{Arrow.UnionT{T, typeIds, U}}) where {T, typeIds, U} = UnionT("union", T == Arrow.Meta.UnionMode.Dense ? "DENSE" : "SPARSE", collect(typeIds))
-children(::Base.Type{Arrow.UnionT{T, typeIds, U}}) where {T, typeIds, U} = Field[Field("", fieldtype(U, i), nothing) for i = 1:fieldcount(U)]
+Type(::Base.Type{Arrow.UnionT{T,typeIds,U}}) where {T,typeIds,U} =
+ UnionT("union", T == Arrow.Meta.UnionMode.Dense ? "DENSE" : "SPARSE", collect(typeIds))
+children(::Base.Type{Arrow.UnionT{T,typeIds,U}}) where {T,typeIds,U} =
+ Field[Field("", fieldtype(U, i), nothing) for i = 1:fieldcount(U)]
StructTypes.StructType(::Base.Type{UnionT}) = StructTypes.Struct()
-juliatype(f, x::UnionT) = Arrow.UnionT{x.mode == "DENSE" ? Arrow.Meta.UnionMode.DENSE : Arrow.Meta.UnionMode.SPARSE, Tuple(x.typeIds), Tuple{(juliatype(y) for y in f.children)...}}
+juliatype(f, x::UnionT) = Arrow.UnionT{
+ x.mode == "DENSE" ? Arrow.Meta.UnionMode.DENSE : Arrow.Meta.UnionMode.SPARSE,
+ Tuple(x.typeIds),
+ Tuple{(juliatype(y) for y in f.children)...},
+}
struct List <: Type
name::String
@@ -159,32 +187,39 @@ struct FixedSizeList <: Type
listSize::Int64
end
-Type(::Base.Type{NTuple{N, T}}) where {N, T} = FixedSizeList("fixedsizelist", N)
-children(::Base.Type{NTuple{N, T}}) where {N, T} = [Field("item", T, nothing)]
+Type(::Base.Type{NTuple{N,T}}) where {N,T} = FixedSizeList("fixedsizelist", N)
+children(::Base.Type{NTuple{N,T}}) where {N,T} = [Field("item", T, nothing)]
StructTypes.StructType(::Base.Type{FixedSizeList}) = StructTypes.Struct()
-juliatype(f, x::FixedSizeList) = NTuple{x.listSize, juliatype(f.children[1])}
+juliatype(f, x::FixedSizeList) = NTuple{x.listSize,juliatype(f.children[1])}
struct Struct <: Type
name::String
end
-Type(::Base.Type{NamedTuple{names, types}}) where {names, types} = Struct("struct")
-children(::Base.Type{NamedTuple{names, types}}) where {names, types} = [Field(names[i], fieldtype(types, i), nothing) for i = 1:length(names)]
+Type(::Base.Type{NamedTuple{names,types}}) where {names,types} = Struct("struct")
+children(::Base.Type{NamedTuple{names,types}}) where {names,types} =
+ [Field(names[i], fieldtype(types, i), nothing) for i = 1:length(names)]
StructTypes.StructType(::Base.Type{Struct}) = StructTypes.Struct()
-juliatype(f, x::Struct) = NamedTuple{Tuple(Symbol(x.name) for x in f.children), Tuple{(juliatype(y) for y in f.children)...}}
+juliatype(f, x::Struct) = NamedTuple{
+ Tuple(Symbol(x.name) for x in f.children),
+ Tuple{(juliatype(y) for y in f.children)...},
+}
struct Map <: Type
name::String
keysSorted::Base.Bool
end
-Type(::Base.Type{Dict{K, V}}) where {K, V} = Map("map", false)
-children(::Base.Type{Dict{K, V}}) where {K, V} = [Field("entries", Arrow.KeyValue{K, V}, nothing)]
+Type(::Base.Type{Dict{K,V}}) where {K,V} = Map("map", false)
+children(::Base.Type{Dict{K,V}}) where {K,V} =
+ [Field("entries", Arrow.KeyValue{K,V}, nothing)]
StructTypes.StructType(::Base.Type{Map}) = StructTypes.Struct()
-juliatype(f, x::Map) = Dict{juliatype(f.children[1].children[1]), juliatype(f.children[1].children[2])}
+juliatype(f, x::Map) =
+ Dict{juliatype(f.children[1].children[1]),juliatype(f.children[1].children[2])}
-Type(::Base.Type{Arrow.KeyValue{K, V}}) where {K, V} = Struct("struct")
-children(::Base.Type{Arrow.KeyValue{K, V}}) where {K, V} = [Field("key", K, nothing), Field("value", V, nothing)]
+Type(::Base.Type{Arrow.KeyValue{K,V}}) where {K,V} = Struct("struct")
+children(::Base.Type{Arrow.KeyValue{K,V}}) where {K,V} =
+ [Field("key", K, nothing), Field("value", V, nothing)]
struct Null <: Type
name::String
@@ -256,12 +291,12 @@ const SUBTYPES = @eval (
largeutf8=LargeUtf8,
binary=Binary,
largebinary=LargeBinary,
- bool=Bool
+ bool=Bool,
)
StructTypes.subtypes(::Base.Type{Type}) = SUBTYPES
-const Metadata = Union{Nothing, Vector{NamedTuple{(:key, :value), Tuple{String, String}}}}
+const Metadata = Union{Nothing,Vector{NamedTuple{(:key, :value),Tuple{String,String}}}}
Metadata() = nothing
mutable struct DictEncoding
@@ -278,17 +313,18 @@ mutable struct Field
nullable::Base.Bool
type::Type
children::Vector{Field}
- dictionary::Union{DictEncoding, Nothing}
+ dictionary::Union{DictEncoding,Nothing}
metadata::Metadata
end
Field() = Field("", true, Type(), Field[], nothing, Metadata())
StructTypes.StructType(::Base.Type{Field}) = StructTypes.Mutable()
-Base.copy(f::Field) = Field(f.name, f.nullable, f.type, f.children, f.dictionary, f.metadata)
+Base.copy(f::Field) =
+ Field(f.name, f.nullable, f.type, f.children, f.dictionary, f.metadata)
function juliatype(f::Field)
T = juliatype(f, f.type)
- return f.nullable ? Union{T, Missing} : T
+ return f.nullable ? Union{T,Missing} : T
end
function Field(nm, ::Base.Type{T}, dictencodings) where {T}
@@ -321,10 +357,10 @@ Base.getindex(x::Offsets, i::Base.Int) = getindex(x.data, i)
mutable struct FieldData
name::String
count::Int64
- VALIDITY::Union{Nothing, Vector{Int8}}
- OFFSET::Union{Nothing, Offsets}
- TYPE_ID::Union{Nothing, Vector{Int8}}
- DATA::Union{Nothing, Vector{Any}}
+ VALIDITY::Union{Nothing,Vector{Int8}}
+ OFFSET::Union{Nothing,Offsets}
+ TYPE_ID::Union{Nothing,Vector{Int8}}
+ DATA::Union{Nothing,Vector{Any}}
children::Vector{FieldData}
end
@@ -350,13 +386,19 @@ function FieldData(nm, ::Base.Type{T}, col, dictencodings) where {T}
VALIDITY = OFFSET = TYPE_ID = DATA = nothing
children = FieldData[]
if S <: Pair
- return FieldData(nm, Vector{Arrow.KeyValue{Arrow._keytype(S), Arrow._valtype(S)}}, (Arrow.KeyValue(k, v) for (k, v) in pairs(col)))
+ return FieldData(
+ nm,
+ Vector{Arrow.KeyValue{Arrow._keytype(S),Arrow._valtype(S)}},
+ (Arrow.KeyValue(k, v) for (k, v) in pairs(col)),
+ )
elseif S !== Missing
# VALIDITY
VALIDITY = Int8[!ismissing(x) for x in col]
# OFFSET
if S <: Vector || S == String
- lenfun = S == String ? x->ismissing(x) ? 0 : sizeof(x) : x->ismissing(x) ? 0 : length(x)
+ lenfun =
+ S == String ? x -> ismissing(x) ? 0 : sizeof(x) :
+ x -> ismissing(x) ? 0 : length(x)
tot = sum(lenfun, col)
if tot > 2147483647
OFFSET = String[String(lenfun(x)) for x in col]
@@ -366,16 +408,45 @@ function FieldData(nm, ::Base.Type{T}, col, dictencodings) where {T}
pushfirst!(OFFSET, 0)
end
OFFSET = Offsets(OFFSET)
- push!(children, FieldData("item", eltype(S), Arrow.flatten(skipmissing(col)), dictencodings))
+ push!(
+ children,
+ FieldData(
+ "item",
+ eltype(S),
+ Arrow.flatten(skipmissing(col)),
+ dictencodings,
+ ),
+ )
elseif S <: NTuple
if Arrow.ArrowTypes.gettype(Arrow.ArrowTypes.ArrowKind(S)) == UInt8
- DATA = [ismissing(x) ? Arrow.ArrowTypes.default(S) : String(collect(x)) for x in col]
+ DATA = [
+ ismissing(x) ? Arrow.ArrowTypes.default(S) : String(collect(x)) for
+ x in col
+ ]
else
- push!(children, FieldData("item", Arrow.ArrowTypes.gettype(Arrow.ArrowTypes.ArrowKind(S)), Arrow.flatten(coalesce(x, Arrow.ArrowTypes.default(S)) for x in col), dictencodings))
+ push!(
+ children,
+ FieldData(
+ "item",
+ Arrow.ArrowTypes.gettype(Arrow.ArrowTypes.ArrowKind(S)),
+ Arrow.flatten(
+ coalesce(x, Arrow.ArrowTypes.default(S)) for x in col
+ ),
+ dictencodings,
+ ),
+ )
end
elseif S <: NamedTuple
for (nm, typ) in zip(fieldnames(S), fieldtypes(S))
- push!(children, FieldData(String(nm), typ, (getfield(x, nm) for x in col), dictencodings))
+ push!(
+ children,
+ FieldData(
+ String(nm),
+ typ,
+ (getfield(x, nm) for x in col),
+ dictencodings,
+ ),
+ )
end
elseif S <: Arrow.UnionT
U = eltype(S)
@@ -391,17 +462,42 @@ function FieldData(nm, ::Base.Type{T}, col, dictencodings) where {T}
end
for i = 1:fieldcount(U)
SS = fieldtype(U, i)
- push!(children, FieldData("$i", SS, Arrow.filtered(i == 1 ? Union{SS, Missing} : Arrow.maybemissing(SS), col), dictencodings))
+ push!(
+ children,
+ FieldData(
+ "$i",
+ SS,
+ Arrow.filtered(
+ i == 1 ? Union{SS,Missing} : Arrow.maybemissing(SS),
+ col,
+ ),
+ dictencodings,
+ ),
+ )
end
else
for i = 1:fieldcount(U)
SS = fieldtype(U, i)
- push!(children, FieldData("$i", SS, Arrow.replaced(SS, col), dictencodings))
+ push!(
+ children,
+ FieldData("$i", SS, Arrow.replaced(SS, col), dictencodings),
+ )
end
end
elseif S <: KeyValue
- push!(children, FieldData("key", Arrow.keyvalueK(S), (x.key for x in col), dictencodings))
- push!(children, FieldData("value", Arrow.keyvalueV(S), (x.value for x in col), dictencodings))
+ push!(
+ children,
+ FieldData("key", Arrow.keyvalueK(S), (x.key for x in col), dictencodings),
+ )
+ push!(
+ children,
+ FieldData(
+ "value",
+ Arrow.keyvalueV(S),
+ (x.value for x in col),
+ dictencodings,
+ ),
+ )
end
end
return FieldData(nm, len, VALIDITY, OFFSET, TYPE_ID, DATA, children)
@@ -453,7 +549,9 @@ function Tables.partitions(x::DataFile)
# special case empty batches by producing a single DataFile w/ schema
return (DataFile(x.schema, RecordBatch[], x.dictionaries),)
else
- return (DataFile(x.schema, [x.batches[i]], x.dictionaries) for i = 1:length(x.batches))
+ return (
+ DataFile(x.schema, [x.batches[i]], x.dictionaries) for i = 1:length(x.batches)
+ )
end
end
@@ -465,12 +563,20 @@ function Tables.schema(x::DataFile)
return Tables.Schema(names, types)
end
-Tables.columnnames(x::DataFile) = map(x -> Symbol(x.name), x.schema.fields)
+Tables.columnnames(x::DataFile) = map(x -> Symbol(x.name), x.schema.fields)
function Tables.getcolumn(x::DataFile, i::Base.Int)
field = x.schema.fields[i]
type = juliatype(field)
- return ChainedVector(ArrowArray{type}[ArrowArray{type}(field, length(x.batches) > 0 ? x.batches[j].columns[i] : FieldData(), x.dictionaries) for j = 1:length(x.batches)])
+ return ChainedVector(
+ ArrowArray{type}[
+ ArrowArray{type}(
+ field,
+ length(x.batches) > 0 ? x.batches[j].columns[i] : FieldData(),
+ x.dictionaries,
+ ) for j = 1:length(x.batches)
+ ],
+ )
end
function Tables.getcolumn(x::DataFile, nm::Symbol)
@@ -490,7 +596,10 @@ function Base.getindex(x::ArrowArray{T}, i::Base.Int) where {T}
@boundscheck checkbounds(x, i)
S = Base.nonmissingtype(T)
if x.field.dictionary !== nothing
- fielddata = x.dictionaries[findfirst(y -> y.id == x.field.dictionary.id, x.dictionaries)].data.columns[1]
+ fielddata = x.dictionaries[findfirst(
+ y -> y.id == x.field.dictionary.id,
+ x.dictionaries,
+ )].data.columns[1]
field = copy(x.field)
field.dictionary = nothing
idx = x.fielddata.DATA[i] + 1
@@ -504,9 +613,17 @@ function Base.getindex(x::ArrowArray{T}, i::Base.Int) where {T}
typeid = tids[x.fielddata.TYPE_ID[i]]
if Arrow.unionmode(S) == Arrow.Meta.UnionMode.DENSE
off = x.fielddata.OFFSET[i]
- return ArrowArray(x.field.children[typeid+1], x.fielddata.children[typeid+1], x.dictionaries)[off]
+ return ArrowArray(
+ x.field.children[typeid + 1],
+ x.fielddata.children[typeid + 1],
+ x.dictionaries,
+ )[off]
else
- return ArrowArray(x.field.children[typeid+1], x.fielddata.children[typeid+1], x.dictionaries)[i]
+ return ArrowArray(
+ x.field.children[typeid + 1],
+ x.fielddata.children[typeid + 1],
+ x.dictionaries,
+ )[i]
end
end
x.fielddata.VALIDITY[i] == 0 && return missing
@@ -516,7 +633,11 @@ function Base.getindex(x::ArrowArray{T}, i::Base.Int) where {T}
return x.fielddata.DATA[i]
elseif S <: Vector
offs = x.fielddata.OFFSET
- A = ArrowArray{eltype(S)}(x.field.children[1], x.fielddata.children[1], x.dictionaries)
+ A = ArrowArray{eltype(S)}(
+ x.field.children[1],
+ x.fielddata.children[1],
+ x.dictionaries,
+ )
return A[(offs[i] + 1):offs[i + 1]]
elseif S <: Dict
offs = x.fielddata.OFFSET
@@ -525,15 +646,22 @@ function Base.getindex(x::ArrowArray{T}, i::Base.Int) where {T}
elseif S <: Tuple
if Arrow.ArrowTypes.gettype(Arrow.ArrowTypes.ArrowKind(S)) == UInt8
A = x.fielddata.DATA
- return Tuple(map(UInt8, collect(A[i][1:x.field.type.byteWidth])))
+ return Tuple(map(UInt8, collect(A[i][1:(x.field.type.byteWidth)])))
else
sz = x.field.type.listSize
- A = ArrowArray{Arrow.ArrowTypes.gettype(Arrow.ArrowTypes.ArrowKind(S))}(x.field.children[1], x.fielddata.children[1], x.dictionaries)
+ A = ArrowArray{Arrow.ArrowTypes.gettype(Arrow.ArrowTypes.ArrowKind(S))}(
+ x.field.children[1],
+ x.fielddata.children[1],
+ x.dictionaries,
+ )
off = (i - 1) * sz + 1
return Tuple(A[off:(off + sz - 1)])
end
elseif S <: NamedTuple
- data = (ArrowArray(x.field.children[j], x.fielddata.children[j], x.dictionaries)[i] for j = 1:length(x.field.children))
+ data = (
+ ArrowArray(x.field.children[j], x.fielddata.children[j], x.dictionaries)[i] for
+ j = 1:length(x.field.children)
+ )
return NamedTuple{fieldnames(S)}(Tuple(data))
elseif S == Int64 || S == UInt64
return parse(S, x.fielddata.DATA[i])
@@ -556,7 +684,7 @@ function DataFile(source)
metadata = nothing # TODO?
batches = RecordBatch[]
dictionaries = DictionaryBatch[]
- dictencodings = Dict{String, Tuple{Base.Type, DictEncoding}}()
+ dictencodings = Dict{String,Tuple{Base.Type,DictEncoding}}()
dictid = Ref(0)
for (i, tbl1) in Tables.partitions(source)
tbl = Arrow.toarrowtable(Table.Columns(tbl1))
diff --git a/test/dates.jl b/test/dates.jl
index ee1883f..9d33f5b 100644
--- a/test/dates.jl
+++ b/test/dates.jl
@@ -42,7 +42,8 @@ struct WrappedZonedDateTime
x::TimeZones.ZonedDateTime
end
-ArrowTypes.arrowname(::Type{WrappedZonedDateTime}) = Symbol("JuliaLang.WrappedZonedDateTime")
+ArrowTypes.arrowname(::Type{WrappedZonedDateTime}) =
+ Symbol("JuliaLang.WrappedZonedDateTime")
ArrowTypes.JuliaType(::Val{Symbol("JuliaLang.WrappedZonedDateTime")}) = WrappedZonedDateTime
@testset "Date and time wrappers with missing" begin
@@ -52,7 +53,7 @@ ArrowTypes.JuliaType(::Val{Symbol("JuliaLang.WrappedZonedDateTime")}) = WrappedZ
else
time = T(Dates.now())
end
- table = (; x = [missing, missing, time, missing, time])
+ table = (; x=[missing, missing, time, missing, time])
io = Arrow.tobuffer(table)
tbl = Arrow.Table(io)
@test isequal(collect(tbl.x), table.x)
@@ -60,7 +61,18 @@ ArrowTypes.JuliaType(::Val{Symbol("JuliaLang.WrappedZonedDateTime")}) = WrappedZ
end
@testset "`default(T) isa T`" begin
- for T in (Dates.Date, Dates.Time, Dates.DateTime, TimeZones.ZonedDateTime, Dates.Nanosecond, Dates.Millisecond, Dates.Second, Dates.Day, Dates.Month, Dates.Year)
+ for T in (
+ Dates.Date,
+ Dates.Time,
+ Dates.DateTime,
+ TimeZones.ZonedDateTime,
+ Dates.Nanosecond,
+ Dates.Millisecond,
+ Dates.Second,
+ Dates.Day,
+ Dates.Month,
+ Dates.Year,
+ )
@test Arrow.ArrowTypes.default(T) isa T
end
end
diff --git a/test/pyarrow_roundtrip.jl b/test/pyarrow_roundtrip.jl
index 97b48b7..b1b32da 100644
--- a/test/pyarrow_roundtrip.jl
+++ b/test/pyarrow_roundtrip.jl
@@ -22,36 +22,36 @@ include(joinpath(dirname(pathof(Arrow)), "../test/testtables.jl"))
for (nm, t, writekw, readkw, extratests) in testtables
nm == "unions" && continue
@testset "pyarrow roundtrip: $nm" begin
- io = IOBuffer()
- Arrow.write(io, t; writekw...)
- seekstart(io)
- buf = PyCall.pybytes(take!(io))
- reader = pa.ipc.open_stream(buf)
- sink = pa.BufferOutputStream()
- writer = pa.ipc.new_stream(sink, reader.schema)
- for batch in reader
- writer.write_batch(batch)
- end
- writer.close()
- buf = sink.getvalue()
- jbytes = copy(reinterpret(UInt8, buf))
- tt = Arrow.Table(jbytes)
+ io = IOBuffer()
+ Arrow.write(io, t; writekw...)
+ seekstart(io)
+ buf = PyCall.pybytes(take!(io))
+ reader = pa.ipc.open_stream(buf)
+ sink = pa.BufferOutputStream()
+ writer = pa.ipc.new_stream(sink, reader.schema)
+ for batch in reader
+ writer.write_batch(batch)
+ end
+ writer.close()
+ buf = sink.getvalue()
+ jbytes = copy(reinterpret(UInt8, buf))
+ tt = Arrow.Table(jbytes)
end
@testset "pyarrow roundtrip w/ compression: $nm" begin
- io = IOBuffer()
- Arrow.write(io, t; compress=((:lz4, :zstd)[rand(1:2)]), writekw...)
- seekstart(io)
- buf = PyCall.pybytes(take!(io))
- reader = pa.ipc.open_stream(buf)
- sink = pa.BufferOutputStream()
- writer = pa.ipc.new_stream(sink, reader.schema)
- for batch in reader
- writer.write_batch(batch)
- end
- writer.close()
- buf = sink.getvalue()
- jbytes = copy(reinterpret(UInt8, buf))
- tt = Arrow.Table(jbytes)
+ io = IOBuffer()
+ Arrow.write(io, t; compress=((:lz4, :zstd)[rand(1:2)]), writekw...)
+ seekstart(io)
+ buf = PyCall.pybytes(take!(io))
+ reader = pa.ipc.open_stream(buf)
+ sink = pa.BufferOutputStream()
+ writer = pa.ipc.new_stream(sink, reader.schema)
+ for batch in reader
+ writer.write_batch(batch)
+ end
+ writer.close()
+ buf = sink.getvalue()
+ jbytes = copy(reinterpret(UInt8, buf))
+ tt = Arrow.Table(jbytes)
end
end
diff --git a/test/runtests.jl b/test/runtests.jl
index 9169b81..48ca399 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -46,682 +46,974 @@ struct CustomStruct2{sym}
end
@testset "Arrow" begin
-
-@testset "table roundtrips" begin
-
-for case in testtables
- testtable(case...)
-end
-
-end # @testset "table roundtrips"
-
-@testset "table append" begin
- # skip windows since file locking prevents mktemp cleanup
- if !Sys.iswindows()
+ @testset "table roundtrips" begin
for case in testtables
- testappend(case...)
+ testtable(case...)
end
+ end # @testset "table roundtrips"
- testappend_partitions()
-
- for compression_option in (:lz4, :zstd)
- testappend_compression(compression_option)
- end
- end
-end # @testset "table append"
-
-@testset "arrow json integration tests" begin
-
-for file in readdir(joinpath(dirname(pathof(Arrow)), "../test/arrowjson"))
- jsonfile = joinpath(joinpath(dirname(pathof(Arrow)), "../test/arrowjson"), file)
- @testset "integration test for $jsonfile" begin
- df = ArrowJSON.parsefile(jsonfile);
- io = Arrow.tobuffer(df)
- tbl = Arrow.Table(io; convert=false);
- @test isequal(df, tbl)
- end
-end
-
-end # @testset "arrow json integration tests"
-
-@testset "abstract path" begin
- # Make a custom path type that simulates how AWSS3.jl's S3Path works
- struct CustomPath <: AbstractPath
- path::PosixPath
- end
-
- Base.read(p::CustomPath) = read(p.path)
-
- io = Arrow.tobuffer((col=[0],))
- tt = Arrow.Table(io)
-
- mktempdir() do dir
- p = Path(joinpath(dir, "test.arrow"))
- Arrow.write(p, tt)
- @test isfile(p)
-
+ @testset "table append" begin
# skip windows since file locking prevents mktemp cleanup
if !Sys.iswindows()
- tt2 = Arrow.Table(p)
- @test values(tt) == values(tt2)
-
- tt3 = Arrow.Table(CustomPath(p))
- @test values(tt) == values(tt3)
- end
- end
-end # @testset "abstract path"
-
-@testset "misc" begin
-
-@testset "# multiple record batches" begin
-t = Tables.partitioner(((col1=Union{Int64, Missing}[1,2,3,4,5,6,7,8,9,missing],), (col1=Union{Int64, Missing}[missing,11],)))
-io = Arrow.tobuffer(t)
-tt = Arrow.Table(io)
-@test length(tt) == 1
-@test isequal(tt.col1, vcat([1,2,3,4,5,6,7,8,9,missing], [missing,11]))
-@test eltype(tt.col1) === Union{Int64, Missing}
-
-# Arrow.Stream
-seekstart(io)
-str = Arrow.Stream(io)
-@test eltype(str) == Arrow.Table
-@test !Base.isdone(str)
-state = iterate(str)
-@test state !== nothing
-tt, st = state
-@test length(tt) == 1
-@test isequal(tt.col1, [1,2,3,4,5,6,7,8,9,missing])
-
-state = iterate(str, st)
-@test state !== nothing
-tt, st = state
-@test length(tt) == 1
-@test isequal(tt.col1, [missing,11])
-
-@test iterate(str, st) === nothing
-
-@test isequal(collect(str)[1].col1, [1,2,3,4,5,6,7,8,9,missing])
-@test isequal(collect(str)[2].col1, [missing,11])
-end
+ for case in testtables
+ testappend(case...)
+ end
-@testset "# dictionary batch isDelta" begin
-t = (
- col1=Int64[1,2,3,4],
- col2=Union{String, Missing}["hey", "there", "sailor", missing],
- col3=NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(3), b=(c="sailor",)), (a=Int64(4), b=(c="jo-bob",))]
-)
-t2 = (
- col1=Int64[1,2,5,6],
- col2=Union{String, Missing}["hey", "there", "sailor2", missing],
- col3=NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(5), b=(c="sailor2",)), (a=Int64(4), b=(c="jo-bob",))]
-)
-tt = Tables.partitioner((t, t2))
-tt = Arrow.Table(Arrow.tobuffer(tt; dictencode=true, dictencodenested=true))
-@test tt.col1 == [1,2,3,4,1,2,5,6]
-@test isequal(tt.col2, ["hey", "there", "sailor", missing, "hey", "there", "sailor2", missing])
-@test isequal(tt.col3, vcat(NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(3), b=(c="sailor",)), (a=Int64(4), b=(c="jo-bob",))], NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(5), b=(c="sailor2",)), (a=Int64(4), b=(c="jo-bob",))]))
-end
+ testappend_partitions()
-@testset "metadata" begin
-t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
-meta = Dict("key1" => "value1", "key2" => "value2")
-meta2 = Dict("colkey1" => "colvalue1", "colkey2" => "colvalue2")
-tt = Arrow.Table(Arrow.tobuffer(t; colmetadata=Dict(:col1 => meta2), metadata=meta))
-@test length(tt) == length(t)
-@test tt.col1 == t.col1
-@test eltype(tt.col1) === Int64
-@test Arrow.getmetadata(tt) == Arrow.toidict(meta)
-@test Arrow.getmetadata(tt.col1) == Arrow.toidict(meta2)
-
-t = (col1=collect(1:10), col2=collect('a':'j'), col3=collect(1:10))
-meta = ("key1" => :value1, :key2 => "value2")
-meta2 = ("colkey1" => :colvalue1, :colkey2 => "colvalue2")
-meta3 = ("colkey3" => :colvalue3,)
-tt = Arrow.Table(Arrow.tobuffer(t; colmetadata=Dict(:col2 => meta2, :col3 => meta3), metadata=meta))
-@test Arrow.getmetadata(tt) == Arrow.toidict(String(k) => String(v) for (k, v) in meta)
-@test Arrow.getmetadata(tt.col1) === nothing
-@test Arrow.getmetadata(tt.col2)["colkey1"] == "colvalue1"
-@test Arrow.getmetadata(tt.col2)["colkey2"] == "colvalue2"
-@test Arrow.getmetadata(tt.col3)["colkey3"] == "colvalue3"
-end
-
-@testset "# custom compressors" begin
-lz4 = Arrow.CodecLz4.LZ4FrameCompressor(; compressionlevel=8)
-Arrow.CodecLz4.TranscodingStreams.initialize(lz4)
-t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
-tt = Arrow.Table(Arrow.tobuffer(t; compress=lz4))
-@test length(tt) == length(t)
-@test all(isequal.(values(t), values(tt)))
-
-zstd = Arrow.CodecZstd.ZstdCompressor(; level=8)
-Arrow.CodecZstd.TranscodingStreams.initialize(zstd)
-t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
-tt = Arrow.Table(Arrow.tobuffer(t; compress=zstd))
-@test length(tt) == length(t)
-@test all(isequal.(values(t), values(tt)))
-end
-
-@testset "# custom alignment" begin
-t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
-tt = Arrow.Table(Arrow.tobuffer(t; alignment=64))
-@test length(tt) == length(t)
-@test all(isequal.(values(t), values(tt)))
-end
-
-@testset "# 53" begin
-s = "a" ^ 100
-t = (a=[SubString(s, 1:10), SubString(s, 11:20)],)
-tt = Arrow.Table(Arrow.tobuffer(t))
-@test tt.a == ["aaaaaaaaaa", "aaaaaaaaaa"]
-end
-
-@testset "# 49" begin
-@test_throws SystemError Arrow.Table("file_that_doesnt_exist")
-@test_throws SystemError Arrow.Table(p"file_that_doesnt_exist")
-end
+ for compression_option in (:lz4, :zstd)
+ testappend_compression(compression_option)
+ end
+ end
+ end # @testset "table append"
+
+ @testset "arrow json integration tests" begin
+ for file in readdir(joinpath(dirname(pathof(Arrow)), "../test/arrowjson"))
+ jsonfile = joinpath(joinpath(dirname(pathof(Arrow)), "../test/arrowjson"), file)
+ @testset "integration test for $jsonfile" begin
+ df = ArrowJSON.parsefile(jsonfile)
+ io = Arrow.tobuffer(df)
+ tbl = Arrow.Table(io; convert=false)
+ @test isequal(df, tbl)
+ end
+ end
+ end # @testset "arrow json integration tests"
-@testset "# 52" begin
-t = (a=Arrow.DictEncode(string.(1:129)),)
-tt = Arrow.Table(Arrow.tobuffer(t))
-end
+ @testset "abstract path" begin
+ # Make a custom path type that simulates how AWSS3.jl's S3Path works
+ struct CustomPath <: AbstractPath
+ path::PosixPath
+ end
-@testset "# 60: unequal column lengths" begin
-io = IOBuffer()
-@test_throws ArgumentError Arrow.write(io, (a = Int[], b = ["asd"], c=collect(1:100)))
-end
+ Base.read(p::CustomPath) = read(p.path)
-@testset "# nullability of custom extension types" begin
-t = (a=['a', missing],)
-tt = Arrow.Table(Arrow.tobuffer(t))
-@test isequal(tt.a, ['a', missing])
-end
+ io = Arrow.tobuffer((col=[0],))
+ tt = Arrow.Table(io)
-@testset "# automatic custom struct serialization/deserialization" begin
-t = (col1=[CustomStruct(1, 2.3, "hey"), CustomStruct(4, 5.6, "there")],)
+ mktempdir() do dir
+ p = Path(joinpath(dir, "test.arrow"))
+ Arrow.write(p, tt)
+ @test isfile(p)
-Arrow.ArrowTypes.arrowname(::Type{CustomStruct}) = Symbol("JuliaLang.CustomStruct")
-Arrow.ArrowTypes.JuliaType(::Val{Symbol("JuliaLang.CustomStruct")}, S) = CustomStruct
-tt = Arrow.Table(Arrow.tobuffer(t))
-@test length(tt) == length(t)
-@test all(isequal.(values(t), values(tt)))
-end
+ # skip windows since file locking prevents mktemp cleanup
+ if !Sys.iswindows()
+ tt2 = Arrow.Table(p)
+ @test values(tt) == values(tt2)
-@testset "# 76" begin
-t = (col1=NamedTuple{(:a,),Tuple{Union{Int,String}}}[(a=1,), (a="x",)],)
-tt = Arrow.Table(Arrow.tobuffer(t))
-@test length(tt) == length(t)
-@test all(isequal.(values(t), values(tt)))
-end
-
-@testset "# 89 etc. - UUID FixedSizeListKind overloads" begin
-@test Arrow.ArrowTypes.gettype(Arrow.ArrowTypes.ArrowKind(UUID)) == UInt8
-@test Arrow.ArrowTypes.getsize(Arrow.ArrowTypes.ArrowKind(UUID)) == 16
-end
-
-@testset "# 98" begin
-t = (a = [Nanosecond(0), Nanosecond(1)], b = [uuid4(), uuid4()], c = [missing, Nanosecond(1)])
-tt = Arrow.Table(Arrow.tobuffer(t))
-@test copy(tt.a) isa Vector{Nanosecond}
-@test copy(tt.b) isa Vector{UUID}
-@test copy(tt.c) isa Vector{Union{Missing,Nanosecond}}
-end
+ tt3 = Arrow.Table(CustomPath(p))
+ @test values(tt) == values(tt3)
+ end
+ end
+ end # @testset "abstract path"
+
+ @testset "misc" begin
+ @testset "# multiple record batches" begin
+ t = Tables.partitioner((
+ (col1=Union{Int64,Missing}[1, 2, 3, 4, 5, 6, 7, 8, 9, missing],),
+ (col1=Union{Int64,Missing}[missing, 11],),
+ ))
+ io = Arrow.tobuffer(t)
+ tt = Arrow.Table(io)
+ @test length(tt) == 1
+ @test isequal(
+ tt.col1,
+ vcat([1, 2, 3, 4, 5, 6, 7, 8, 9, missing], [missing, 11]),
+ )
+ @test eltype(tt.col1) === Union{Int64,Missing}
+
+ # Arrow.Stream
+ seekstart(io)
+ str = Arrow.Stream(io)
+ @test eltype(str) == Arrow.Table
+ @test !Base.isdone(str)
+ state = iterate(str)
+ @test state !== nothing
+ tt, st = state
+ @test length(tt) == 1
+ @test isequal(tt.col1, [1, 2, 3, 4, 5, 6, 7, 8, 9, missing])
+
+ state = iterate(str, st)
+ @test state !== nothing
+ tt, st = state
+ @test length(tt) == 1
+ @test isequal(tt.col1, [missing, 11])
+
+ @test iterate(str, st) === nothing
+
+ @test isequal(collect(str)[1].col1, [1, 2, 3, 4, 5, 6, 7, 8, 9, missing])
+ @test isequal(collect(str)[2].col1, [missing, 11])
+ end
-@testset "# copy on DictEncoding w/ missing values" begin
-x = PooledArray(["hey", missing])
-x2 = Arrow.toarrowvector(x)
-@test isequal(copy(x2), x)
-end
+ @testset "# dictionary batch isDelta" begin
+ t = (
+ col1=Int64[1, 2, 3, 4],
+ col2=Union{String,Missing}["hey", "there", "sailor", missing],
+ col3=NamedTuple{
+ (:a, :b),
+ Tuple{Int64,Union{Missing,NamedTuple{(:c,),Tuple{String}}}},
+ }[
+ (a=Int64(1), b=missing),
+ (a=Int64(1), b=missing),
+ (a=Int64(3), b=(c="sailor",)),
+ (a=Int64(4), b=(c="jo-bob",)),
+ ],
+ )
+ t2 = (
+ col1=Int64[1, 2, 5, 6],
+ col2=Union{String,Missing}["hey", "there", "sailor2", missing],
+ col3=NamedTuple{
+ (:a, :b),
+ Tuple{Int64,Union{Missing,NamedTuple{(:c,),Tuple{String}}}},
+ }[
+ (a=Int64(1), b=missing),
+ (a=Int64(1), b=missing),
+ (a=Int64(5), b=(c="sailor2",)),
+ (a=Int64(4), b=(c="jo-bob",)),
+ ],
+ )
+ tt = Tables.partitioner((t, t2))
+ tt = Arrow.Table(Arrow.tobuffer(tt; dictencode=true, dictencodenested=true))
+ @test tt.col1 == [1, 2, 3, 4, 1, 2, 5, 6]
+ @test isequal(
+ tt.col2,
+ ["hey", "there", "sailor", missing, "hey", "there", "sailor2", missing],
+ )
+ @test isequal(
+ tt.col3,
+ vcat(
+ NamedTuple{
+ (:a, :b),
+ Tuple{Int64,Union{Missing,NamedTuple{(:c,),Tuple{String}}}},
+ }[
+ (a=Int64(1), b=missing),
+ (a=Int64(1), b=missing),
+ (a=Int64(3), b=(c="sailor",)),
+ (a=Int64(4), b=(c="jo-bob",)),
+ ],
+ NamedTuple{
+ (:a, :b),
+ Tuple{Int64,Union{Missing,NamedTuple{(:c,),Tuple{String}}}},
+ }[
+ (a=Int64(1), b=missing),
+ (a=Int64(1), b=missing),
+ (a=Int64(5), b=(c="sailor2",)),
+ (a=Int64(4), b=(c="jo-bob",)),
+ ],
+ ),
+ )
+ end
-@testset "# some dict encoding coverage" begin
-# signed indices for DictEncodedKind #112 #113 #114
-av = Arrow.toarrowvector(PooledArray(repeat(["a", "b"], inner = 5)))
-@test isa(first(av.indices), Signed)
+ @testset "metadata" begin
+ t = (col1=Int64[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],)
+ meta = Dict("key1" => "value1", "key2" => "value2")
+ meta2 = Dict("colkey1" => "colvalue1", "colkey2" => "colvalue2")
+ tt = Arrow.Table(
+ Arrow.tobuffer(t; colmetadata=Dict(:col1 => meta2), metadata=meta),
+ )
+ @test length(tt) == length(t)
+ @test tt.col1 == t.col1
+ @test eltype(tt.col1) === Int64
+ @test Arrow.getmetadata(tt) == Arrow.toidict(meta)
+ @test Arrow.getmetadata(tt.col1) == Arrow.toidict(meta2)
+
+ t = (col1=collect(1:10), col2=collect('a':'j'), col3=collect(1:10))
+ meta = ("key1" => :value1, :key2 => "value2")
+ meta2 = ("colkey1" => :colvalue1, :colkey2 => "colvalue2")
+ meta3 = ("colkey3" => :colvalue3,)
+ tt = Arrow.Table(
+ Arrow.tobuffer(
+ t;
+ colmetadata=Dict(:col2 => meta2, :col3 => meta3),
+ metadata=meta,
+ ),
+ )
+ @test Arrow.getmetadata(tt) ==
+ Arrow.toidict(String(k) => String(v) for (k, v) in meta)
+ @test Arrow.getmetadata(tt.col1) === nothing
+ @test Arrow.getmetadata(tt.col2)["colkey1"] == "colvalue1"
+ @test Arrow.getmetadata(tt.col2)["colkey2"] == "colvalue2"
+ @test Arrow.getmetadata(tt.col3)["colkey3"] == "colvalue3"
+ end
-av = Arrow.toarrowvector(CategoricalArray(repeat(["a", "b"], inner = 5)))
-@test isa(first(av.indices), Signed)
+ @testset "# custom compressors" begin
+ lz4 = Arrow.CodecLz4.LZ4FrameCompressor(; compressionlevel=8)
+ Arrow.CodecLz4.TranscodingStreams.initialize(lz4)
+ t = (col1=Int64[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],)
+ tt = Arrow.Table(Arrow.tobuffer(t; compress=lz4))
+ @test length(tt) == length(t)
+ @test all(isequal.(values(t), values(tt)))
+
+ zstd = Arrow.CodecZstd.ZstdCompressor(; level=8)
+ Arrow.CodecZstd.TranscodingStreams.initialize(zstd)
+ t = (col1=Int64[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],)
+ tt = Arrow.Table(Arrow.tobuffer(t; compress=zstd))
+ @test length(tt) == length(t)
+ @test all(isequal.(values(t), values(tt)))
+ end
-av = Arrow.toarrowvector(CategoricalArray(["a", "bb", missing]))
-@test isa(first(av.indices), Signed)
-@test length(av) == 3
-@test eltype(av) == Union{String, Missing}
+ @testset "# custom alignment" begin
+ t = (col1=Int64[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],)
+ tt = Arrow.Table(Arrow.tobuffer(t; alignment=64))
+ @test length(tt) == length(t)
+ @test all(isequal.(values(t), values(tt)))
+ end
-av = Arrow.toarrowvector(CategoricalArray(["a", "bb", "ccc"]))
-@test isa(first(av.indices), Signed)
-@test length(av) == 3
-@test eltype(av) == String
-end
+ @testset "# 53" begin
+ s = "a"^100
+ t = (a=[SubString(s, 1:10), SubString(s, 11:20)],)
+ tt = Arrow.Table(Arrow.tobuffer(t))
+ @test tt.a == ["aaaaaaaaaa", "aaaaaaaaaa"]
+ end
-@testset "# 120" begin
-x = PooledArray(["hey", missing])
-x2 = Arrow.toarrowvector(x)
-@test eltype(DataAPI.refpool(x2)) == Union{Missing, String}
-@test eltype(DataAPI.levels(x2)) == String
-@test DataAPI.refarray(x2) == [1, 2]
-end
+ @testset "# 49" begin
+ @test_throws SystemError Arrow.Table("file_that_doesnt_exist")
+ @test_throws SystemError Arrow.Table(p"file_that_doesnt_exist")
+ end
-@testset "# 121" begin
-a = PooledArray(repeat(string.('S', 1:130), inner=5), compress=true)
-@test eltype(a.refs) == UInt8
-av = Arrow.toarrowvector(a)
-@test eltype(av.indices) == Int16
-end
+ @testset "# 52" begin
+ t = (a=Arrow.DictEncode(string.(1:129)),)
+ tt = Arrow.Table(Arrow.tobuffer(t))
+ end
-@testset "# 123" begin
-t = (x = collect(zip(rand(10), rand(10))),)
-tt = Arrow.Table(Arrow.tobuffer(t))
-@test tt.x == t.x
-end
+ @testset "# 60: unequal column lengths" begin
+ io = IOBuffer()
+ @test_throws ArgumentError Arrow.write(
+ io,
+ (a=Int[], b=["asd"], c=collect(1:100)),
+ )
+ end
-@testset "# 144" begin
-t = Tables.partitioner(((a=Arrow.DictEncode([1,2,3]),), (a=Arrow.DictEncode(fill(1, 129)),)))
-tt = Arrow.Table(Arrow.tobuffer(t))
-@test length(tt.a) == 132
-end
+ @testset "# nullability of custom extension types" begin
+ t = (a=['a', missing],)
+ tt = Arrow.Table(Arrow.tobuffer(t))
+ @test isequal(tt.a, ['a', missing])
+ end
-@testset "# 126" begin
-t = Tables.partitioner(
- (
- (a=Arrow.toarrowvector(PooledArray([1,2,3 ])),),
- (a=Arrow.toarrowvector(PooledArray([1,2,3,4])),),
- (a=Arrow.toarrowvector(PooledArray([1,2,3,4,5])),),
- )
-)
-tt = Arrow.Table(Arrow.tobuffer(t))
-@test length(tt.a) == 12
-@test tt.a == [1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5]
-
-t = Tables.partitioner(
- (
- (a=Arrow.toarrowvector(PooledArray([1,2,3 ], signed=true, compress=true)),),
- (a=Arrow.toarrowvector(PooledArray(collect(1:129))),),
- )
-)
-io = IOBuffer()
-@test_logs (:error, "error writing arrow data on partition = 2") begin
- @test_throws ErrorException Arrow.write(io, t)
-end
-end
+ @testset "# automatic custom struct serialization/deserialization" begin
+ t = (col1=[CustomStruct(1, 2.3, "hey"), CustomStruct(4, 5.6, "there")],)
-@testset "# 75" begin
-tbl = Arrow.Table(Arrow.tobuffer((sets = [Set([1,2,3]), Set([1,2,3])],)))
-@test eltype(tbl.sets) <: Set
-end
+ Arrow.ArrowTypes.arrowname(::Type{CustomStruct}) =
+ Symbol("JuliaLang.CustomStruct")
+ Arrow.ArrowTypes.JuliaType(::Val{Symbol("JuliaLang.CustomStruct")}, S) =
+ CustomStruct
+ tt = Arrow.Table(Arrow.tobuffer(t))
+ @test length(tt) == length(t)
+ @test all(isequal.(values(t), values(tt)))
+ end
-@testset "# 85" begin
-tbl = Arrow.Table(Arrow.tobuffer((tups = [(1, 3.14, "hey"), (1, 3.14, "hey")],)))
-@test eltype(tbl.tups) <: Tuple
-end
+ @testset "# 76" begin
+ t = (col1=NamedTuple{(:a,),Tuple{Union{Int,String}}}[(a=1,), (a="x",)],)
+ tt = Arrow.Table(Arrow.tobuffer(t))
+ @test length(tt) == length(t)
+ @test all(isequal.(values(t), values(tt)))
+ end
-@testset "Nothing" begin
-tbl = Arrow.Table(Arrow.tobuffer((nothings=[nothing, nothing, nothing],)))
-@test tbl.nothings == [nothing, nothing, nothing]
-end
+ @testset "# 89 etc. - UUID FixedSizeListKind overloads" begin
+ @test Arrow.ArrowTypes.gettype(Arrow.ArrowTypes.ArrowKind(UUID)) == UInt8
+ @test Arrow.ArrowTypes.getsize(Arrow.ArrowTypes.ArrowKind(UUID)) == 16
+ end
-@testset "arrowmetadata" begin
-# arrowmetadata
-t = (col1=[CustomStruct2{:hey}(1), CustomStruct2{:hey}(2)],)
-ArrowTypes.arrowname(::Type{<:CustomStruct2}) = Symbol("CustomStruct2")
-@test_logs (:warn, r"unsupported ARROW:extension:name type: \"CustomStruct2\"") begin
- tbl = Arrow.Table(Arrow.tobuffer(t))
-end
-@test eltype(tbl.col1) <: NamedTuple
-ArrowTypes.arrowmetadata(::Type{CustomStruct2{sym}}) where {sym} = sym
-ArrowTypes.JuliaType(::Val{:CustomStruct2}, S, meta) = CustomStruct2{Symbol(meta)}
-tbl = Arrow.Table(Arrow.tobuffer(t))
-@test eltype(tbl.col1) == CustomStruct2{:hey}
-end
+ @testset "# 98" begin
+ t = (
+ a=[Nanosecond(0), Nanosecond(1)],
+ b=[uuid4(), uuid4()],
+ c=[missing, Nanosecond(1)],
+ )
+ tt = Arrow.Table(Arrow.tobuffer(t))
+ @test copy(tt.a) isa Vector{Nanosecond}
+ @test copy(tt.b) isa Vector{UUID}
+ @test copy(tt.c) isa Vector{Union{Missing,Nanosecond}}
+ end
-@testset "# 166" begin
-t = (
- col1=[zero(Arrow.Timestamp{Arrow.Meta.TimeUnit.NANOSECOND, nothing})],
-)
-tbl = Arrow.Table(Arrow.tobuffer(t))
-@test_logs (:warn, r"automatically converting Arrow.Timestamp with precision = NANOSECOND") begin
- @test tbl.col1[1] == Dates.DateTime(1970)
-end
-end
+ @testset "# copy on DictEncoding w/ missing values" begin
+ x = PooledArray(["hey", missing])
+ x2 = Arrow.toarrowvector(x)
+ @test isequal(copy(x2), x)
+ end
-@testset "# 95; Arrow.ToTimestamp" begin
-x = [ZonedDateTime(Dates.DateTime(2020), tz"Europe/Paris")]
-c = Arrow.ToTimestamp(x)
-@test eltype(c) == Arrow.Timestamp{Arrow.Flatbuf.TimeUnit.MILLISECOND, Symbol("Europe/Paris")}
-@test c[1] == Arrow.Timestamp{Arrow.Flatbuf.TimeUnit.MILLISECOND, Symbol("Europe/Paris")}(1577833200000)
-end
+ @testset "# some dict encoding coverage" begin
+ # signed indices for DictEncodedKind #112 #113 #114
+ av = Arrow.toarrowvector(PooledArray(repeat(["a", "b"], inner=5)))
+ @test isa(first(av.indices), Signed)
-@testset "# 158" begin
-# arrow ipc stream generated from pyarrow with no record batches
-bytes = UInt8[0xff, 0xff, 0xff, 0xff, 0x78, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00,
- 0x06, 0x00, 0x05, 0x00, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00,
- 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00,
- 0x00, 0x00, 0x10, 0x00, 0x14, 0x00, 0x08, 0x00, 0x06, 0x00, 0x07, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x10, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x07, 0x00, 0x08, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00]
-tbl = Arrow.Table(bytes)
-@test length(tbl.a) == 0
-@test eltype(tbl.a) == Union{Int64, Missing}
-end
+ av = Arrow.toarrowvector(CategoricalArray(repeat(["a", "b"], inner=5)))
+ @test isa(first(av.indices), Signed)
-@testset "# 181" begin
-d = Dict{Int,Int}()
-for i in 1:9
- d = Dict(i => d)
-end
-tbl = (x = [d],)
-msg = "reached nested serialization level (20) deeper than provided max depth argument (19); to increase allowed nesting level, pass `maxdepth=X`"
-@test_throws ErrorException(msg) Arrow.tobuffer(tbl; maxdepth=19)
-@test Arrow.Table(Arrow.tobuffer(tbl; maxdepth=20)).x == tbl.x
-end
+ av = Arrow.toarrowvector(CategoricalArray(["a", "bb", missing]))
+ @test isa(first(av.indices), Signed)
+ @test length(av) == 3
+ @test eltype(av) == Union{String,Missing}
-@testset "# 167" begin
-t = (
- col1=[["boop", "she"], ["boop", "she"], ["boo"]],
-)
-tbl = Arrow.Table(Arrow.tobuffer(t))
-@test eltype(tbl.col1) <: AbstractVector{String}
-end
+ av = Arrow.toarrowvector(CategoricalArray(["a", "bb", "ccc"]))
+ @test isa(first(av.indices), Signed)
+ @test length(av) == 3
+ @test eltype(av) == String
+ end
-@testset "# 200 VersionNumber" begin
- t = (
- col1=[v"1"],
- )
- tbl = Arrow.Table(Arrow.tobuffer(t))
- @test eltype(tbl.col1) == VersionNumber
-end
+ @testset "# 120" begin
+ x = PooledArray(["hey", missing])
+ x2 = Arrow.toarrowvector(x)
+ @test eltype(DataAPI.refpool(x2)) == Union{Missing,String}
+ @test eltype(DataAPI.levels(x2)) == String
+ @test DataAPI.refarray(x2) == [1, 2]
+ end
-@testset "`show`" begin
- str = nothing
- table = (; a = 1:5, b = fill(1.0, 5))
- arrow_table = Arrow.Table(Arrow.tobuffer(table))
- # 2 and 3-arg show with no metadata
- for outer str in (sprint(show, arrow_table),
- sprint(show, MIME"text/plain"(), arrow_table))
- @test length(str) < 100
- @test occursin("5 rows", str)
- @test occursin("2 columns", str)
- @test occursin("Int", str)
- @test occursin("Float64", str)
- @test !occursin("metadata entries", str)
- end
-
- # 2-arg show with metadata
- big_dict = Dict((randstring(rand(5:10)) => randstring(rand(1:3)) for _ = 1:100))
- arrow_table = Arrow.Table(Arrow.tobuffer(table; metadata=big_dict))
- str2 = sprint(show, arrow_table)
- @test length(str2) > length(str)
- @test length(str2) < 200
- @test occursin("metadata entries", str2)
-
- # 3-arg show with metadata
- str3 = sprint(show, MIME"text/plain"(), arrow_table; context = IOContext(IOBuffer(), :displaysize => (24, 100), :limit=>true))
- @test length(str3) < 1000
- # some but not too many `=>`'s for printing the metadata
- @test 5 < length(collect(eachmatch(r"=>", str3))) < 20
+ @testset "# 121" begin
+ a = PooledArray(repeat(string.('S', 1:130), inner=5), compress=true)
+ @test eltype(a.refs) == UInt8
+ av = Arrow.toarrowvector(a)
+ @test eltype(av.indices) == Int16
+ end
-end
+ @testset "# 123" begin
+ t = (x=collect(zip(rand(10), rand(10))),)
+ tt = Arrow.Table(Arrow.tobuffer(t))
+ @test tt.x == t.x
+ end
-@testset "# 194" begin
-@test isempty(Arrow.Table(Arrow.tobuffer(Dict{Symbol, Vector}())))
-end
+ @testset "# 144" begin
+ t = Tables.partitioner((
+ (a=Arrow.DictEncode([1, 2, 3]),),
+ (a=Arrow.DictEncode(fill(1, 129)),),
+ ))
+ tt = Arrow.Table(Arrow.tobuffer(t))
+ @test length(tt.a) == 132
+ end
-@testset "# 229" begin
-struct Foo229{x}
- y::String
- z::Int
-end
-Arrow.ArrowTypes.arrowname(::Type{<:Foo229}) = Symbol("JuliaLang.Foo229")
-Arrow.ArrowTypes.ArrowType(::Type{Foo229{x}}) where {x} = Tuple{String,String,Int}
-Arrow.ArrowTypes.toarrow(row::Foo229{x}) where {x} = (String(x), row.y, row.z)
-Arrow.ArrowTypes.JuliaType(::Val{Symbol("JuliaLang.Foo229")}, ::Any) = Foo229
-Arrow.ArrowTypes.fromarrow(::Type{<:Foo229}, x, y, z) = Foo229{Symbol(x)}(y, z)
-cols = (k1=[Foo229{:a}("a", 1), Foo229{:b}("b", 2)], k2=[Foo229{:c}("c", 3), Foo229{:d}("d", 4)])
-tbl = Arrow.Table(Arrow.tobuffer(cols))
-@test tbl.k1 == cols.k1
-@test tbl.k2 == cols.k2
-end
+ @testset "# 126" begin
+ t = Tables.partitioner((
+ (a=Arrow.toarrowvector(PooledArray([1, 2, 3])),),
+ (a=Arrow.toarrowvector(PooledArray([1, 2, 3, 4])),),
+ (a=Arrow.toarrowvector(PooledArray([1, 2, 3, 4, 5])),),
+ ))
+ tt = Arrow.Table(Arrow.tobuffer(t))
+ @test length(tt.a) == 12
+ @test tt.a == [1, 2, 3, 1, 2, 3, 4, 1, 2, 3, 4, 5]
+
+ t = Tables.partitioner((
+ (
+ a=Arrow.toarrowvector(
+ PooledArray([1, 2, 3], signed=true, compress=true),
+ ),
+ ),
+ (a=Arrow.toarrowvector(PooledArray(collect(1:129))),),
+ ))
+ io = IOBuffer()
+ @test_logs (:error, "error writing arrow data on partition = 2") begin
+ @test_throws ErrorException Arrow.write(io, t)
+ end
+ end
-@testset "# PR 234" begin
-# bugfix parsing primitive arrays
-buf = [
- 0x14,0x00,0x00,0x00,0x00,0x00,0x0e,0x00,0x14,0x00,0x00,0x00,0x10,0x00,0x0c,0x00,0x08,
- 0x00,0x04,0x00,0x0e,0x00,0x00,0x00,0x2c,0x00,0x00,0x00,0x38,0x00,0x00,0x00,0x38,0x00,
- 0x00,0x00,0x38,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
- 0x03,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x02,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x00,
- 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
-]
-
-struct TestData <: Arrow.FlatBuffers.Table
- bytes::Vector{UInt8}
- pos::Base.Int
-end
+ @testset "# 75" begin
+ tbl = Arrow.Table(Arrow.tobuffer((sets=[Set([1, 2, 3]), Set([1, 2, 3])],)))
+ @test eltype(tbl.sets) <: Set
+ end
-function Base.getproperty(x::TestData, field::Symbol)
- if field === :DataInt32
- o = Arrow.FlatBuffers.offset(x, 12)
- o != 0 && return Arrow.FlatBuffers.Array{Int32}(x, o)
- else
- @warn "field $field not supported"
- end
-end
+ @testset "# 85" begin
+ tbl = Arrow.Table(Arrow.tobuffer((tups=[(1, 3.14, "hey"), (1, 3.14, "hey")],)))
+ @test eltype(tbl.tups) <: Tuple
+ end
-d = Arrow.FlatBuffers.getrootas(TestData, buf, 0);
-@test d.DataInt32 == UInt32[1,2,3]
-end
+ @testset "Nothing" begin
+ tbl = Arrow.Table(Arrow.tobuffer((nothings=[nothing, nothing, nothing],)))
+ @test tbl.nothings == [nothing, nothing, nothing]
+ end
-@testset "# test multiple inputs treated as one table" begin
-t = (
- col1=[1, 2, 3, 4, 5],
- col2=[1.2, 2.3, 3.4, 4.5, 5.6],
-)
-tbl = Arrow.Table([Arrow.tobuffer(t), Arrow.tobuffer(t)])
-@test tbl.col1 == [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
-@test tbl.col2 == [1.2, 2.3, 3.4, 4.5, 5.6, 1.2, 2.3, 3.4, 4.5, 5.6]
-
-# schemas must match between multiple inputs
-t2 = (
- col1=[1.2, 2.3, 3.4, 4.5, 5.6],
-)
-@test_throws ArgumentError Arrow.Table([Arrow.tobuffer(t), Arrow.tobuffer(t2)])
-
-# test multiple inputs treated as one table
-tbls = collect(Arrow.Stream([Arrow.tobuffer(t), Arrow.tobuffer(t)]))
-@test tbls[1].col1 == tbls[2].col1
-@test tbls[1].col2 == tbls[2].col2
-
-# schemas must match between multiple inputs
-t2 = (
- col1=[1.2, 2.3, 3.4, 4.5, 5.6],
-)
-@test_throws ArgumentError collect(Arrow.Stream([Arrow.tobuffer(t), Arrow.tobuffer(t2)]))
-end
+ @testset "arrowmetadata" begin
+ # arrowmetadata
+ t = (col1=[CustomStruct2{:hey}(1), CustomStruct2{:hey}(2)],)
+ ArrowTypes.arrowname(::Type{<:CustomStruct2}) = Symbol("CustomStruct2")
+ @test_logs (:warn, r"unsupported ARROW:extension:name type: \"CustomStruct2\"") begin
+ tbl = Arrow.Table(Arrow.tobuffer(t))
+ end
+ @test eltype(tbl.col1) <: NamedTuple
+ ArrowTypes.arrowmetadata(::Type{CustomStruct2{sym}}) where {sym} = sym
+ ArrowTypes.JuliaType(::Val{:CustomStruct2}, S, meta) =
+ CustomStruct2{Symbol(meta)}
+ tbl = Arrow.Table(Arrow.tobuffer(t))
+ @test eltype(tbl.col1) == CustomStruct2{:hey}
+ end
-@testset "# 253" begin
-# https://github.com/apache/arrow-julia/issues/253
-@test Arrow.toidict(Pair{String, String}[]) == Base.ImmutableDict{String, String}()
-end
+ @testset "# 166" begin
+ t = (col1=[zero(Arrow.Timestamp{Arrow.Meta.TimeUnit.NANOSECOND,nothing})],)
+ tbl = Arrow.Table(Arrow.tobuffer(t))
+ @test_logs (
+ :warn,
+ r"automatically converting Arrow.Timestamp with precision = NANOSECOND",
+ ) begin
+ @test tbl.col1[1] == Dates.DateTime(1970)
+ end
+ end
-@testset "# 232" begin
-# https://github.com/apache/arrow-julia/issues/232
-t = (; x=[Dict(true => 1.32, 1.2 => 0.53495216)])
-@test_throws ArgumentError("`keytype(d)` must be concrete to serialize map-like `d`, but `keytype(d) == Real`") Arrow.tobuffer(t)
-t = (; x=[Dict(32.0 => true, 1.2 => 0.53495216)])
-@test_throws ArgumentError("`valtype(d)` must be concrete to serialize map-like `d`, but `valtype(d) == Real`") Arrow.tobuffer(t)
-t = (; x=[Dict(true => 1.32, 1.2 => true)])
-@test_throws ArgumentError("`keytype(d)` must be concrete to serialize map-like `d`, but `keytype(d) == Real`") Arrow.tobuffer(t)
-end
+ @testset "# 95; Arrow.ToTimestamp" begin
+ x = [ZonedDateTime(Dates.DateTime(2020), tz"Europe/Paris")]
+ c = Arrow.ToTimestamp(x)
+ @test eltype(c) ==
+ Arrow.Timestamp{Arrow.Flatbuf.TimeUnit.MILLISECOND,Symbol("Europe/Paris")}
+ @test c[1] ==
+ Arrow.Timestamp{Arrow.Flatbuf.TimeUnit.MILLISECOND,Symbol("Europe/Paris")}(
+ 1577833200000,
+ )
+ end
-@testset "# 214" begin
-# https://github.com/apache/arrow-julia/issues/214
-t1 = (; x = [(Nanosecond(42),)])
-t2 = Arrow.Table(Arrow.tobuffer(t1))
-t3 = Arrow.Table(Arrow.tobuffer(t2))
-@test t3.x == t1.x
-
-t1 = (; x = [(; a=Nanosecond(i), b=Nanosecond(i+1)) for i = 1:5])
-t2 = Arrow.Table(Arrow.tobuffer(t1))
-t3 = Arrow.Table(Arrow.tobuffer(t2))
-@test t3.x == t1.x
-end
+ @testset "# 158" begin
+ # arrow ipc stream generated from pyarrow with no record batches
+ bytes = UInt8[
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0x78,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x10,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x0a,
+ 0x00,
+ 0x0c,
+ 0x00,
+ 0x06,
+ 0x00,
+ 0x05,
+ 0x00,
+ 0x08,
+ 0x00,
+ 0x0a,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x01,
+ 0x04,
+ 0x00,
+ 0x0c,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x08,
+ 0x00,
+ 0x08,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x04,
+ 0x00,
+ 0x08,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x04,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x01,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x14,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x10,
+ 0x00,
+ 0x14,
+ 0x00,
+ 0x08,
+ 0x00,
+ 0x06,
+ 0x00,
+ 0x07,
+ 0x00,
+ 0x0c,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x10,
+ 0x00,
+ 0x10,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x01,
+ 0x02,
+ 0x10,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x1c,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x04,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x01,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x61,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x08,
+ 0x00,
+ 0x0c,
+ 0x00,
+ 0x08,
+ 0x00,
+ 0x07,
+ 0x00,
+ 0x08,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x01,
+ 0x40,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0xff,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ ]
+ tbl = Arrow.Table(bytes)
+ @test length(tbl.a) == 0
+ @test eltype(tbl.a) == Union{Int64,Missing}
+ end
-@testset "Writer" begin
- io = IOBuffer()
- writer = open(Arrow.Writer, io)
- a = 1:26
- b = 'A':'Z'
- partitionsize = 10
- iter_a = Iterators.partition(a, partitionsize)
- iter_b = Iterators.partition(b, partitionsize)
- for (part_a, part_b) in zip(iter_a, iter_b)
- Arrow.write(writer, (a = part_a, b = part_b))
- end
- close(writer)
- seekstart(io)
- table = Arrow.Table(io)
- @test table.a == collect(a)
- @test table.b == collect(b)
-end
+ @testset "# 181" begin
+ d = Dict{Int,Int}()
+ for i = 1:9
+ d = Dict(i => d)
+ end
+ tbl = (x=[d],)
+ msg = "reached nested serialization level (20) deeper than provided max depth argument (19); to increase allowed nesting level, pass `maxdepth=X`"
+ @test_throws ErrorException(msg) Arrow.tobuffer(tbl; maxdepth=19)
+ @test Arrow.Table(Arrow.tobuffer(tbl; maxdepth=20)).x == tbl.x
+ end
-@testset "# Empty input" begin
-@test Arrow.Table(UInt8[]) isa Arrow.Table
-@test isempty(Tables.rows(Arrow.Table(UInt8[])))
-@test Arrow.Stream(UInt8[]) isa Arrow.Stream
-@test isempty(Tables.partitions(Arrow.Stream(UInt8[])))
-end
+ @testset "# 167" begin
+ t = (col1=[["boop", "she"], ["boop", "she"], ["boo"]],)
+ tbl = Arrow.Table(Arrow.tobuffer(t))
+ @test eltype(tbl.col1) <: AbstractVector{String}
+ end
-@testset "# 324" begin
-# https://github.com/apache/arrow-julia/issues/324
-@test_throws ArgumentError filter!(x -> x > 1, Arrow.toarrowvector([1, 2, 3]))
-end
+ @testset "# 200 VersionNumber" begin
+ t = (col1=[v"1"],)
+ tbl = Arrow.Table(Arrow.tobuffer(t))
+ @test eltype(tbl.col1) == VersionNumber
+ end
-@testset "# 327" begin
-# https://github.com/apache/arrow-julia/issues/327
-zdt = ZonedDateTime(DateTime(2020, 11, 1, 6), tz"America/New_York"; from_utc=true)
-arrow_zdt = ArrowTypes.toarrow(zdt)
-zdt_again = ArrowTypes.fromarrow(ZonedDateTime, arrow_zdt)
-@test zdt == zdt_again
-
-# Check that we still correctly read in old TimeZones
-original_table = (; col = [ ZonedDateTime(DateTime(1, 2, 3, 4, 5, 6), tz"UTC+3") for _ in 1:5])
-table = Arrow.Table(joinpath(@__DIR__, "old_zdt.arrow"))
-@test original_table.col == table.col
-end
+ @testset "`show`" begin
+ str = nothing
+ table = (; a=1:5, b=fill(1.0, 5))
+ arrow_table = Arrow.Table(Arrow.tobuffer(table))
+ # 2 and 3-arg show with no metadata
+ for outer str in
+ (sprint(show, arrow_table), sprint(show, MIME"text/plain"(), arrow_table))
+ @test length(str) < 100
+ @test occursin("5 rows", str)
+ @test occursin("2 columns", str)
+ @test occursin("Int", str)
+ @test occursin("Float64", str)
+ @test !occursin("metadata entries", str)
+ end
+
+ # 2-arg show with metadata
+ big_dict = Dict((randstring(rand(5:10)) => randstring(rand(1:3)) for _ = 1:100))
+ arrow_table = Arrow.Table(Arrow.tobuffer(table; metadata=big_dict))
+ str2 = sprint(show, arrow_table)
+ @test length(str2) > length(str)
+ @test length(str2) < 200
+ @test occursin("metadata entries", str2)
+
+ # 3-arg show with metadata
+ str3 = sprint(
+ show,
+ MIME"text/plain"(),
+ arrow_table;
+ context=IOContext(IOBuffer(), :displaysize => (24, 100), :limit => true),
+ )
+ @test length(str3) < 1000
+ # some but not too many `=>`'s for printing the metadata
+ @test 5 < length(collect(eachmatch(r"=>", str3))) < 20
+ end
-@testset "# 243" begin
-table = (; col = [(; v=v"1"), (; v=v"2"), missing])
-@test isequal(Arrow.Table(Arrow.tobuffer(table)).col, table.col)
-end
+ @testset "# 194" begin
+ @test isempty(Arrow.Table(Arrow.tobuffer(Dict{Symbol,Vector}())))
+ end
-@testset "# 367" begin
-t = (; x=Union{ZonedDateTime,Missing}[missing])
-a = Arrow.Table(Arrow.tobuffer(t))
-@test Tables.schema(a) == Tables.schema(t)
-@test isequal(a.x, t.x)
-end
+ @testset "# 229" begin
+ struct Foo229{x}
+ y::String
+ z::Int
+ end
+ Arrow.ArrowTypes.arrowname(::Type{<:Foo229}) = Symbol("JuliaLang.Foo229")
+ Arrow.ArrowTypes.ArrowType(::Type{Foo229{x}}) where {x} =
+ Tuple{String,String,Int}
+ Arrow.ArrowTypes.toarrow(row::Foo229{x}) where {x} = (String(x), row.y, row.z)
+ Arrow.ArrowTypes.JuliaType(::Val{Symbol("JuliaLang.Foo229")}, ::Any) = Foo229
+ Arrow.ArrowTypes.fromarrow(::Type{<:Foo229}, x, y, z) = Foo229{Symbol(x)}(y, z)
+ cols = (
+ k1=[Foo229{:a}("a", 1), Foo229{:b}("b", 2)],
+ k2=[Foo229{:c}("c", 3), Foo229{:d}("d", 4)],
+ )
+ tbl = Arrow.Table(Arrow.tobuffer(cols))
+ @test tbl.k1 == cols.k1
+ @test tbl.k2 == cols.k2
+ end
-# https://github.com/apache/arrow-julia/issues/414
-df = DataFrame(("$i" => rand(1000) for i in 1:65536)...)
-df_load = Arrow.Table(Arrow.tobuffer(df))
-@test Tables.schema(df) == Tables.schema(df_load)
-for (col1, col2) in zip(Tables.columns(df), Tables.columns(df_load))
- @test col1 == col2
-end
+ @testset "# PR 234" begin
+ # bugfix parsing primitive arrays
+ buf = [
+ 0x14,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x0e,
+ 0x00,
+ 0x14,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x10,
+ 0x00,
+ 0x0c,
+ 0x00,
+ 0x08,
+ 0x00,
+ 0x04,
+ 0x00,
+ 0x0e,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x2c,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x38,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x38,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x38,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x03,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x01,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x02,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x03,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ ]
+
+ struct TestData <: Arrow.FlatBuffers.Table
+ bytes::Vector{UInt8}
+ pos::Base.Int
+ end
+
+ function Base.getproperty(x::TestData, field::Symbol)
+ if field === :DataInt32
+ o = Arrow.FlatBuffers.offset(x, 12)
+ o != 0 && return Arrow.FlatBuffers.Array{Int32}(x, o)
+ else
+ @warn "field $field not supported"
+ end
+ end
+
+ d = Arrow.FlatBuffers.getrootas(TestData, buf, 0)
+ @test d.DataInt32 == UInt32[1, 2, 3]
+ end
-@testset "# 411" begin
-# Vector{UInt8} are written as List{UInt8} in Arrow
-# Base.CodeUnits are written as Binary
-t = (
- a=[[0x00, 0x01], UInt8[], [0x03]],
- am=[[0x00, 0x01], [0x03], missing],
- b=[b"01", b"", b"3"],
- bm=[b"01", b"3", missing],
- c=["a", "b", "c"],
- cm=["a", "c", missing]
-)
-buf = Arrow.tobuffer(t)
-tt = Arrow.Table(buf)
-@test t.a == tt.a
-@test isequal(t.am, tt.am)
-@test t.b == tt.b
-@test isequal(t.bm, tt.bm)
-@test t.c == tt.c
-@test isequal(t.cm, tt.cm)
-@test Arrow.schema(tt)[].fields[1].type isa Arrow.Flatbuf.List
-@test Arrow.schema(tt)[].fields[3].type isa Arrow.Flatbuf.Binary
-pos = position(buf)
-Arrow.append(buf, tt)
-seekstart(buf)
-buf1 = read(buf, pos)
-buf2 = read(buf)
-t1 = Arrow.Table(buf1)
-t2 = Arrow.Table(buf2)
-@test isequal(t1.a, t2.a)
-@test isequal(t1.am, t2.am)
-@test isequal(t1.b, t2.b)
-@test isequal(t1.bm, t2.bm)
-@test isequal(t1.c, t2.c)
-@test isequal(t1.cm, t2.cm)
+ @testset "# test multiple inputs treated as one table" begin
+ t = (col1=[1, 2, 3, 4, 5], col2=[1.2, 2.3, 3.4, 4.5, 5.6])
+ tbl = Arrow.Table([Arrow.tobuffer(t), Arrow.tobuffer(t)])
+ @test tbl.col1 == [1, 2, 3, 4, 5, 1, 2, 3, 4, 5]
+ @test tbl.col2 == [1.2, 2.3, 3.4, 4.5, 5.6, 1.2, 2.3, 3.4, 4.5, 5.6]
+
+ # schemas must match between multiple inputs
+ t2 = (col1=[1.2, 2.3, 3.4, 4.5, 5.6],)
+ @test_throws ArgumentError Arrow.Table([Arrow.tobuffer(t), Arrow.tobuffer(t2)])
+
+ # test multiple inputs treated as one table
+ tbls = collect(Arrow.Stream([Arrow.tobuffer(t), Arrow.tobuffer(t)]))
+ @test tbls[1].col1 == tbls[2].col1
+ @test tbls[1].col2 == tbls[2].col2
+
+ # schemas must match between multiple inputs
+ t2 = (col1=[1.2, 2.3, 3.4, 4.5, 5.6],)
+ @test_throws ArgumentError collect(
+ Arrow.Stream([Arrow.tobuffer(t), Arrow.tobuffer(t2)]),
+ )
+ end
-end
+ @testset "# 253" begin
+ # https://github.com/apache/arrow-julia/issues/253
+ @test Arrow.toidict(Pair{String,String}[]) ==
+ Base.ImmutableDict{String,String}()
+ end
-@testset "# 435" begin
+ @testset "# 232" begin
+ # https://github.com/apache/arrow-julia/issues/232
+ t = (; x=[Dict(true => 1.32, 1.2 => 0.53495216)])
+ @test_throws ArgumentError(
+ "`keytype(d)` must be concrete to serialize map-like `d`, but `keytype(d) == Real`",
+ ) Arrow.tobuffer(t)
+ t = (; x=[Dict(32.0 => true, 1.2 => 0.53495216)])
+ @test_throws ArgumentError(
+ "`valtype(d)` must be concrete to serialize map-like `d`, but `valtype(d) == Real`",
+ ) Arrow.tobuffer(t)
+ t = (; x=[Dict(true => 1.32, 1.2 => true)])
+ @test_throws ArgumentError(
+ "`keytype(d)` must be concrete to serialize map-like `d`, but `keytype(d) == Real`",
+ ) Arrow.tobuffer(t)
+ end
-t = Arrow.Table(joinpath(dirname(pathof(Arrow)), "../test/java_compress_len_neg_one.arrow"))
-@test length(t) == 15
-@test length(t.isA) == 102
+ @testset "# 214" begin
+ # https://github.com/apache/arrow-julia/issues/214
+ t1 = (; x=[(Nanosecond(42),)])
+ t2 = Arrow.Table(Arrow.tobuffer(t1))
+ t3 = Arrow.Table(Arrow.tobuffer(t2))
+ @test t3.x == t1.x
+
+ t1 = (; x=[(; a=Nanosecond(i), b=Nanosecond(i + 1)) for i = 1:5])
+ t2 = Arrow.Table(Arrow.tobuffer(t1))
+ t3 = Arrow.Table(Arrow.tobuffer(t2))
+ @test t3.x == t1.x
+ end
-end
+ @testset "Writer" begin
+ io = IOBuffer()
+ writer = open(Arrow.Writer, io)
+ a = 1:26
+ b = 'A':'Z'
+ partitionsize = 10
+ iter_a = Iterators.partition(a, partitionsize)
+ iter_b = Iterators.partition(b, partitionsize)
+ for (part_a, part_b) in zip(iter_a, iter_b)
+ Arrow.write(writer, (a=part_a, b=part_b))
+ end
+ close(writer)
+ seekstart(io)
+ table = Arrow.Table(io)
+ @test table.a == collect(a)
+ @test table.b == collect(b)
+ end
-@testset "# 293" begin
-
-t = (a = [1, 2, 3], b = [1.0, 2.0, 3.0])
-buf = Arrow.tobuffer(t)
-tbl = Arrow.Table(buf)
-parts = Tables.partitioner((t, t))
-buf2 = Arrow.tobuffer(parts)
-tbl2 = Arrow.Table(buf2)
-for t in Tables.partitions(tbl2)
- @test t.a == tbl.a
- @test t.b == tbl.b
-end
+ @testset "# Empty input" begin
+ @test Arrow.Table(UInt8[]) isa Arrow.Table
+ @test isempty(Tables.rows(Arrow.Table(UInt8[])))
+ @test Arrow.Stream(UInt8[]) isa Arrow.Stream
+ @test isempty(Tables.partitions(Arrow.Stream(UInt8[])))
+ end
-end
+ @testset "# 324" begin
+ # https://github.com/apache/arrow-julia/issues/324
+ @test_throws ArgumentError filter!(x -> x > 1, Arrow.toarrowvector([1, 2, 3]))
+ end
-@testset "# 437" begin
+ @testset "# 327" begin
+ # https://github.com/apache/arrow-julia/issues/327
+ zdt =
+ ZonedDateTime(DateTime(2020, 11, 1, 6), tz"America/New_York"; from_utc=true)
+ arrow_zdt = ArrowTypes.toarrow(zdt)
+ zdt_again = ArrowTypes.fromarrow(ZonedDateTime, arrow_zdt)
+ @test zdt == zdt_again
+
+ # Check that we still correctly read in old TimeZones
+ original_table =
+ (; col=[ZonedDateTime(DateTime(1, 2, 3, 4, 5, 6), tz"UTC+3") for _ = 1:5])
+ table = Arrow.Table(joinpath(@__DIR__, "old_zdt.arrow"))
+ @test original_table.col == table.col
+ end
-t = Arrow.Table(joinpath(dirname(pathof(Arrow)), "../test/java_compressed_zero_length.arrow"))
-@test length(t) == 2
-@test length(t.name) == 0
+ @testset "# 243" begin
+ table = (; col=[(; v=v"1"), (; v=v"2"), missing])
+ @test isequal(Arrow.Table(Arrow.tobuffer(table)).col, table.col)
+ end
-end
+ @testset "# 367" begin
+ t = (; x=Union{ZonedDateTime,Missing}[missing])
+ a = Arrow.Table(Arrow.tobuffer(t))
+ @test Tables.schema(a) == Tables.schema(t)
+ @test isequal(a.x, t.x)
+ end
-@testset "# 458" begin
+ # https://github.com/apache/arrow-julia/issues/414
+ df = DataFrame(("$i" => rand(1000) for i = 1:65536)...)
+ df_load = Arrow.Table(Arrow.tobuffer(df))
+ @test Tables.schema(df) == Tables.schema(df_load)
+ for (col1, col2) in zip(Tables.columns(df), Tables.columns(df_load))
+ @test col1 == col2
+ end
-x = (; a=[[[[1]]]])
-buf = Arrow.tobuffer(x)
-t = Arrow.Table(buf)
-@test t.a[1][1][1][1] == 1
+ @testset "# 411" begin
+ # Vector{UInt8} are written as List{UInt8} in Arrow
+ # Base.CodeUnits are written as Binary
+ t = (
+ a=[[0x00, 0x01], UInt8[], [0x03]],
+ am=[[0x00, 0x01], [0x03], missing],
+ b=[b"01", b"", b"3"],
+ bm=[b"01", b"3", missing],
+ c=["a", "b", "c"],
+ cm=["a", "c", missing],
+ )
+ buf = Arrow.tobuffer(t)
+ tt = Arrow.Table(buf)
+ @test t.a == tt.a
+ @test isequal(t.am, tt.am)
+ @test t.b == tt.b
+ @test isequal(t.bm, tt.bm)
+ @test t.c == tt.c
+ @test isequal(t.cm, tt.cm)
+ @test Arrow.schema(tt)[].fields[1].type isa Arrow.Flatbuf.List
+ @test Arrow.schema(tt)[].fields[3].type isa Arrow.Flatbuf.Binary
+ pos = position(buf)
+ Arrow.append(buf, tt)
+ seekstart(buf)
+ buf1 = read(buf, pos)
+ buf2 = read(buf)
+ t1 = Arrow.Table(buf1)
+ t2 = Arrow.Table(buf2)
+ @test isequal(t1.a, t2.a)
+ @test isequal(t1.am, t2.am)
+ @test isequal(t1.b, t2.b)
+ @test isequal(t1.bm, t2.bm)
+ @test isequal(t1.c, t2.c)
+ @test isequal(t1.cm, t2.cm)
+ end
-end
+ @testset "# 435" begin
+ t = Arrow.Table(
+ joinpath(dirname(pathof(Arrow)), "../test/java_compress_len_neg_one.arrow"),
+ )
+ @test length(t) == 15
+ @test length(t.isA) == 102
+ end
-@testset "# 456" begin
+ @testset "# 293" begin
+ t = (a=[1, 2, 3], b=[1.0, 2.0, 3.0])
+ buf = Arrow.tobuffer(t)
+ tbl = Arrow.Table(buf)
+ parts = Tables.partitioner((t, t))
+ buf2 = Arrow.tobuffer(parts)
+ tbl2 = Arrow.Table(buf2)
+ for t in Tables.partitions(tbl2)
+ @test t.a == tbl.a
+ @test t.b == tbl.b
+ end
+ end
-NT = @NamedTuple{x::Int, y::Union{Missing,Int}}
-data = NT[(x=1,y=2), (x=2,y=missing), (x=3,y=4), (x=4,y=5)]
-t = [(a=1,b=view(data,1:2)), (a=2,b=view(data,3:4)), missing]
-@test Arrow.toarrowvector(t) isa Arrow.Struct
+ @testset "# 437" begin
+ t = Arrow.Table(
+ joinpath(
+ dirname(pathof(Arrow)),
+ "../test/java_compressed_zero_length.arrow",
+ ),
+ )
+ @test length(t) == 2
+ @test length(t.name) == 0
+ end
-end
+ @testset "# 458" begin
+ x = (; a=[[[[1]]]])
+ buf = Arrow.tobuffer(x)
+ t = Arrow.Table(buf)
+ @test t.a[1][1][1][1] == 1
+ end
-# @testset "# 461" begin
+ @testset "# 456" begin
+ NT = @NamedTuple{x::Int, y::Union{Missing,Int}}
+ data = NT[(x=1, y=2), (x=2, y=missing), (x=3, y=4), (x=4, y=5)]
+ t = [(a=1, b=view(data, 1:2)), (a=2, b=view(data, 3:4)), missing]
+ @test Arrow.toarrowvector(t) isa Arrow.Struct
+ end
-# table = (; v=[v"1", v"2", missing])
-# buf = Arrow.tobuffer(table)
-# table2 = Arrow.Table(buf)
-# @test isequal(table.v, table2.v)
+ # @testset "# 461" begin
-# end
+ # table = (; v=[v"1", v"2", missing])
+ # buf = Arrow.tobuffer(table)
+ # table2 = Arrow.Table(buf)
+ # @test isequal(table.v, table2.v)
-end # @testset "misc"
+ # end
+ end # @testset "misc"
end
diff --git a/test/testappend.jl b/test/testappend.jl
index dace951..b289d4c 100644
--- a/test/testappend.jl
+++ b/test/testappend.jl
@@ -17,28 +17,28 @@
function testappend(nm, t, writekw, readkw, extratests)
@testset "append: $nm" begin
- io = Arrow.tobuffer(t; writekw...)
- bytes = read(io)
- mktemp() do path, io
- write(io, bytes)
- close(io)
-
- t1 = Arrow.Table(read(path); readkw...)
- f1 = first(Tables.columns(t1))
- Arrow.append(path, t1; writekw..., readkw...)
- nparts = 0
- for t2 in Arrow.Stream(path)
- @test isequal(f1, first(Tables.columns(t2)))
- nparts += 1
+ io = Arrow.tobuffer(t; writekw...)
+ bytes = read(io)
+ mktemp() do path, io
+ write(io, bytes)
+ close(io)
+
+ t1 = Arrow.Table(read(path); readkw...)
+ f1 = first(Tables.columns(t1))
+ Arrow.append(path, t1; writekw..., readkw...)
+ nparts = 0
+ for t2 in Arrow.Stream(path)
+ @test isequal(f1, first(Tables.columns(t2)))
+ nparts += 1
+ end
+ @test nparts == 2
end
- @test nparts == 2
- end
end
end
function testappend_compression(compression_option)
mktempdir() do path
- testdata = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
+ testdata = (col1=Int64[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],)
file1 = joinpath(path, "table1.arrow")
file2 = joinpath(path, "table2.arrow")
@@ -68,7 +68,7 @@ end
function testappend_partitions()
mktempdir() do path
- testdata = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
+ testdata = (col1=Int64[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],)
file1 = joinpath(path, "table1.arrow")
file2 = joinpath(path, "table2.arrow")
open(file1, "w") do io
@@ -89,14 +89,14 @@ function testappend_partitions()
# can append to an empty file
rm(file2)
- for _ in 1:5
+ for _ = 1:5
Arrow.append(file2, arrow_table1)
end
appended_table1 = Arrow.Table(file2)
@test length(Tables.columns(appended_table1)[1]) == 50
# schema must match
- testdata2 = (col2=Int64[1,2,3,4,5,6,7,8,9,10],)
+ testdata2 = (col2=Int64[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],)
open(file2, "w") do io
Arrow.write(io, testdata2; file=false)
end
@@ -114,7 +114,7 @@ function testappend_partitions()
@test length(Tables.columns(arrow_table1)[1]) == 10
@test length(Tables.columns(arrow_table2)[1]) == 10
- @test_throws ArgumentError Arrow.append(file1, arrow_table2; ntasks = -1)
+ @test_throws ArgumentError Arrow.append(file1, arrow_table2; ntasks=-1)
arrow_table2 |> Arrow.append(file1)
arrow_table1 = Arrow.Table(file1)
# now
@@ -124,7 +124,8 @@ function testappend_partitions()
@test Tables.schema(arrow_table1) == Tables.schema(arrow_table2)
@test length(Tables.columns(arrow_table1)[1]) == 20
@test length(Tables.columns(arrow_table2)[1]) == 10
- @test length(collect(Tables.partitions(Arrow.Stream(file1)))) == 2 * length(collect(Tables.partitions(Arrow.Stream(file2))))
+ @test length(collect(Tables.partitions(Arrow.Stream(file1)))) ==
+ 2 * length(collect(Tables.partitions(Arrow.Stream(file2))))
Arrow.append(file2, arrow_table1; ntasks=1) # append with single task
arrow_table2 = Arrow.Table(file2)
diff --git a/test/testtables.jl b/test/testtables.jl
index f143705..1ee5404 100644
--- a/test/testtables.jl
+++ b/test/testtables.jl
@@ -15,243 +15,326 @@
# limitations under the License.
testtables = [
- (
- "basic",
- (col1=Int64[1,2,3,4,5,6,7,8,9,10],),
- NamedTuple(),
- NamedTuple(),
- nothing
- ),
- (
- "missing values",
- (col1=Union{Int64, Missing}[1,2,3,4,5,6,7,8,9,missing],),
- NamedTuple(),
- NamedTuple(),
- nothing
- ),
- (
- "primitive types",
(
- col1=[missing, missing, missing, missing],
- col2=Union{UInt8, Missing}[0, 1, 2, missing],
- col3=Union{UInt16, Missing}[0, 1, 2, missing],
- col4=Union{UInt32, Missing}[0, 1, 2, missing],
- col5=Union{UInt64, Missing}[0, 1, 2, missing],
- col6=Union{Int8, Missing}[0, 1, 2, missing],
- col7=Union{Int16, Missing}[0, 1, 2, missing],
- col8=Union{Int32, Missing}[0, 1, 2, missing],
- col9=Union{Int64, Missing}[0, 1, 2, missing],
- col10=Union{Float16, Missing}[0, 1, 2, missing],
- col11=Union{Float32, Missing}[0, 1, 2, missing],
- col12=Union{Float64, Missing}[0, 1, 2, missing],
- col13=[true, false, true, missing],
+ "basic",
+ (col1=Int64[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],),
+ NamedTuple(),
+ NamedTuple(),
+ nothing,
),
- NamedTuple(),
- NamedTuple(),
- nothing
- ),
- (
- "arrow date/time types",
(
- col14=[zero(Arrow.Decimal{Int32(2), Int32(2), Int128}), zero(Arrow.Decimal{Int32(2), Int32(2), Int128}), zero(Arrow.Decimal{Int32(2), Int32(2), Int128}), missing],
- col15=[zero(Arrow.Date{Arrow.Meta.DateUnit.DAY, Int32}), zero(Arrow.Date{Arrow.Meta.DateUnit.DAY, Int32}), zero(Arrow.Date{Arrow.Meta.DateUnit.DAY, Int32}), missing],
- col16=[zero(Arrow.Time{Arrow.Meta.TimeUnit.SECOND, Int32}), zero(Arrow.Time{Arrow.Meta.TimeUnit.SECOND, Int32}), zero(Arrow.Time{Arrow.Meta.TimeUnit.SECOND, Int32}), missing],
- col17=[zero(Arrow.Timestamp{Arrow.Meta.TimeUnit.SECOND, nothing}), zero(Arrow.Timestamp{Arrow.Meta.TimeUnit.SECOND, nothing}), zero(Arrow.Timestamp{Arrow.Meta.TimeUnit.SECOND, nothing}), missing],
- col18=[zero(Arrow.Interval{Arrow.Meta.IntervalUnit.YEAR_MONTH, Int32}), zero(Arrow.Interval{Arrow.Meta.IntervalUnit.YEAR_MONTH, Int32}), zero(Arrow.Interval{Arrow.Meta.IntervalUnit.YEAR_MONTH, Int32}), missing],
- col19=[zero(Arrow.Duration{Arrow.Meta.TimeUnit.SECOND}), zero(Arrow.Duration{Arrow.Meta.TimeUnit.SECOND}), zero(Arrow.Duration{Arrow.Meta.TimeUnit.SECOND}), missing],
- col20=[zero(Arrow.Date{Arrow.Meta.DateUnit.MILLISECOND, Int64}), zero(Arrow.Date{Arrow.Meta.DateUnit.MILLISECOND, Int64}), zero(Arrow.Date{Arrow.Meta.DateUnit.MILLISECOND, Int64}), missing],
+ "missing values",
+ (col1=Union{Int64,Missing}[1, 2, 3, 4, 5, 6, 7, 8, 9, missing],),
+ NamedTuple(),
+ NamedTuple(),
+ nothing,
),
- NamedTuple(),
- (convert=false,),
- nothing
- ),
- (
- "list types",
(
- col1=Union{String, Missing}["hey", "there", "sailor", missing],
- col2=Union{Vector{UInt8}, Missing}[b"hey", b"there", b"sailor", missing],
- col3=Union{Vector{Int64}, Missing}[Int64[1], Int64[2], Int64[3], missing],
- col4=Union{NTuple{2, Vector{Int64}},Missing}[(Int64[1], Int64[2]), missing, missing, (Int64[3], Int64[4])],
- col5=Union{NTuple{2, UInt8}, Missing}[(0x01, 0x02), (0x03, 0x04), missing, (0x05, 0x06)],
- col6=NamedTuple{(:a, :b), Tuple{Int64, String}}[(a=Int64(1), b="hey"), (a=Int64(2), b="there"), (a=Int64(3), b="sailor"), (a=Int64(4), b="jo-bob")],
+ "primitive types",
+ (
+ col1=[missing, missing, missing, missing],
+ col2=Union{UInt8,Missing}[0, 1, 2, missing],
+ col3=Union{UInt16,Missing}[0, 1, 2, missing],
+ col4=Union{UInt32,Missing}[0, 1, 2, missing],
+ col5=Union{UInt64,Missing}[0, 1, 2, missing],
+ col6=Union{Int8,Missing}[0, 1, 2, missing],
+ col7=Union{Int16,Missing}[0, 1, 2, missing],
+ col8=Union{Int32,Missing}[0, 1, 2, missing],
+ col9=Union{Int64,Missing}[0, 1, 2, missing],
+ col10=Union{Float16,Missing}[0, 1, 2, missing],
+ col11=Union{Float32,Missing}[0, 1, 2, missing],
+ col12=Union{Float64,Missing}[0, 1, 2, missing],
+ col13=[true, false, true, missing],
+ ),
+ NamedTuple(),
+ NamedTuple(),
+ nothing,
),
- NamedTuple(),
- NamedTuple(),
- nothing
- ),
- (
- "empty list types",
(
- col1=[[]],
- col2=[()],
+ "arrow date/time types",
+ (
+ col14=[
+ zero(Arrow.Decimal{Int32(2),Int32(2),Int128}),
+ zero(Arrow.Decimal{Int32(2),Int32(2),Int128}),
+ zero(Arrow.Decimal{Int32(2),Int32(2),Int128}),
+ missing,
+ ],
+ col15=[
+ zero(Arrow.Date{Arrow.Meta.DateUnit.DAY,Int32}),
+ zero(Arrow.Date{Arrow.Meta.DateUnit.DAY,Int32}),
+ zero(Arrow.Date{Arrow.Meta.DateUnit.DAY,Int32}),
+ missing,
+ ],
+ col16=[
+ zero(Arrow.Time{Arrow.Meta.TimeUnit.SECOND,Int32}),
+ zero(Arrow.Time{Arrow.Meta.TimeUnit.SECOND,Int32}),
+ zero(Arrow.Time{Arrow.Meta.TimeUnit.SECOND,Int32}),
+ missing,
+ ],
+ col17=[
+ zero(Arrow.Timestamp{Arrow.Meta.TimeUnit.SECOND,nothing}),
+ zero(Arrow.Timestamp{Arrow.Meta.TimeUnit.SECOND,nothing}),
+ zero(Arrow.Timestamp{Arrow.Meta.TimeUnit.SECOND,nothing}),
+ missing,
+ ],
+ col18=[
+ zero(Arrow.Interval{Arrow.Meta.IntervalUnit.YEAR_MONTH,Int32}),
+ zero(Arrow.Interval{Arrow.Meta.IntervalUnit.YEAR_MONTH,Int32}),
+ zero(Arrow.Interval{Arrow.Meta.IntervalUnit.YEAR_MONTH,Int32}),
+ missing,
+ ],
+ col19=[
+ zero(Arrow.Duration{Arrow.Meta.TimeUnit.SECOND}),
+ zero(Arrow.Duration{Arrow.Meta.TimeUnit.SECOND}),
+ zero(Arrow.Duration{Arrow.Meta.TimeUnit.SECOND}),
+ missing,
+ ],
+ col20=[
+ zero(Arrow.Date{Arrow.Meta.DateUnit.MILLISECOND,Int64}),
+ zero(Arrow.Date{Arrow.Meta.DateUnit.MILLISECOND,Int64}),
+ zero(Arrow.Date{Arrow.Meta.DateUnit.MILLISECOND,Int64}),
+ missing,
+ ],
+ ),
+ NamedTuple(),
+ (convert=false,),
+ nothing,
),
- NamedTuple(),
- NamedTuple(),
- nothing
- ),
- (
- "unions",
(
- col1=Arrow.DenseUnionVector( Union{Int64, Float64, Missing}[1, 2.0, 3, 4.0, missing]),
- col2=Arrow.SparseUnionVector(Union{Int64, Float64, Missing}[1, 2.0, 3, 4.0, missing]),
+ "list types",
+ (
+ col1=Union{String,Missing}["hey", "there", "sailor", missing],
+ col2=Union{Vector{UInt8},Missing}[b"hey", b"there", b"sailor", missing],
+ col3=Union{Vector{Int64},Missing}[Int64[1], Int64[2], Int64[3], missing],
+ col4=Union{NTuple{2,Vector{Int64}},Missing}[
+ (Int64[1], Int64[2]),
+ missing,
+ missing,
+ (Int64[3], Int64[4]),
+ ],
+ col5=Union{NTuple{2,UInt8},Missing}[
+ (0x01, 0x02),
+ (0x03, 0x04),
+ missing,
+ (0x05, 0x06),
+ ],
+ col6=NamedTuple{(:a, :b),Tuple{Int64,String}}[
+ (a=Int64(1), b="hey"),
+ (a=Int64(2), b="there"),
+ (a=Int64(3), b="sailor"),
+ (a=Int64(4), b="jo-bob"),
+ ],
+ ),
+ NamedTuple(),
+ NamedTuple(),
+ nothing,
),
- NamedTuple(),
- NamedTuple(),
- nothing
- ),
- (
- "dict encodings",
+ ("empty list types", (col1=[[]], col2=[()]), NamedTuple(), NamedTuple(), nothing),
(
- col1=Arrow.DictEncode(Int64[4, 5, 6]),
+ "unions",
+ (
+ col1=Arrow.DenseUnionVector(
+ Union{Int64,Float64,Missing}[1, 2.0, 3, 4.0, missing],
+ ),
+ col2=Arrow.SparseUnionVector(
+ Union{Int64,Float64,Missing}[1, 2.0, 3, 4.0, missing],
+ ),
+ ),
+ NamedTuple(),
+ NamedTuple(),
+ nothing,
),
- NamedTuple(),
- NamedTuple(),
- function (tt)
- col1 = copy(tt.col1)
- @test typeof(col1) == PooledVector{Int64, Int8, Vector{Int8}}
- end
- ),
- (
- "more dict encodings",
(
- col1=Arrow.DictEncode(NamedTuple{(:a, :b), Tuple{Int64, Union{String, Missing}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(3), b="sailor"), (a=Int64(4), b="jo-bob")]),
+ "dict encodings",
+ (col1=Arrow.DictEncode(Int64[4, 5, 6]),),
+ NamedTuple(),
+ NamedTuple(),
+ function (tt)
+ col1 = copy(tt.col1)
+ @test typeof(col1) == PooledVector{Int64,Int8,Vector{Int8}}
+ end,
),
- NamedTuple(),
- NamedTuple(),
- nothing
- ),
- (
- "PooledArray",
(
- col1=PooledArray([4,5,6,6]),
+ "more dict encodings",
+ (
+ col1=Arrow.DictEncode(
+ NamedTuple{(:a, :b),Tuple{Int64,Union{String,Missing}}}[
+ (a=Int64(1), b=missing),
+ (a=Int64(1), b=missing),
+ (a=Int64(3), b="sailor"),
+ (a=Int64(4), b="jo-bob"),
+ ],
+ ),
+ ),
+ NamedTuple(),
+ NamedTuple(),
+ nothing,
),
- NamedTuple(),
- NamedTuple(),
- nothing
- ),
- (
- "auto-converting types",
+ ("PooledArray", (col1=PooledArray([4, 5, 6, 6]),), NamedTuple(), NamedTuple(), nothing),
(
- col1=[Date(2001, 1, 2), Date(2010, 10, 10), Date(2020, 12, 1)],
- col2=[Time(1, 1, 2), Time(13, 10, 10), Time(22, 12, 1)],
- col3=[DateTime(2001, 1, 2), DateTime(2010, 10, 10), DateTime(2020, 12, 1)],
- col4=[ZonedDateTime(2001, 1, 2, TimeZone("America/Denver")), ZonedDateTime(2010, 10, 10, TimeZone("America/Denver")), ZonedDateTime(2020, 12, 1, TimeZone("America/Denver"))]
+ "auto-converting types",
+ (
+ col1=[Date(2001, 1, 2), Date(2010, 10, 10), Date(2020, 12, 1)],
+ col2=[Time(1, 1, 2), Time(13, 10, 10), Time(22, 12, 1)],
+ col3=[DateTime(2001, 1, 2), DateTime(2010, 10, 10), DateTime(2020, 12, 1)],
+ col4=[
+ ZonedDateTime(2001, 1, 2, TimeZone("America/Denver")),
+ ZonedDateTime(2010, 10, 10, TimeZone("America/Denver")),
+ ZonedDateTime(2020, 12, 1, TimeZone("America/Denver")),
+ ],
+ ),
+ NamedTuple(),
+ NamedTuple(),
+ nothing,
),
- NamedTuple(),
- NamedTuple(),
- nothing
- ),
- (
- "Map",
(
- col1=[Dict(Int32(1) => Float32(3.14)), missing],
+ "Map",
+ (col1=[Dict(Int32(1) => Float32(3.14)), missing],),
+ NamedTuple(),
+ NamedTuple(),
+ nothing,
),
- NamedTuple(),
- NamedTuple(),
- nothing
- ),
- (
- "non-standard types",
(
- col1=[:hey, :there, :sailor],
- col2=['a', 'b', 'c'],
- col3=Arrow.DictEncode(['a', 'a', 'b']),
- col4=[UUID("48075322-8645-4ac6-b590-c9f46068565a"), UUID("99c7d976-ccfd-45b9-9793-51008607c638"), UUID("f96d9974-5a7b-47e3-bbc0-d680d11490d4")]
+ "non-standard types",
+ (
+ col1=[:hey, :there, :sailor],
+ col2=['a', 'b', 'c'],
+ col3=Arrow.DictEncode(['a', 'a', 'b']),
+ col4=[
+ UUID("48075322-8645-4ac6-b590-c9f46068565a"),
+ UUID("99c7d976-ccfd-45b9-9793-51008607c638"),
+ UUID("f96d9974-5a7b-47e3-bbc0-d680d11490d4"),
+ ],
+ ),
+ NamedTuple(),
+ NamedTuple(),
+ nothing,
),
- NamedTuple(),
- NamedTuple(),
- nothing
- ),
- (
- "large lists",
(
- col1=Union{String, Missing}["hey", "there", "sailor", missing],
- col2=Union{Vector{UInt8}, Missing}[b"hey", b"there", b"sailor", missing],
- col3=Union{Vector{Int64}, Missing}[Int64[1], Int64[2], Int64[3], missing],
- col4=Union{NTuple{2, Vector{Int64}},Missing}[(Int64[1], Int64[2]), missing, missing, (Int64[3], Int64[4])],
- col5=Union{NTuple{2, UInt8}, Missing}[(0x01, 0x02), (0x03, 0x04), missing, (0x05, 0x06)],
- col6=NamedTuple{(:a, :b), Tuple{Int64, String}}[(a=Int64(1), b="hey"), (a=Int64(2), b="there"), (a=Int64(3), b="sailor"), (a=Int64(4), b="jo-bob")],
+ "large lists",
+ (
+ col1=Union{String,Missing}["hey", "there", "sailor", missing],
+ col2=Union{Vector{UInt8},Missing}[b"hey", b"there", b"sailor", missing],
+ col3=Union{Vector{Int64},Missing}[Int64[1], Int64[2], Int64[3], missing],
+ col4=Union{NTuple{2,Vector{Int64}},Missing}[
+ (Int64[1], Int64[2]),
+ missing,
+ missing,
+ (Int64[3], Int64[4]),
+ ],
+ col5=Union{NTuple{2,UInt8},Missing}[
+ (0x01, 0x02),
+ (0x03, 0x04),
+ missing,
+ (0x05, 0x06),
+ ],
+ col6=NamedTuple{(:a, :b),Tuple{Int64,String}}[
+ (a=Int64(1), b="hey"),
+ (a=Int64(2), b="there"),
+ (a=Int64(3), b="sailor"),
+ (a=Int64(4), b="jo-bob"),
+ ],
+ ),
+ (largelists=true,),
+ NamedTuple(),
+ nothing,
),
- (largelists=true,),
- NamedTuple(),
- nothing
- ),
- (
- "dictencode keyword",
(
- col1=Int64[1,2,3,4],
- col2=Union{String, Missing}["hey", "there", "sailor", missing],
- col3=Arrow.DictEncode(NamedTuple{(:a, :b), Tuple{Int64, Union{String, Missing}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(3), b="sailor"), (a=Int64(4), b="jo-bob")]),
- col4=[:a, :b, :c, missing],
- col5=[Date(2020, 1, 1) for x = 1:4]
+ "dictencode keyword",
+ (
+ col1=Int64[1, 2, 3, 4],
+ col2=Union{String,Missing}["hey", "there", "sailor", missing],
+ col3=Arrow.DictEncode(
+ NamedTuple{(:a, :b),Tuple{Int64,Union{String,Missing}}}[
+ (a=Int64(1), b=missing),
+ (a=Int64(1), b=missing),
+ (a=Int64(3), b="sailor"),
+ (a=Int64(4), b="jo-bob"),
+ ],
+ ),
+ col4=[:a, :b, :c, missing],
+ col5=[Date(2020, 1, 1) for x = 1:4],
+ ),
+ (dictencode=true,),
+ NamedTuple(),
+ nothing,
),
- (dictencode=true,),
- NamedTuple(),
- nothing
- ),
- (
- "nesteddictencode keyword",
(
- col1=NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(3), b=(c="sailor",)), (a=Int64(4), b=(c="jo-bob",))],
+ "nesteddictencode keyword",
+ (
+ col1=NamedTuple{
+ (:a, :b),
+ Tuple{Int64,Union{Missing,NamedTuple{(:c,),Tuple{String}}}},
+ }[
+ (a=Int64(1), b=missing),
+ (a=Int64(1), b=missing),
+ (a=Int64(3), b=(c="sailor",)),
+ (a=Int64(4), b=(c="jo-bob",)),
+ ],
+ ),
+ (dictencode=true, dictencodenested=true),
+ NamedTuple(),
+ nothing,
),
- (dictencode=true, dictencodenested=true,),
- NamedTuple(),
- nothing
- ),
- (
- "Julia unions",
(
- col1=Union{Int, String}[1, "hey", 2, "ho"],
- col2=Union{Char, NamedTuple{(:a,), Tuple{Symbol}}}['a', (a=:hey,), 'b', (a=:ho,)],
+ "Julia unions",
+ (
+ col1=Union{Int,String}[1, "hey", 2, "ho"],
+ col2=Union{Char,NamedTuple{(:a,),Tuple{Symbol}}}['a', (a=:hey,), 'b', (a=:ho,)],
+ ),
+ (denseunions=false,),
+ NamedTuple(),
+ nothing,
),
- (denseunions=false,),
- NamedTuple(),
- nothing
- ),
- (
- "Decimal256",
(
- col1=[zero(Arrow.Decimal{Int32(2), Int32(2), Arrow.Int256}), zero(Arrow.Decimal{Int32(2), Int32(2), Arrow.Int256}), zero(Arrow.Decimal{Int32(2), Int32(2), Arrow.Int256}), missing],
+ "Decimal256",
+ (
+ col1=[
+ zero(Arrow.Decimal{Int32(2),Int32(2),Arrow.Int256}),
+ zero(Arrow.Decimal{Int32(2),Int32(2),Arrow.Int256}),
+ zero(Arrow.Decimal{Int32(2),Int32(2),Arrow.Int256}),
+ missing,
+ ],
+ ),
+ NamedTuple(),
+ (convert=false,),
+ nothing,
),
- NamedTuple(),
- (convert=false,),
- nothing
- ),
];
function testtable(nm, t, writekw, readkw, extratests)
- @testset "testing: $nm" begin
- io = Arrow.tobuffer(t; writekw...)
- tt = Arrow.Table(io; readkw...)
- @test length(tt) == length(t)
- @test all(isequal.(values(t), values(tt)))
- extratests !== nothing && extratests(tt)
- seekstart(io)
- str = Arrow.Stream(io; readkw...)
- tt = first(str)
- @test length(tt) == length(t)
- @test all(isequal.(values(t), values(tt)))
- # compressed
- io = Arrow.tobuffer(t; compress=((:lz4, :zstd)[rand(1:2)]), writekw...)
- tt = Arrow.Table(io; readkw...)
- @test length(tt) == length(t)
- @test all(isequal.(values(t), values(tt)))
- extratests !== nothing && extratests(tt)
- seekstart(io)
- str = Arrow.Stream(io; readkw...)
- tt = first(str)
- @test length(tt) == length(t)
- @test all(isequal.(values(t), values(tt)))
- # file
- io = Arrow.tobuffer(t; file=true, writekw...)
- tt = Arrow.Table(io; readkw...)
- @test length(tt) == length(t)
- @test all(isequal.(values(t), values(tt)))
- extratests !== nothing && extratests(tt)
- seekstart(io)
- str = Arrow.Stream(io; readkw...)
- tt = first(str)
- @test length(tt) == length(t)
- @test all(isequal.(values(t), values(tt)))
- end
+ @testset "testing: $nm" begin
+ io = Arrow.tobuffer(t; writekw...)
+ tt = Arrow.Table(io; readkw...)
+ @test length(tt) == length(t)
+ @test all(isequal.(values(t), values(tt)))
+ extratests !== nothing && extratests(tt)
+ seekstart(io)
+ str = Arrow.Stream(io; readkw...)
+ tt = first(str)
+ @test length(tt) == length(t)
+ @test all(isequal.(values(t), values(tt)))
+ # compressed
+ io = Arrow.tobuffer(t; compress=((:lz4, :zstd)[rand(1:2)]), writekw...)
+ tt = Arrow.Table(io; readkw...)
+ @test length(tt) == length(t)
+ @test all(isequal.(values(t), values(tt)))
+ extratests !== nothing && extratests(tt)
+ seekstart(io)
+ str = Arrow.Stream(io; readkw...)
+ tt = first(str)
+ @test length(tt) == length(t)
+ @test all(isequal.(values(t), values(tt)))
+ # file
+ io = Arrow.tobuffer(t; file=true, writekw...)
+ tt = Arrow.Table(io; readkw...)
+ @test length(tt) == length(t)
+ @test all(isequal.(values(t), values(tt)))
+ extratests !== nothing && extratests(tt)
+ seekstart(io)
+ str = Arrow.Stream(io; readkw...)
+ tt = first(str)
+ @test length(tt) == length(t)
+ @test all(isequal.(values(t), values(tt)))
+ end
end