You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ba...@apache.org on 2023/04/11 15:51:51 UTC
[arrow-julia] branch main updated: Add @testsets for misc tests (#421)
This is an automated email from the ASF dual-hosted git repository.
baumgold pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git
The following commit(s) were added to refs/heads/main by this push:
new 0a42df8 Add @testsets for misc tests (#421)
0a42df8 is described below
commit 0a42df8b3a1e1f2fce806432215b2aad4018f6fc
Author: Joao Aparicio <jp...@gmail.com>
AuthorDate: Tue Apr 11 10:51:45 2023 -0500
Add @testsets for misc tests (#421)
The tests under "misc" aren't bundled into individual testsets. As a
consequence, the run terminates as soon as the first misc test fails, making
it difficult to get a good picture of how many failing misc tests remain.
In this commit I've bundled misc tests into testsets.
For example, in (unrelated) work that I'm doing I can see what's still
missing:
![with_testsets](https://user-images.githubusercontent.com/5380486/230786895-94854bda-b14d-4744-87a7-6cb315010282.jpeg)
Now I can see that I have 5 errors out of 127 total misc tests.
Before this change, it would show just 1 error out of 69 misc tests,
because the run stopped early.
Does anyone else like this?
Co-authored-by: Joao Aparicio <jo...@mavensecurities.com>
---
test/runtests.jl | 123 ++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 86 insertions(+), 37 deletions(-)
diff --git a/test/runtests.jl b/test/runtests.jl
index 9b02988..512f3e1 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -110,7 +110,7 @@ end # @testset "abstract path"
@testset "misc" begin
-# multiple record batches
+@testset "# multiple record batches" begin
t = Tables.partitioner(((col1=Union{Int64, Missing}[1,2,3,4,5,6,7,8,9,missing],), (col1=Union{Int64, Missing}[missing,11],)))
io = Arrow.tobuffer(t)
tt = Arrow.Table(io)
@@ -138,8 +138,9 @@ tt, st = state
@test isequal(collect(str)[1].col1, [1,2,3,4,5,6,7,8,9,missing])
@test isequal(collect(str)[2].col1, [missing,11])
+end
-# dictionary batch isDelta
+@testset "# dictionary batch isDelta" begin
t = (
col1=Int64[1,2,3,4],
col2=Union{String, Missing}["hey", "there", "sailor", missing],
@@ -155,7 +156,9 @@ tt = Arrow.Table(Arrow.tobuffer(tt; dictencode=true, dictencodenested=true))
@test tt.col1 == [1,2,3,4,1,2,5,6]
@test isequal(tt.col2, ["hey", "there", "sailor", missing, "hey", "there", "sailor2", missing])
@test isequal(tt.col3, vcat(NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(3), b=(c="sailor",)), (a=Int64(4), b=(c="jo-bob",))], NamedTuple{(:a, :b), Tuple{Int64, Union{Missing, NamedTuple{(:c,), Tuple{String}}}}}[(a=Int64(1), b=missing), (a=Int64(1), b=missing), (a=Int64(5), b=(c="sailor2",)), (a=Int64(4), b=(c="jo-bob",))]))
+end
+@testset "metadata" begin
t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
meta = Dict("key1" => "value1", "key2" => "value2")
meta2 = Dict("colkey1" => "colvalue1", "colkey2" => "colvalue2")
@@ -176,8 +179,9 @@ tt = Arrow.Table(Arrow.tobuffer(t; colmetadata=Dict(:col2 => meta2, :col3 => met
@test Arrow.getmetadata(tt.col2)["colkey1"] == "colvalue1"
@test Arrow.getmetadata(tt.col2)["colkey2"] == "colvalue2"
@test Arrow.getmetadata(tt.col3)["colkey3"] == "colvalue3"
+end
-# custom compressors
+@testset "# custom compressors" begin
lz4 = Arrow.CodecLz4.LZ4FrameCompressor(; compressionlevel=8)
Arrow.CodecLz4.TranscodingStreams.initialize(lz4)
t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
@@ -191,37 +195,44 @@ t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
tt = Arrow.Table(Arrow.tobuffer(t; compress=zstd))
@test length(tt) == length(t)
@test all(isequal.(values(t), values(tt)))
+end
-# custom alignment
+@testset "# custom alignment" begin
t = (col1=Int64[1,2,3,4,5,6,7,8,9,10],)
tt = Arrow.Table(Arrow.tobuffer(t; alignment=64))
@test length(tt) == length(t)
@test all(isequal.(values(t), values(tt)))
+end
-# 53
+@testset "# 53" begin
s = "a" ^ 100
t = (a=[SubString(s, 1:10), SubString(s, 11:20)],)
tt = Arrow.Table(Arrow.tobuffer(t))
@test tt.a == ["aaaaaaaaaa", "aaaaaaaaaa"]
+end
-# 49
+@testset "# 49" begin
@test_throws SystemError Arrow.Table("file_that_doesnt_exist")
@test_throws SystemError Arrow.Table(p"file_that_doesnt_exist")
+end
-# 52
+@testset "# 52" begin
t = (a=Arrow.DictEncode(string.(1:129)),)
tt = Arrow.Table(Arrow.tobuffer(t))
+end
-# 60: unequal column lengths
+@testset "# 60: unequal column lengths" begin
io = IOBuffer()
@test_throws ArgumentError Arrow.write(io, (a = Int[], b = ["asd"], c=collect(1:100)))
+end
-# nullability of custom extension types
+@testset "# nullability of custom extension types" begin
t = (a=['a', missing],)
tt = Arrow.Table(Arrow.tobuffer(t))
@test isequal(tt.a, ['a', missing])
+end
-# automatic custom struct serialization/deserialization
+@testset "# automatic custom struct serialization/deserialization" begin
t = (col1=[CustomStruct(1, 2.3, "hey"), CustomStruct(4, 5.6, "there")],)
Arrow.ArrowTypes.arrowname(::Type{CustomStruct}) = Symbol("JuliaLang.CustomStruct")
@@ -229,31 +240,35 @@ Arrow.ArrowTypes.JuliaType(::Val{Symbol("JuliaLang.CustomStruct")}, S) = CustomS
tt = Arrow.Table(Arrow.tobuffer(t))
@test length(tt) == length(t)
@test all(isequal.(values(t), values(tt)))
+end
-# 76
+@testset "# 76" begin
t = (col1=NamedTuple{(:a,),Tuple{Union{Int,String}}}[(a=1,), (a="x",)],)
tt = Arrow.Table(Arrow.tobuffer(t))
@test length(tt) == length(t)
@test all(isequal.(values(t), values(tt)))
+end
-# 89 etc. - UUID FixedSizeListKind overloads
+@testset "# 89 etc. - UUID FixedSizeListKind overloads" begin
@test Arrow.ArrowTypes.gettype(Arrow.ArrowTypes.ArrowKind(UUID)) == UInt8
@test Arrow.ArrowTypes.getsize(Arrow.ArrowTypes.ArrowKind(UUID)) == 16
+end
-# 98
+@testset "# 98" begin
t = (a = [Nanosecond(0), Nanosecond(1)], b = [uuid4(), uuid4()], c = [missing, Nanosecond(1)])
tt = Arrow.Table(Arrow.tobuffer(t))
@test copy(tt.a) isa Vector{Nanosecond}
@test copy(tt.b) isa Vector{UUID}
@test copy(tt.c) isa Vector{Union{Missing,Nanosecond}}
+end
-# copy on DictEncoding w/ missing values
+@testset "# copy on DictEncoding w/ missing values" begin
x = PooledArray(["hey", missing])
x2 = Arrow.toarrowvector(x)
@test isequal(copy(x2), x)
+end
-# some dict encoding coverage
-
+@testset "# some dict encoding coverage" begin
# signed indices for DictEncodedKind #112 #113 #114
av = Arrow.toarrowvector(PooledArray(repeat(["a", "b"], inner = 5)))
@test isa(first(av.indices), Signed)
@@ -270,31 +285,36 @@ av = Arrow.toarrowvector(CategoricalArray(["a", "bb", "ccc"]))
@test isa(first(av.indices), Signed)
@test length(av) == 3
@test eltype(av) == String
+end
-# 120
+@testset "# 120" begin
x = PooledArray(["hey", missing])
x2 = Arrow.toarrowvector(x)
@test eltype(DataAPI.refpool(x2)) == Union{Missing, String}
@test eltype(DataAPI.levels(x2)) == String
@test DataAPI.refarray(x2) == [1, 2]
+end
-# 121
+@testset "# 121" begin
a = PooledArray(repeat(string.('S', 1:130), inner=5), compress=true)
@test eltype(a.refs) == UInt8
av = Arrow.toarrowvector(a)
@test eltype(av.indices) == Int16
+end
-# 123
+@testset "# 123" begin
t = (x = collect(zip(rand(10), rand(10))),)
tt = Arrow.Table(Arrow.tobuffer(t))
@test tt.x == t.x
+end
-# 144
+@testset "# 144" begin
t = Tables.partitioner(((a=Arrow.DictEncode([1,2,3]),), (a=Arrow.DictEncode(fill(1, 129)),)))
tt = Arrow.Table(Arrow.tobuffer(t))
@test length(tt.a) == 132
+end
-# 126
+@testset "# 126" begin
t = Tables.partitioner(
(
(a=Arrow.toarrowvector(PooledArray([1,2,3 ])),),
@@ -316,19 +336,24 @@ io = IOBuffer()
@test_logs (:error, "error writing arrow data on partition = 2") begin
@test_throws ErrorException Arrow.write(io, t)
end
+end
-# 75
+@testset "# 75" begin
tbl = Arrow.Table(Arrow.tobuffer((sets = [Set([1,2,3]), Set([1,2,3])],)))
@test eltype(tbl.sets) <: Set
+end
-# 85
+@testset "# 85" begin
tbl = Arrow.Table(Arrow.tobuffer((tups = [(1, 3.14, "hey"), (1, 3.14, "hey")],)))
@test eltype(tbl.tups) <: Tuple
+end
-# Nothing
+@testset "Nothing" begin
tbl = Arrow.Table(Arrow.tobuffer((nothings=[nothing, nothing, nothing],)))
@test tbl.nothings == [nothing, nothing, nothing]
+end
+@testset "arrowmetadata" begin
# arrowmetadata
t = (col1=[CustomStruct2{:hey}(1), CustomStruct2{:hey}(2)],)
ArrowTypes.arrowname(::Type{<:CustomStruct2}) = Symbol("CustomStruct2")
@@ -340,8 +365,9 @@ ArrowTypes.arrowmetadata(::Type{CustomStruct2{sym}}) where {sym} = sym
ArrowTypes.JuliaType(::Val{:CustomStruct2}, S, meta) = CustomStruct2{Symbol(meta)}
tbl = Arrow.Table(Arrow.tobuffer(t))
@test eltype(tbl.col1) == CustomStruct2{:hey}
+end
-# 166
+@testset "# 166" begin
t = (
col1=[zero(Arrow.Timestamp{Arrow.Meta.TimeUnits.NANOSECOND, nothing})],
)
@@ -349,14 +375,16 @@ tbl = Arrow.Table(Arrow.tobuffer(t))
@test_logs (:warn, r"automatically converting Arrow.Timestamp with precision = NANOSECOND") begin
@test tbl.col1[1] == Dates.DateTime(1970)
end
+end
-# 95; Arrow.ToTimestamp
+@testset "# 95; Arrow.ToTimestamp" begin
x = [ZonedDateTime(Dates.DateTime(2020), tz"Europe/Paris")]
c = Arrow.ToTimestamp(x)
@test eltype(c) == Arrow.Timestamp{Arrow.Flatbuf.TimeUnits.MILLISECOND, Symbol("Europe/Paris")}
@test c[1] == Arrow.Timestamp{Arrow.Flatbuf.TimeUnits.MILLISECOND, Symbol("Europe/Paris")}(1577833200000)
+end
-# 158
+@testset "# 158" begin
# arrow ipc stream generated from pyarrow with no record batches
bytes = UInt8[0xff, 0xff, 0xff, 0xff, 0x78, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x0c, 0x00,
0x06, 0x00, 0x05, 0x00, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00,
@@ -368,8 +396,9 @@ bytes = UInt8[0xff, 0xff, 0xff, 0xff, 0x78, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00,
tbl = Arrow.Table(bytes)
@test length(tbl.a) == 0
@test eltype(tbl.a) == Union{Int64, Missing}
+end
-# 181
+@testset "# 181" begin
d = Dict{Int,Int}()
for i in 1:9
d = Dict(i => d)
@@ -378,16 +407,17 @@ tbl = (x = [d],)
msg = "reached nested serialization level (20) deeper than provided max depth argument (19); to increase allowed nesting level, pass `maxdepth=X`"
@test_throws ErrorException(msg) Arrow.tobuffer(tbl; maxdepth=19)
@test Arrow.Table(Arrow.tobuffer(tbl; maxdepth=20)).x == tbl.x
+end
-# 167
+@testset "# 167" begin
t = (
col1=[["boop", "she"], ["boop", "she"], ["boo"]],
)
tbl = Arrow.Table(Arrow.tobuffer(t))
@test eltype(tbl.col1) == Vector{String}
+end
-# 200
-@testset "VersionNumber" begin
+@testset "# 200 VersionNumber" begin
t = (
col1=[v"1"],
)
@@ -396,6 +426,7 @@ tbl = Arrow.Table(Arrow.tobuffer(t))
end
@testset "`show`" begin
+ str = nothing
table = (; a = 1:5, b = fill(1.0, 5))
arrow_table = Arrow.Table(Arrow.tobuffer(table))
# 2 and 3-arg show with no metadata
@@ -425,11 +456,11 @@ end
end
-#194
+@testset "# 194" begin
@test isempty(Arrow.Table(Arrow.tobuffer(Dict{Symbol, Vector}())))
+end
-
-#229
+@testset "# 229" begin
struct Foo229{x}
y::String
z::Int
@@ -443,8 +474,9 @@ cols = (k1=[Foo229{:a}("a", 1), Foo229{:b}("b", 2)], k2=[Foo229{:c}("c", 3), Foo
tbl = Arrow.Table(Arrow.tobuffer(cols))
@test tbl.k1 == cols.k1
@test tbl.k2 == cols.k2
+end
-# PR 234
+@testset "# PR 234" begin
# bugfix parsing primitive arrays
buf = [
0x14,0x00,0x00,0x00,0x00,0x00,0x0e,0x00,0x14,0x00,0x00,0x00,0x10,0x00,0x0c,0x00,0x08,
@@ -471,8 +503,9 @@ end
d = Arrow.FlatBuffers.getrootas(TestData, buf, 0);
@test d.DataInt32 == UInt32[1,2,3]
+end
-# test multiple inputs treated as one table
+@testset "# test multiple inputs treated as one table" begin
t = (
col1=[1, 2, 3, 4, 5],
col2=[1.2, 2.3, 3.4, 4.5, 5.6],
@@ -497,10 +530,14 @@ t2 = (
col1=[1.2, 2.3, 3.4, 4.5, 5.6],
)
@test_throws ArgumentError collect(Arrow.Stream([Arrow.tobuffer(t), Arrow.tobuffer(t2)]))
+end
+@testset "# 253" begin
# https://github.com/apache/arrow-julia/issues/253
@test Arrow.toidict(Pair{String, String}[]) == Base.ImmutableDict{String, String}()
+end
+@testset "# 232" begin
# https://github.com/apache/arrow-julia/issues/232
t = (; x=[Dict(true => 1.32, 1.2 => 0.53495216)])
@test_throws ArgumentError("`keytype(d)` must be concrete to serialize map-like `d`, but `keytype(d) == Real`") Arrow.tobuffer(t)
@@ -508,7 +545,9 @@ t = (; x=[Dict(32.0 => true, 1.2 => 0.53495216)])
@test_throws ArgumentError("`valtype(d)` must be concrete to serialize map-like `d`, but `valtype(d) == Real`") Arrow.tobuffer(t)
t = (; x=[Dict(true => 1.32, 1.2 => true)])
@test_throws ArgumentError("`keytype(d)` must be concrete to serialize map-like `d`, but `keytype(d) == Real`") Arrow.tobuffer(t)
+end
+@testset "# 214" begin
# https://github.com/apache/arrow-julia/issues/214
t1 = (; x = [(Nanosecond(42),)])
t2 = Arrow.Table(Arrow.tobuffer(t1))
@@ -519,6 +558,7 @@ t1 = (; x = [(; a=Nanosecond(i), b=Nanosecond(i+1)) for i = 1:5])
t2 = Arrow.Table(Arrow.tobuffer(t1))
t3 = Arrow.Table(Arrow.tobuffer(t2))
@test t3.x == t1.x
+end
@testset "Writer" begin
io = IOBuffer()
@@ -538,15 +578,19 @@ t3 = Arrow.Table(Arrow.tobuffer(t2))
@test table.b == collect(b)
end
-# Empty input
+@testset "# Empty input" begin
@test Arrow.Table(UInt8[]) isa Arrow.Table
@test isempty(Tables.rows(Arrow.Table(UInt8[])))
@test Arrow.Stream(UInt8[]) isa Arrow.Stream
@test isempty(Tables.partitions(Arrow.Stream(UInt8[])))
+end
+@testset "# 324" begin
# https://github.com/apache/arrow-julia/issues/324
@test_throws ArgumentError filter!(x -> x > 1, Arrow.toarrowvector([1, 2, 3]))
+end
+@testset "# 327" begin
# https://github.com/apache/arrow-julia/issues/327
zdt = ZonedDateTime(DateTime(2020, 11, 1, 6), tz"America/New_York"; from_utc=true)
arrow_zdt = ArrowTypes.toarrow(zdt)
@@ -557,13 +601,17 @@ zdt_again = ArrowTypes.fromarrow(ZonedDateTime, arrow_zdt)
original_table = (; col = [ ZonedDateTime(DateTime(1, 2, 3, 4, 5, 6), tz"UTC+3") for _ in 1:5])
table = Arrow.Table(joinpath(@__DIR__, "old_zdt.arrow"))
@test original_table.col == table.col
+end
+@testset "# 243" begin
if pkgversion(ArrowTypes) >= v"2.0.1" # need the ArrowTypes bugfix to pass this test
# https://github.com/apache/arrow-julia/issues/243
table = (; col = [(; v=v"1"), (; v=v"2"), missing])
@test isequal(Arrow.Table(Arrow.tobuffer(table)).col, table.col)
end
+end
+@testset "# 367" begin
# https://github.com/apache/arrow-julia/issues/367
if pkgversion(ArrowTypes) >= v"2.0.2"
t = (; x=Union{ZonedDateTime,Missing}[missing])
@@ -571,6 +619,7 @@ if pkgversion(ArrowTypes) >= v"2.0.2"
@test Tables.schema(a) == Tables.schema(t)
@test isequal(a.x, t.x)
end
+end
# https://github.com/apache/arrow-julia/issues/414
df = DataFrame(("$i" => rand(1000) for i in 1:65536)...)