You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by qu...@apache.org on 2022/11/03 16:20:44 UTC
[arrow-julia] branch main updated: allow append to act on non-existent/blank file/io (#358)
This is an automated email from the ASF dual-hosted git repository.
quinnj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git
The following commit(s) were added to refs/heads/main by this push:
new 23258f1 allow append to act on non-existent/blank file/io (#358)
23258f1 is described below
commit 23258f12bb4b28eb3846d0d3a91a54e2628254d1
Author: Tanmay Mohapatra <ta...@gmail.com>
AuthorDate: Thu Nov 3 21:50:38 2022 +0530
allow append to act on non-existent/blank file/io (#358)
`Arrow.append` can now be called on an empty or nonexistent file (or an
empty IO); in that case it simply delegates to `Arrow.write`.
---
src/append.jl | 48 +++++++++++++++++++++++++++++++++++-------------
test/testappend.jl | 8 ++++++++
2 files changed, 43 insertions(+), 13 deletions(-)
diff --git a/src/append.jl b/src/append.jl
index 5f521dd..4bd45b3 100644
--- a/src/append.jl
+++ b/src/append.jl
@@ -60,7 +60,7 @@ function append end
append(io_or_file; kw...) = x -> append(io_or_file, x; kw...)
function append(file::String, tbl; kwargs...)
- open(file, "r+") do io
+ open(file, isfile(file) ? "r+" : "w+") do io
append(io, tbl; file=true, kwargs...)
end
@@ -84,20 +84,42 @@ function append(io::IO, tbl;
throw(ArgumentError("ntasks keyword argument must be > 0; pass `ntasks=1` to disable multithreaded writing"))
end
- isstream, arrow_schema, compress = stream_properties(io; convert=convert)
- if !isstream
- throw(ArgumentError("append is supported only to files in arrow stream format"))
- end
+ startpos = position(io)
+ seekend(io)
+ len = position(io) - startpos
+ seek(io, startpos) # leave the stream position unchanged
- if compress === :lz4
- compress = LZ4_FRAME_COMPRESSOR
- elseif compress === :zstd
- compress = ZSTD_COMPRESSOR
- elseif compress isa Symbol
- throw(ArgumentError("unsupported compress keyword argument value: $compress. Valid values include `:lz4` or `:zstd`"))
- end
+ if len == 0 # empty file, not initialized, we can just write to it
+ kwargs = Dict{Symbol, Any}(
+ :largelists => largelists,
+ :denseunions => denseunions,
+ :dictencode => dictencode,
+ :dictencodenested => dictencodenested,
+ :alignment => alignment,
+ :maxdepth => maxdepth,
+ :metadata => metadata,
+ :colmetadata => colmetadata,
+ )
+ if isa(ntasks, Integer)
+ kwargs[:ntasks] = ntasks
+ end
+ write(io, tbl; kwargs...)
+ else
+ isstream, arrow_schema, compress = stream_properties(io; convert=convert)
+ if !isstream
+ throw(ArgumentError("append is supported only to files in arrow stream format"))
+ end
- append(io, tbl, arrow_schema, compress, largelists, denseunions, dictencode, dictencodenested, alignment, maxdepth, ntasks, metadata, colmetadata)
+ if compress === :lz4
+ compress = LZ4_FRAME_COMPRESSOR
+ elseif compress === :zstd
+ compress = ZSTD_COMPRESSOR
+ elseif compress isa Symbol
+ throw(ArgumentError("unsupported compress keyword argument value: $compress. Valid values include `:lz4` or `:zstd`"))
+ end
+
+ append(io, tbl, arrow_schema, compress, largelists, denseunions, dictencode, dictencodenested, alignment, maxdepth, ntasks, metadata, colmetadata)
+ end
return io
end
diff --git a/test/testappend.jl b/test/testappend.jl
index 0b43784..d4834dd 100644
--- a/test/testappend.jl
+++ b/test/testappend.jl
@@ -86,6 +86,14 @@ function testappend_partitions()
end
@test_throws ArgumentError Arrow.append(file2, arrow_table1)
+ # can append to an empty file
+ rm(file2)
+ for _ in 1:5
+ Arrow.append(file2, arrow_table1)
+ end
+ appended_table1 = Arrow.Table(file2)
+ @test length(Tables.columns(appended_table1)[1]) == 50
+
# schema must match
testdata2 = (col2=Int64[1,2,3,4,5,6,7,8,9,10],)
open(file2, "w") do io