You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by qu...@apache.org on 2022/11/03 16:20:44 UTC

[arrow-julia] branch main updated: allow append to act on non-existent/blank file/io (#358)

This is an automated email from the ASF dual-hosted git repository.

quinnj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git


The following commit(s) were added to refs/heads/main by this push:
     new 23258f1  allow append to act on non-existent/blank file/io (#358)
23258f1 is described below

commit 23258f12bb4b28eb3846d0d3a91a54e2628254d1
Author: Tanmay Mohapatra <ta...@gmail.com>
AuthorDate: Thu Nov 3 21:50:38 2022 +0530

    allow append to act on non-existent/blank file/io (#358)
    
    `Arrow.append` can now append to an empty/nonexistent file by
    simply invoking `Arrow.write` in that situation.
---
 src/append.jl      | 48 +++++++++++++++++++++++++++++++++++-------------
 test/testappend.jl |  8 ++++++++
 2 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/src/append.jl b/src/append.jl
index 5f521dd..4bd45b3 100644
--- a/src/append.jl
+++ b/src/append.jl
@@ -60,7 +60,7 @@ function append end
 append(io_or_file; kw...) = x -> append(io_or_file, x; kw...)
 
 function append(file::String, tbl; kwargs...)
-    open(file, "r+") do io
+    open(file, isfile(file) ? "r+" : "w+") do io
         append(io, tbl; file=true, kwargs...)
     end
 
@@ -84,20 +84,42 @@ function append(io::IO, tbl;
         throw(ArgumentError("ntasks keyword argument must be > 0; pass `ntasks=1` to disable multithreaded writing"))
     end
 
-    isstream, arrow_schema, compress = stream_properties(io; convert=convert)
-    if !isstream
-        throw(ArgumentError("append is supported only to files in arrow stream format"))
-    end
+    startpos = position(io)
+    seekend(io)
+    len = position(io) - startpos
+    seek(io, startpos) # leave the stream position unchanged
 
-    if compress === :lz4
-        compress = LZ4_FRAME_COMPRESSOR
-    elseif compress === :zstd
-        compress = ZSTD_COMPRESSOR
-    elseif compress isa Symbol
-        throw(ArgumentError("unsupported compress keyword argument value: $compress. Valid values include `:lz4` or `:zstd`"))
-    end
+    if len == 0 # empty file, not initialized, we can just write to it
+        kwargs = Dict{Symbol, Any}(
+            :largelists => largelists,
+            :denseunions => denseunions,
+            :dictencode => dictencode,
+            :dictencodenested => dictencodenested,
+            :alignment => alignment,
+            :maxdepth => maxdepth,
+            :metadata => metadata,
+            :colmetadata => colmetadata,
+        )
+        if isa(ntasks, Integer)
+            kwargs[:ntasks] = ntasks
+        end
+        write(io, tbl; kwargs...)
+    else
+        isstream, arrow_schema, compress = stream_properties(io; convert=convert)
+        if !isstream
+            throw(ArgumentError("append is supported only to files in arrow stream format"))
+        end
 
-    append(io, tbl, arrow_schema, compress, largelists, denseunions, dictencode, dictencodenested, alignment, maxdepth, ntasks, metadata, colmetadata)
+        if compress === :lz4
+            compress = LZ4_FRAME_COMPRESSOR
+        elseif compress === :zstd
+            compress = ZSTD_COMPRESSOR
+        elseif compress isa Symbol
+            throw(ArgumentError("unsupported compress keyword argument value: $compress. Valid values include `:lz4` or `:zstd`"))
+        end
+
+        append(io, tbl, arrow_schema, compress, largelists, denseunions, dictencode, dictencodenested, alignment, maxdepth, ntasks, metadata, colmetadata)
+    end
 
     return io
 end
diff --git a/test/testappend.jl b/test/testappend.jl
index 0b43784..d4834dd 100644
--- a/test/testappend.jl
+++ b/test/testappend.jl
@@ -86,6 +86,14 @@ function testappend_partitions()
         end
         @test_throws ArgumentError Arrow.append(file2, arrow_table1)
 
+        # can append to a non-existent file (it is created empty, then written to)
+        rm(file2)
+        for _ in 1:5
+            Arrow.append(file2, arrow_table1)
+        end
+        appended_table1 = Arrow.Table(file2)
+        @test length(Tables.columns(appended_table1)[1]) == 50
+
         # schema must match
         testdata2 = (col2=Int64[1,2,3,4,5,6,7,8,9,10],)
         open(file2, "w") do io