You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by qu...@apache.org on 2023/05/23 15:32:19 UTC

[arrow-julia] branch main updated: Handle len of -1 in "compressed" buffers from other languages (#442)

This is an automated email from the ASF dual-hosted git repository.

quinnj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git


The following commit(s) were added to refs/heads/main by this push:
     new 3008e7f  Handle len of -1 in "compressed" buffers from other languages (#442)
3008e7f is described below

commit 3008e7feaa37d9bd0e1cc03572ea855e2c2d97ca
Author: Jacob Quinn <qu...@gmail.com>
AuthorDate: Tue May 23 09:32:13 2023 -0600

    Handle len of -1 in "compressed" buffers from other languages (#442)
    
    It's unclear why other language implementations set a compression codec
    for arrow data and then write an uncompressed length of -1 as a sentinel
    meaning the buffer is actually _not_ compressed. But since they do, we
    can handle that case pretty easily. I'm basically just adding a test
    here from @DrChainsaw's original PR (#436).
---
 src/table.jl                         |   5 +++--
 test/java_compress_len_neg_one.arrow | Bin 0 -> 6050 bytes
 test/runtests.jl                     |   8 ++++++++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/table.jl b/src/table.jl
index ff44f05..49b6153 100644
--- a/src/table.jl
+++ b/src/table.jl
@@ -521,11 +521,12 @@ function uncompress(ptr::Ptr{UInt8}, buffer, compression)
     len = unsafe_load(convert(Ptr{Int64}, ptr))
     ptr += 8 # skip past uncompressed length as Int64
     encodedbytes = unsafe_wrap(Array, ptr, buffer.length - 8)
-    if len === -1
+    if len == -1
         # len = -1 means data is not compressed
+        # it's unclear why other language implementations allow this
+        # but we support to be able to read data produced as such
         return length(encodedbytes), copy(encodedbytes)
     end
-                                        
     decodedbytes = Vector{UInt8}(undef, len)
     if compression.codec === Meta.CompressionTypes.LZ4_FRAME
         transcode(LZ4FrameDecompressor, encodedbytes, decodedbytes)
diff --git a/test/java_compress_len_neg_one.arrow b/test/java_compress_len_neg_one.arrow
new file mode 100644
index 0000000..1d0f864
Binary files /dev/null and b/test/java_compress_len_neg_one.arrow differ
diff --git a/test/runtests.jl b/test/runtests.jl
index 3cdac88..47a137f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -666,6 +666,14 @@ t2 = Arrow.Table(buf2)
 
 end
 
+@testset "# 435" begin
+
+t = Arrow.Table(joinpath(dirname(pathof(Arrow)), "../test/java_compress_len_neg_one.arrow"))
+@test length(t) == 15
+@test length(t.isA) == 102
+
+end
+
 end # @testset "misc"
 
 end