You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by qu...@apache.org on 2023/05/22 22:45:29 UTC

[arrow-julia] 01/01: Handle len of -1 in "compressed" buffers from other languages

This is an automated email from the ASF dual-hosted git repository.

quinnj pushed a commit to branch jq-neg-one-len
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git

commit b622bec794243a7c98508b1e1cb9d30d68b28b22
Author: Jacob Quinn <qu...@gmail.com>
AuthorDate: Mon May 22 16:40:39 2023 -0600

    Handle len of -1 in "compressed" buffers from other languages
    
    It's unclear why other language implementations set a compression codec
    for arrow data and then write a length of -1 as a sentinel value indicating
    that the data is actually _not_ compressed. But since they do, we can handle
    that case pretty easily. I'm basically just adding a test here from @DrChainsaw's
    original PR.
---
 src/table.jl                         |   5 +++--
 test/java_compress_len_neg_one.arrow | Bin 0 -> 6050 bytes
 test/runtests.jl                     |   8 ++++++++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/table.jl b/src/table.jl
index ff44f05..49b6153 100644
--- a/src/table.jl
+++ b/src/table.jl
@@ -521,11 +521,12 @@ function uncompress(ptr::Ptr{UInt8}, buffer, compression)
     len = unsafe_load(convert(Ptr{Int64}, ptr))
     ptr += 8 # skip past uncompressed length as Int64
     encodedbytes = unsafe_wrap(Array, ptr, buffer.length - 8)
-    if len === -1
+    if len == -1
         # len = -1 means data is not compressed
+        # it's unclear why other language implementations allow this,
+        # but we support it so that we can read data produced that way
         return length(encodedbytes), copy(encodedbytes)
     end
-                                        
     decodedbytes = Vector{UInt8}(undef, len)
     if compression.codec === Meta.CompressionTypes.LZ4_FRAME
         transcode(LZ4FrameDecompressor, encodedbytes, decodedbytes)
diff --git a/test/java_compress_len_neg_one.arrow b/test/java_compress_len_neg_one.arrow
new file mode 100644
index 0000000..1d0f864
Binary files /dev/null and b/test/java_compress_len_neg_one.arrow differ
diff --git a/test/runtests.jl b/test/runtests.jl
index 3cdac88..47a137f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -666,6 +666,14 @@ t2 = Arrow.Table(buf2)
 
 end
 
+@testset "# 435" begin
+
+t = Arrow.Table(joinpath(dirname(pathof(Arrow)), "../test/java_compress_len_neg_one.arrow"))
+@test length(t) == 15
+@test length(t.isA) == 102
+
+end
+
 end # @testset "misc"
 
 end