You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by qu...@apache.org on 2023/05/22 22:45:29 UTC
[arrow-julia] 01/01: Handle len of -1 in "compressed" buffers from other languages
This is an automated email from the ASF dual-hosted git repository.
quinnj pushed a commit to branch jq-neg-one-len
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git
commit b622bec794243a7c98508b1e1cb9d30d68b28b22
Author: Jacob Quinn <qu...@gmail.com>
AuthorDate: Mon May 22 16:40:39 2023 -0600
Handle len of -1 in "compressed" buffers from other languages
It's unclear why other language implementations set a compression codec
for arrow data and then report a length of -1 as a sentinel value indicating
that the data is actually _not_ compressed. But since they do, we can handle
that case pretty easily. I'm basically just adding a test here from @DrChainsaw's
original PR.
---
src/table.jl | 5 +++--
test/java_compress_len_neg_one.arrow | Bin 0 -> 6050 bytes
test/runtests.jl | 8 ++++++++
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/src/table.jl b/src/table.jl
index ff44f05..49b6153 100644
--- a/src/table.jl
+++ b/src/table.jl
@@ -521,11 +521,12 @@ function uncompress(ptr::Ptr{UInt8}, buffer, compression)
len = unsafe_load(convert(Ptr{Int64}, ptr))
ptr += 8 # skip past uncompressed length as Int64
encodedbytes = unsafe_wrap(Array, ptr, buffer.length - 8)
- if len === -1
+ if len == -1
# len = -1 means data is not compressed
+ # it's unclear why other language implementations allow this,
+ # but we support it so that we can read data produced this way
return length(encodedbytes), copy(encodedbytes)
end
-
decodedbytes = Vector{UInt8}(undef, len)
if compression.codec === Meta.CompressionTypes.LZ4_FRAME
transcode(LZ4FrameDecompressor, encodedbytes, decodedbytes)
diff --git a/test/java_compress_len_neg_one.arrow b/test/java_compress_len_neg_one.arrow
new file mode 100644
index 0000000..1d0f864
Binary files /dev/null and b/test/java_compress_len_neg_one.arrow differ
diff --git a/test/runtests.jl b/test/runtests.jl
index 3cdac88..47a137f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -666,6 +666,14 @@ t2 = Arrow.Table(buf2)
end
+@testset "# 435" begin
+
+t = Arrow.Table(joinpath(dirname(pathof(Arrow)), "../test/java_compress_len_neg_one.arrow"))
+@test length(t) == 15
+@test length(t.isA) == 102
+
+end
+
end # @testset "misc"
end