You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by qu...@apache.org on 2023/05/23 15:32:19 UTC
[arrow-julia] branch main updated: Handle len of -1 in "compressed" buffers from other languages (#442)
This is an automated email from the ASF dual-hosted git repository.
quinnj pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-julia.git
The following commit(s) were added to refs/heads/main by this push:
new 3008e7f Handle len of -1 in "compressed" buffers from other languages (#442)
3008e7f is described below
commit 3008e7feaa37d9bd0e1cc03572ea855e2c2d97ca
Author: Jacob Quinn <qu...@gmail.com>
AuthorDate: Tue May 23 09:32:13 2023 -0600
Handle len of -1 in "compressed" buffers from other languages (#442)
It's unclear why other language implementations set a compression codec
for arrow data and then report a length of -1 as a sentinel value
indicating that the data is actually _not_ compressed. But since they do,
we can handle that case pretty easily. I'm basically just adding a test
here from @DrChainsaw's original PR (#436).
---
src/table.jl | 5 +++--
test/java_compress_len_neg_one.arrow | Bin 0 -> 6050 bytes
test/runtests.jl | 8 ++++++++
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/src/table.jl b/src/table.jl
index ff44f05..49b6153 100644
--- a/src/table.jl
+++ b/src/table.jl
@@ -521,11 +521,12 @@ function uncompress(ptr::Ptr{UInt8}, buffer, compression)
len = unsafe_load(convert(Ptr{Int64}, ptr))
ptr += 8 # skip past uncompressed length as Int64
encodedbytes = unsafe_wrap(Array, ptr, buffer.length - 8)
- if len === -1
+ if len == -1
# len = -1 means data is not compressed
+ # it's unclear why other language implementations allow this
+ # but we support to be able to read data produced as such
return length(encodedbytes), copy(encodedbytes)
end
-
decodedbytes = Vector{UInt8}(undef, len)
if compression.codec === Meta.CompressionTypes.LZ4_FRAME
transcode(LZ4FrameDecompressor, encodedbytes, decodedbytes)
diff --git a/test/java_compress_len_neg_one.arrow b/test/java_compress_len_neg_one.arrow
new file mode 100644
index 0000000..1d0f864
Binary files /dev/null and b/test/java_compress_len_neg_one.arrow differ
diff --git a/test/runtests.jl b/test/runtests.jl
index 3cdac88..47a137f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -666,6 +666,14 @@ t2 = Arrow.Table(buf2)
end
+@testset "# 435" begin
+
+t = Arrow.Table(joinpath(dirname(pathof(Arrow)), "../test/java_compress_len_neg_one.arrow"))
+@test length(t) == 15
+@test length(t.isA) == 102
+
+end
+
end # @testset "misc"
end