You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/06/08 07:27:38 UTC
[arrow-rs] branch master updated: Don't overwrite existing data on snappy decompress (#1806) (#1807)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 1d31d3079 Don't overwrite existing data on snappy decompress (#1806) (#1807)
1d31d3079 is described below
commit 1d31d30798495d0669b4b1bd0c43e375c2cf1d73
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Wed Jun 8 08:27:34 2022 +0100
Don't overwrite existing data on snappy decompress (#1806) (#1807)
* Don't trample existing data on snappy decompress (#1806)
* Review feedback
---
parquet/src/compression.rs | 38 ++++++++++++++++++++++++++++++--------
1 file changed, 30 insertions(+), 8 deletions(-)
diff --git a/parquet/src/compression.rs b/parquet/src/compression.rs
index f4aecbf4e..a5e49360a 100644
--- a/parquet/src/compression.rs
+++ b/parquet/src/compression.rs
@@ -49,13 +49,14 @@ use crate::errors::{ParquetError, Result};
/// Parquet compression codec interface.
pub trait Codec: Send {
- /// Compresses data stored in slice `input_buf` and writes the compressed result
+ /// Compresses data stored in slice `input_buf` and appends the compressed result
/// to `output_buf`.
+ ///
/// Note that you'll need to call `clear()` before reusing the same `output_buf`
/// across different `compress` calls.
fn compress(&mut self, input_buf: &[u8], output_buf: &mut Vec<u8>) -> Result<()>;
- /// Decompresses data stored in slice `input_buf` and writes output to `output_buf`.
+ /// Decompresses data stored in slice `input_buf` and appends output to `output_buf`.
/// Returns the total number of bytes written.
fn decompress(&mut self, input_buf: &[u8], output_buf: &mut Vec<u8>)
-> Result<usize>;
@@ -111,9 +112,10 @@ mod snappy_codec {
output_buf: &mut Vec<u8>,
) -> Result<usize> {
let len = decompress_len(input_buf)?;
- output_buf.resize(len, 0);
+ let offset = output_buf.len();
+ output_buf.resize(offset + len, 0);
self.decoder
- .decompress(input_buf, output_buf)
+ .decompress(input_buf, &mut output_buf[offset..])
.map_err(|e| e.into())
}
@@ -340,13 +342,13 @@ mod tests {
.expect("Error when compressing");
// Decompress with c2
- let mut decompressed_size = c2
+ let decompressed_size = c2
.decompress(compressed.as_slice(), &mut decompressed)
.expect("Error when decompressing");
assert_eq!(data.len(), decompressed_size);
- decompressed.truncate(decompressed_size);
assert_eq!(data, decompressed.as_slice());
+ decompressed.clear();
compressed.clear();
// Compress with c2
@@ -354,12 +356,32 @@ mod tests {
.expect("Error when compressing");
// Decompress with c1
- decompressed_size = c1
+ let decompressed_size = c1
.decompress(compressed.as_slice(), &mut decompressed)
.expect("Error when decompressing");
assert_eq!(data.len(), decompressed_size);
- decompressed.truncate(decompressed_size);
assert_eq!(data, decompressed.as_slice());
+
+ decompressed.clear();
+ compressed.clear();
+
+ // Test that the codec does not trample existing data in the output buffers
+ let prefix = &[0xDE, 0xAD, 0xBE, 0xEF];
+ decompressed.extend_from_slice(prefix);
+ compressed.extend_from_slice(prefix);
+
+ c2.compress(data, &mut compressed)
+ .expect("Error when compressing");
+
+ assert_eq!(&compressed[..4], prefix);
+
+ let decompressed_size = c2
+ .decompress(&compressed[4..], &mut decompressed)
+ .expect("Error when decompressing");
+
+ assert_eq!(data.len(), decompressed_size);
+ assert_eq!(data, &decompressed[4..]);
+ assert_eq!(&decompressed[..4], prefix);
}
fn test_codec(c: CodecType) {