You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/06/08 07:27:38 UTC

[arrow-rs] branch master updated: Don't overwrite existing data on snappy decompress (#1806) (#1807)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 1d31d3079 Don't overwrite existing data on snappy decompress (#1806) (#1807)
1d31d3079 is described below

commit 1d31d30798495d0669b4b1bd0c43e375c2cf1d73
Author: Raphael Taylor-Davies <17...@users.noreply.github.com>
AuthorDate: Wed Jun 8 08:27:34 2022 +0100

    Don't overwrite existing data on snappy decompress (#1806) (#1807)
    
    * Don't trample existing data on snappy decompress (#1806)
    
    * Review feedback
---
 parquet/src/compression.rs | 38 ++++++++++++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/parquet/src/compression.rs b/parquet/src/compression.rs
index f4aecbf4e..a5e49360a 100644
--- a/parquet/src/compression.rs
+++ b/parquet/src/compression.rs
@@ -49,13 +49,14 @@ use crate::errors::{ParquetError, Result};
 
 /// Parquet compression codec interface.
 pub trait Codec: Send {
-    /// Compresses data stored in slice `input_buf` and writes the compressed result
+    /// Compresses data stored in slice `input_buf` and appends the compressed result
     /// to `output_buf`.
+    ///
     /// Note that you'll need to call `clear()` before reusing the same `output_buf`
     /// across different `compress` calls.
     fn compress(&mut self, input_buf: &[u8], output_buf: &mut Vec<u8>) -> Result<()>;
 
-    /// Decompresses data stored in slice `input_buf` and writes output to `output_buf`.
+    /// Decompresses data stored in slice `input_buf` and appends output to `output_buf`.
     /// Returns the total number of bytes written.
     fn decompress(&mut self, input_buf: &[u8], output_buf: &mut Vec<u8>)
         -> Result<usize>;
@@ -111,9 +112,10 @@ mod snappy_codec {
             output_buf: &mut Vec<u8>,
         ) -> Result<usize> {
             let len = decompress_len(input_buf)?;
-            output_buf.resize(len, 0);
+            let offset = output_buf.len();
+            output_buf.resize(offset + len, 0);
             self.decoder
-                .decompress(input_buf, output_buf)
+                .decompress(input_buf, &mut output_buf[offset..])
                 .map_err(|e| e.into())
         }
 
@@ -340,13 +342,13 @@ mod tests {
             .expect("Error when compressing");
 
         // Decompress with c2
-        let mut decompressed_size = c2
+        let decompressed_size = c2
             .decompress(compressed.as_slice(), &mut decompressed)
             .expect("Error when decompressing");
         assert_eq!(data.len(), decompressed_size);
-        decompressed.truncate(decompressed_size);
         assert_eq!(data, decompressed.as_slice());
 
+        decompressed.clear();
         compressed.clear();
 
         // Compress with c2
@@ -354,12 +356,32 @@ mod tests {
             .expect("Error when compressing");
 
         // Decompress with c1
-        decompressed_size = c1
+        let decompressed_size = c1
             .decompress(compressed.as_slice(), &mut decompressed)
             .expect("Error when decompressing");
         assert_eq!(data.len(), decompressed_size);
-        decompressed.truncate(decompressed_size);
         assert_eq!(data, decompressed.as_slice());
+
+        decompressed.clear();
+        compressed.clear();
+
+        // Test does not trample existing data in output buffers
+        let prefix = &[0xDE, 0xAD, 0xBE, 0xEF];
+        decompressed.extend_from_slice(prefix);
+        compressed.extend_from_slice(prefix);
+
+        c2.compress(data, &mut compressed)
+            .expect("Error when compressing");
+
+        assert_eq!(&compressed[..4], prefix);
+
+        let decompressed_size = c2
+            .decompress(&compressed[4..], &mut decompressed)
+            .expect("Error when decompressing");
+
+        assert_eq!(data.len(), decompressed_size);
+        assert_eq!(data, &decompressed[4..]);
+        assert_eq!(&decompressed[..4], prefix);
     }
 
     fn test_codec(c: CodecType) {