You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by mg...@apache.org on 2022/01/13 08:22:21 UTC

[avro] branch master updated: AVRO-3303: Rust: Add support for Xz codec (#1455)

This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new ee47d4c  AVRO-3303: Rust: Add support for Xz codec (#1455)
ee47d4c is described below

commit ee47d4cd64eddcbced7b7c77102480e767b210a6
Author: Martin Grigorov <ma...@users.noreply.github.com>
AuthorDate: Thu Jan 13 10:22:15 2022 +0200

    AVRO-3303: Rust: Add support for Xz codec (#1455)
    
    Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
---
 lang/rust/Cargo.toml   |  2 ++
 lang/rust/README.md    | 12 ++++++++++
 lang/rust/src/codec.rs | 62 ++++++++++++++++++++++++++++++++------------------
 3 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/lang/rust/Cargo.toml b/lang/rust/Cargo.toml
index 5fb8593..0f1f4ed 100644
--- a/lang/rust/Cargo.toml
+++ b/lang/rust/Cargo.toml
@@ -32,6 +32,7 @@ documentation = "https://docs.rs/avro-rs"
 snappy = ["crc32fast", "snap"]
 zstandard = ["zstd"]
 bzip = ["bzip2"]
+xz = ["xz2"]
 
 [lib]
 path = "src/lib.rs"
@@ -56,6 +57,7 @@ bzip2 = { version = "0.4.3", optional = true }
 crc32fast = { version = "1.2.1", optional = true }
 digest = "0.10.1"
 libflate = "1.1.1"
+xz2 = { version = "0.1.6", optional = true }
 num-bigint = "0.4.2"
 rand = "0.8.4"
 regex = "1.5.4"
diff --git a/lang/rust/README.md b/lang/rust/README.md
index 93950ca..0282d51 100644
--- a/lang/rust/README.md
+++ b/lang/rust/README.md
@@ -87,6 +87,15 @@ version = "x.y"
 features = ["bzip"]
 ```
 
+Or in case you want to leverage the **Xz** codec:
+
+```toml
+[dependencies.avro-rs]
+version = "x.y"
+features = ["xz"]
+```
+
+
 ## Upgrading to a newer minor version
 
 The library is still in beta, so there might be backward-incompatible changes between minor
@@ -264,6 +273,9 @@ the block. You must enable the `snappy` feature to use this codec.
 You must enable the `zstandard` feature to use this codec.
 * **Bzip2**: uses [BZip2](https://sourceware.org/bzip2/) compression library.
 You must enable the `bzip` feature to use this codec.
+* **Xz**: uses the [xz2](https://github.com/alexcrichton/xz2-rs) compression library.
+  You must enable the `xz` feature to use this codec.
+
 
 To specify a codec to use to compress data, just specify it while creating a `Writer`:
 ```rust
diff --git a/lang/rust/src/codec.rs b/lang/rust/src/codec.rs
index 15992c1..0ba8abe 100644
--- a/lang/rust/src/codec.rs
+++ b/lang/rust/src/codec.rs
@@ -30,6 +30,8 @@ use bzip2::{
 extern crate crc32fast;
 #[cfg(feature = "snappy")]
 use crc32fast::Hasher;
+#[cfg(feature = "xz")]
+use xz2::read::{XzDecoder, XzEncoder};
 
 /// The compression codec used to compress blocks.
 #[derive(Clone, Copy, Debug, PartialEq, EnumString, IntoStaticStr)]
@@ -52,6 +54,10 @@ pub enum Codec {
     /// The `BZip2` codec uses [BZip2](https://sourceware.org/bzip2/)
     /// compression library.
     Bzip2,
+    #[cfg(feature = "xz")]
+    /// The `Xz` codec uses [Xz utils](https://tukaani.org/xz/)
+    /// compression library.
+    Xz,
 }
 
 impl From<Codec> for Value {
@@ -104,6 +110,14 @@ impl Codec {
                 encoder.read_to_end(&mut buffer).unwrap();
                 *stream = buffer;
             }
+            #[cfg(feature = "xz")]
+            Codec::Xz => {
+                let compression_level = 9;
+                let mut encoder = XzEncoder::new(&stream[..], compression_level);
+                let mut buffer = Vec::new();
+                encoder.read_to_end(&mut buffer).unwrap();
+                *stream = buffer;
+            }
         };
 
         Ok(())
@@ -156,6 +170,13 @@ impl Codec {
                 decoder.read_to_end(&mut decoded).unwrap();
                 decoded
             }
+            #[cfg(feature = "xz")]
+            Codec::Xz => {
+                let mut decoder = XzDecoder::new(&stream[..]);
+                let mut decoded: Vec<u8> = Vec::new();
+                decoder.read_to_end(&mut decoded).unwrap();
+                decoded
+            }
         };
         Ok(())
     }
@@ -179,43 +200,34 @@ mod tests {
 
     #[test]
     fn deflate_compress_and_decompress() {
-        let codec = Codec::Deflate;
-        let mut stream = INPUT.to_vec();
-        codec.compress(&mut stream).unwrap();
-        assert_ne!(INPUT, stream.as_slice());
-        assert!(INPUT.len() > stream.len());
-        codec.decompress(&mut stream).unwrap();
-        assert_eq!(INPUT, stream.as_slice());
+        compress_and_decompress(Codec::Deflate);
     }
 
     #[cfg(feature = "snappy")]
     #[test]
     fn snappy_compress_and_decompress() {
-        let codec = Codec::Snappy;
-        let mut stream = INPUT.to_vec();
-        codec.compress(&mut stream).unwrap();
-        assert_ne!(INPUT, stream.as_slice());
-        assert!(INPUT.len() > stream.len());
-        codec.decompress(&mut stream).unwrap();
-        assert_eq!(INPUT, stream.as_slice());
+        compress_and_decompress(Codec::Snappy);
     }
 
     #[cfg(feature = "zstandard")]
     #[test]
     fn zstd_compress_and_decompress() {
-        let codec = Codec::Zstd;
-        let mut stream = INPUT.to_vec();
-        codec.compress(&mut stream).unwrap();
-        assert_ne!(INPUT, stream.as_slice());
-        assert!(INPUT.len() > stream.len());
-        codec.decompress(&mut stream).unwrap();
-        assert_eq!(INPUT, stream.as_slice());
+        compress_and_decompress(Codec::Zstd);
     }
 
     #[cfg(feature = "bzip")]
     #[test]
     fn bzip_compress_and_decompress() {
-        let codec = Codec::Bzip2;
+        compress_and_decompress(Codec::Bzip2);
+    }
+
+    #[cfg(feature = "xz")]
+    #[test]
+    fn xz_compress_and_decompress() {
+        compress_and_decompress(Codec::Xz);
+    }
+
+    fn compress_and_decompress(codec: Codec) {
         let mut stream = INPUT.to_vec();
         codec.compress(&mut stream).unwrap();
         assert_ne!(INPUT, stream.as_slice());
@@ -237,6 +249,9 @@ mod tests {
 
         #[cfg(feature = "bzip")]
         assert_eq!(<&str>::from(Codec::Bzip2), "bzip2");
+
+        #[cfg(feature = "xz")]
+        assert_eq!(<&str>::from(Codec::Xz), "xz");
     }
 
     #[test]
@@ -255,6 +270,9 @@ mod tests {
         #[cfg(feature = "bzip")]
         assert_eq!(Codec::from_str("bzip2").unwrap(), Codec::Bzip2);
 
+        #[cfg(feature = "xz")]
+        assert_eq!(Codec::from_str("xz").unwrap(), Codec::Xz);
+
         assert!(Codec::from_str("not a codec").is_err());
     }
 }