You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by mg...@apache.org on 2022/01/13 08:15:27 UTC

[avro] branch avro-3303-rust-add-xz-codec created (now 8c38ba6)

This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a change to branch avro-3303-rust-add-xz-codec
in repository https://gitbox.apache.org/repos/asf/avro.git.


      at 8c38ba6  AVRO-3303: Rust: Add support for Xz codec

This branch includes the following new commits:

     new 8c38ba6  AVRO-3303: Rust: Add support for Xz codec

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[avro] 01/01: AVRO-3303: Rust: Add support for Xz codec

Posted by mg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch avro-3303-rust-add-xz-codec
in repository https://gitbox.apache.org/repos/asf/avro.git

commit 8c38ba6508ce8e52a4f6b0f8d9abe06b4e72a79e
Author: Martin Tzvetanov Grigorov <mg...@apache.org>
AuthorDate: Thu Jan 13 10:14:30 2022 +0200

    AVRO-3303: Rust: Add support for Xz codec
    
    Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
---
 lang/rust/Cargo.toml   |  2 ++
 lang/rust/README.md    | 12 ++++++++++
 lang/rust/src/codec.rs | 62 ++++++++++++++++++++++++++++++++------------------
 3 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/lang/rust/Cargo.toml b/lang/rust/Cargo.toml
index 5fb8593..0f1f4ed 100644
--- a/lang/rust/Cargo.toml
+++ b/lang/rust/Cargo.toml
@@ -32,6 +32,7 @@ documentation = "https://docs.rs/avro-rs"
 snappy = ["crc32fast", "snap"]
 zstandard = ["zstd"]
 bzip = ["bzip2"]
+xz = ["xz2"]
 
 [lib]
 path = "src/lib.rs"
@@ -56,6 +57,7 @@ bzip2 = { version = "0.4.3", optional = true }
 crc32fast = { version = "1.2.1", optional = true }
 digest = "0.10.1"
 libflate = "1.1.1"
+xz2 = { version = "0.1.6", optional = true }
 num-bigint = "0.4.2"
 rand = "0.8.4"
 regex = "1.5.4"
diff --git a/lang/rust/README.md b/lang/rust/README.md
index 93950ca..0282d51 100644
--- a/lang/rust/README.md
+++ b/lang/rust/README.md
@@ -87,6 +87,15 @@ version = "x.y"
 features = ["bzip"]
 ```
 
+Or in case you want to leverage the **Xz** codec:
+
+```toml
+[dependencies.avro-rs]
+version = "x.y"
+features = ["xz"]
+```
+
+
 ## Upgrading to a newer minor version
 
 The library is still in beta, so there might be backward-incompatible changes between minor
@@ -264,6 +273,9 @@ the block. You must enable the `snappy` feature to use this codec.
 You must enable the `zstandard` feature to use this codec.
 * **Bzip2**: uses [BZip2](https://sourceware.org/bzip2/) compression library.
 You must enable the `bzip` feature to use this codec.
+* **Xz**: uses [xz2](https://github.com/alexcrichton/xz2-rs) compression library.
+  You must enable the `xz` feature to use this codec.
+
 
 To specify a codec to use to compress data, just specify it while creating a `Writer`:
 ```rust
diff --git a/lang/rust/src/codec.rs b/lang/rust/src/codec.rs
index 15992c1..0ba8abe 100644
--- a/lang/rust/src/codec.rs
+++ b/lang/rust/src/codec.rs
@@ -30,6 +30,8 @@ use bzip2::{
 extern crate crc32fast;
 #[cfg(feature = "snappy")]
 use crc32fast::Hasher;
+#[cfg(feature = "xz")]
+use xz2::read::{XzDecoder, XzEncoder};
 
 /// The compression codec used to compress blocks.
 #[derive(Clone, Copy, Debug, PartialEq, EnumString, IntoStaticStr)]
@@ -52,6 +54,10 @@ pub enum Codec {
     /// The `BZip2` codec uses [BZip2](https://sourceware.org/bzip2/)
     /// compression library.
     Bzip2,
+    #[cfg(feature = "xz")]
+    /// The `Xz` codec uses [Xz utils](https://tukaani.org/xz/)
+    /// compression library.
+    Xz,
 }
 
 impl From<Codec> for Value {
@@ -104,6 +110,14 @@ impl Codec {
                 encoder.read_to_end(&mut buffer).unwrap();
                 *stream = buffer;
             }
+            #[cfg(feature = "xz")]
+            Codec::Xz => {
+                let compression_level = 9;
+                let mut encoder = XzEncoder::new(&stream[..], compression_level);
+                let mut buffer = Vec::new();
+                encoder.read_to_end(&mut buffer).unwrap();
+                *stream = buffer;
+            }
         };
 
         Ok(())
@@ -156,6 +170,13 @@ impl Codec {
                 decoder.read_to_end(&mut decoded).unwrap();
                 decoded
             }
+            #[cfg(feature = "xz")]
+            Codec::Xz => {
+                let mut decoder = XzDecoder::new(&stream[..]);
+                let mut decoded: Vec<u8> = Vec::new();
+                decoder.read_to_end(&mut decoded).unwrap();
+                decoded
+            }
         };
         Ok(())
     }
@@ -179,43 +200,34 @@ mod tests {
 
     #[test]
     fn deflate_compress_and_decompress() {
-        let codec = Codec::Deflate;
-        let mut stream = INPUT.to_vec();
-        codec.compress(&mut stream).unwrap();
-        assert_ne!(INPUT, stream.as_slice());
-        assert!(INPUT.len() > stream.len());
-        codec.decompress(&mut stream).unwrap();
-        assert_eq!(INPUT, stream.as_slice());
+        compress_and_decompress(Codec::Deflate);
     }
 
     #[cfg(feature = "snappy")]
     #[test]
     fn snappy_compress_and_decompress() {
-        let codec = Codec::Snappy;
-        let mut stream = INPUT.to_vec();
-        codec.compress(&mut stream).unwrap();
-        assert_ne!(INPUT, stream.as_slice());
-        assert!(INPUT.len() > stream.len());
-        codec.decompress(&mut stream).unwrap();
-        assert_eq!(INPUT, stream.as_slice());
+        compress_and_decompress(Codec::Snappy);
     }
 
     #[cfg(feature = "zstandard")]
     #[test]
     fn zstd_compress_and_decompress() {
-        let codec = Codec::Zstd;
-        let mut stream = INPUT.to_vec();
-        codec.compress(&mut stream).unwrap();
-        assert_ne!(INPUT, stream.as_slice());
-        assert!(INPUT.len() > stream.len());
-        codec.decompress(&mut stream).unwrap();
-        assert_eq!(INPUT, stream.as_slice());
+        compress_and_decompress(Codec::Zstd);
     }
 
     #[cfg(feature = "bzip")]
     #[test]
     fn bzip_compress_and_decompress() {
-        let codec = Codec::Bzip2;
+        compress_and_decompress(Codec::Bzip2);
+    }
+
+    #[cfg(feature = "xz")]
+    #[test]
+    fn xz_compress_and_decompress() {
+        compress_and_decompress(Codec::Xz);
+    }
+
+    fn compress_and_decompress(codec: Codec) {
         let mut stream = INPUT.to_vec();
         codec.compress(&mut stream).unwrap();
         assert_ne!(INPUT, stream.as_slice());
@@ -237,6 +249,9 @@ mod tests {
 
         #[cfg(feature = "bzip")]
         assert_eq!(<&str>::from(Codec::Bzip2), "bzip2");
+
+        #[cfg(feature = "xz")]
+        assert_eq!(<&str>::from(Codec::Xz), "xz");
     }
 
     #[test]
@@ -255,6 +270,9 @@ mod tests {
         #[cfg(feature = "bzip")]
         assert_eq!(Codec::from_str("bzip2").unwrap(), Codec::Bzip2);
 
+        #[cfg(feature = "xz")]
+        assert_eq!(Codec::from_str("xz").unwrap(), Codec::Xz);
+
         assert!(Codec::from_str("not a codec").is_err());
     }
 }