You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by mg...@apache.org on 2022/06/28 10:56:38 UTC

[avro] branch master updated: AVRO-3549: [rust] Return a proper error when the codec is not supported and/or enabled. (#1740)

This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new d17c5cc5c AVRO-3549: [rust] Return a proper error when the codec is not supported and/or enabled. (#1740)
d17c5cc5c is described below

commit d17c5cc5c066297a09208098627bbed062549dd7
Author: Martin Grigorov <ma...@users.noreply.github.com>
AuthorDate: Tue Jun 28 13:56:33 2022 +0300

    AVRO-3549: [rust] Return a proper error when the codec is not supported and/or enabled. (#1740)
    
    * AVRO-3549: [rust] Avro reader fails if it tries to read data compressed with a codec that is not enabled in features
    
    Return a proper error when the codec is not supported and/or enabled.
    
    Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
    
    * AVRO-3549: Enable the new test only when 'snappy' feature is not enabled
    
    Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
---
 .github/workflows/test-lang-rust-ci.yml |  7 ++++
 lang/rust/avro/src/error.rs             |  9 +++++
 lang/rust/avro/src/reader.rs            | 68 ++++++++++++++++++++++++---------
 3 files changed, 67 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/test-lang-rust-ci.yml b/.github/workflows/test-lang-rust-ci.yml
index 7d074abbe..5c5bc9e8c 100644
--- a/.github/workflows/test-lang-rust-ci.yml
+++ b/.github/workflows/test-lang-rust-ci.yml
@@ -96,6 +96,13 @@ jobs:
           command: test
           args: --manifest-path lang/rust/Cargo.toml --all-features --target ${{ matrix.target }}
 
+      - name: Rust Test AVRO-3549
+        if: matrix.target != 'wasm32-unknown-unknown'
+        uses: actions-rs/cargo@v1
+        with:
+          command: test
+          args: --manifest-path lang/rust/Cargo.toml --target ${{ matrix.target }} test_avro_3549_read_not_enabled_codec
+
       # because of https://github.com/rust-lang/cargo/issues/6669
       - name: Rust Test docs
         if: matrix.target != 'wasm32-unknown-unknown'
diff --git a/lang/rust/avro/src/error.rs b/lang/rust/avro/src/error.rs
index 1e1e685ab..ec6e22aee 100644
--- a/lang/rust/avro/src/error.rs
+++ b/lang/rust/avro/src/error.rs
@@ -47,6 +47,9 @@ pub enum Error {
     #[error("Invalid utf-8 string")]
     ConvertToUtf8(#[source] std::string::FromUtf8Error),
 
+    #[error("Invalid utf-8 string")]
+    ConvertToUtf8Error(#[source] std::str::Utf8Error),
+
     /// Describes errors happened while validating Avro data.
     #[error("Value does not match schema")]
     Validation,
@@ -424,6 +427,12 @@ pub enum Error {
         "Internal buffer not drained properly. Re-initialize the single object writer struct!"
     )]
     IllegalSingleObjectWriterState,
+
+    #[error("Codec '{0}' is not supported/enabled")]
+    CodecNotSupported(String),
+
+    #[error("Invalid Avro data! Cannot read codec type from value that is not Value::Bytes.")]
+    BadCodecMetadata,
 }
 
 impl serde::ser::Error for Error {
diff --git a/lang/rust/avro/src/reader.rs b/lang/rust/avro/src/reader.rs
index 72513bf1e..193449e12 100644
--- a/lang/rust/avro/src/reader.rs
+++ b/lang/rust/avro/src/reader.rs
@@ -82,7 +82,7 @@ impl<R: Read> Block<R> {
 
         if let Value::Map(metadata) = decode(&meta_schema, &mut self.reader)? {
             self.read_writer_schema(&metadata)?;
-            self.read_codec(&metadata);
+            self.codec = read_codec(&metadata)?;
 
             for (key, value) in metadata {
                 if key == "avro.schema" || key == "avro.codec" {
@@ -199,22 +199,6 @@ impl<R: Read> Block<R> {
         Ok(())
     }
 
-    fn read_codec(&mut self, metadata: &HashMap<String, Value>) {
-        if let Some(codec) = metadata
-            .get("avro.codec")
-            .and_then(|codec| {
-                if let Value::Bytes(ref bytes) = *codec {
-                    std::str::from_utf8(bytes.as_ref()).ok()
-                } else {
-                    None
-                }
-            })
-            .and_then(|codec| Codec::from_str(codec).ok())
-        {
-            self.codec = codec;
-        }
-    }
-
     fn read_user_metadata(&mut self, key: String, value: Value) {
         match value {
             Value::Bytes(ref vec) => {
@@ -230,6 +214,33 @@ impl<R: Read> Block<R> {
     }
 }
 
+fn read_codec(metadata: &HashMap<String, Value>) -> AvroResult<Codec> {
+    let result = metadata
+        .get("avro.codec")
+        .map(|codec| {
+            if let Value::Bytes(ref bytes) = *codec {
+                match std::str::from_utf8(bytes.as_ref()) {
+                    Ok(utf8) => Ok(utf8),
+                    Err(utf8_error) => Err(Error::ConvertToUtf8Error(utf8_error)),
+                }
+            } else {
+                Err(Error::BadCodecMetadata)
+            }
+        })
+        .map(|codec_res| match codec_res {
+            Ok(codec) => match Codec::from_str(codec) {
+                Ok(codec) => Ok(codec),
+                Err(_) => Err(Error::CodecNotSupported(codec.to_owned())),
+            },
+            Err(err) => Err(err),
+        });
+
+    match result {
+        Some(res) => res,
+        None => Ok(Codec::Null),
+    }
+}
+
 /// Main interface for reading Avro formatted values.
 ///
 /// To be used as an iterator:
@@ -821,4 +832,27 @@ mod tests {
         assert_eq!(obj, read_obj2);
         assert_eq!(val, expected_value)
     }
+
+    #[cfg(not(feature = "snappy"))]
+    #[test]
+    fn test_avro_3549_read_not_enabled_codec() {
+        let snappy_compressed_avro = vec![
+            79, 98, 106, 1, 4, 22, 97, 118, 114, 111, 46, 115, 99, 104, 101, 109, 97, 210, 1, 123,
+            34, 102, 105, 101, 108, 100, 115, 34, 58, 91, 123, 34, 110, 97, 109, 101, 34, 58, 34,
+            110, 117, 109, 34, 44, 34, 116, 121, 112, 101, 34, 58, 34, 115, 116, 114, 105, 110,
+            103, 34, 125, 93, 44, 34, 110, 97, 109, 101, 34, 58, 34, 101, 118, 101, 110, 116, 34,
+            44, 34, 110, 97, 109, 101, 115, 112, 97, 99, 101, 34, 58, 34, 101, 120, 97, 109, 112,
+            108, 101, 110, 97, 109, 101, 115, 112, 97, 99, 101, 34, 44, 34, 116, 121, 112, 101, 34,
+            58, 34, 114, 101, 99, 111, 114, 100, 34, 125, 20, 97, 118, 114, 111, 46, 99, 111, 100,
+            101, 99, 12, 115, 110, 97, 112, 112, 121, 0, 213, 209, 241, 208, 200, 110, 164, 47,
+            203, 25, 90, 235, 161, 167, 195, 177, 2, 20, 4, 12, 6, 49, 50, 51, 115, 38, 58, 0, 213,
+            209, 241, 208, 200, 110, 164, 47, 203, 25, 90, 235, 161, 167, 195, 177,
+        ];
+
+        if let Err(err) = Reader::new(snappy_compressed_avro.as_slice()) {
+            assert_eq!("Codec 'snappy' is not supported/enabled", err.to_string());
+        } else {
+            panic!("Expected an error in the reading of the codec!");
+        }
+    }
 }