You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by mg...@apache.org on 2024/01/04 20:08:36 UTC

(avro) 01/01: AVRO-3926: [Rust] Allow UUID to serialize to Fixed[16]

This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch avro-3926-allow-serde-uuid-to-fixed-16
in repository https://gitbox.apache.org/repos/asf/avro.git

commit 3b2982401b91532f0691ead58d71002ea85c6480
Author: Martin Tzvetanov Grigorov <mg...@apache.org>
AuthorDate: Thu Jan 4 22:07:23 2024 +0200

    AVRO-3926: [Rust] Allow UUID to serialize to Fixed[16]
    
    Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
---
 lang/rust/avro/src/decode.rs | 103 +++++++++++++++++++++++++++++++++++++++----
 lang/rust/avro/src/encode.rs |  27 +++++++++---
 lang/rust/avro/src/error.rs  |   9 ++++
 lang/rust/avro/src/schema.rs |  60 ++++++++++++++++++++-----
 4 files changed, 173 insertions(+), 26 deletions(-)

diff --git a/lang/rust/avro/src/decode.rs b/lang/rust/avro/src/decode.rs
index bf8477fb7..3171a9426 100644
--- a/lang/rust/avro/src/decode.rs
+++ b/lang/rust/avro/src/decode.rs
@@ -19,6 +19,7 @@ use crate::{
     bigdecimal::deserialize_big_decimal,
     decimal::Decimal,
     duration::Duration,
+    encode::encode_long,
     schema::{
         DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema, ResolvedSchema,
         Schema,
@@ -121,15 +122,60 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>(
                 value => Err(Error::BytesValue(value.into())),
             }
         }
-        Schema::Uuid => Ok(Value::Uuid(
-            Uuid::from_str(
-                match decode_internal(&Schema::String, names, enclosing_namespace, reader)? {
-                    Value::String(ref s) => s,
-                    value => return Err(Error::GetUuidFromStringValue(value.into())),
-                },
-            )
-            .map_err(Error::ConvertStrToUuid)?,
-        )),
+        Schema::Uuid => {
+            let len = decode_len(reader)?;
+            let mut bytes = vec![0u8; len];
+            reader.read_exact(&mut bytes).map_err(Error::ReadIntoBuf)?;
+
+            // use a Vec to be able re-read the bytes more than once if needed
+            let mut reader = Vec::with_capacity(len + 1);
+            encode_long(len as i64, &mut reader);
+            reader.extend_from_slice(&bytes);
+
+            let decode_from_string = |reader| match decode_internal(
+                &Schema::String,
+                names,
+                enclosing_namespace,
+                reader,
+            )? {
+                Value::String(ref s) => Uuid::from_str(s).map_err(Error::ConvertStrToUuid),
+                value => Err(Error::GetUuidFromStringValue(value.into())),
+            };
+
+            let uuid: Uuid = if len == 16 {
+                // most probably a Fixed schema
+                let fixed_result = decode_internal(
+                    &Schema::Fixed(FixedSchema {
+                        size: 16,
+                        name: "uuid".into(),
+                        aliases: None,
+                        doc: None,
+                        attributes: Default::default(),
+                    }),
+                    names,
+                    enclosing_namespace,
+                    &mut bytes.as_slice(),
+                );
+                if fixed_result.is_ok() {
+                    match fixed_result? {
+                        Value::Fixed(ref size, ref bytes) => {
+                            if *size != 16 {
+                                return Err(Error::ConvertFixedToUuid(*size));
+                            }
+                            Uuid::from_slice(bytes).map_err(Error::ConvertSliceToUuid)?
+                        }
+                        _ => decode_from_string(&mut reader.as_slice())?,
+                    }
+                } else {
+                    // try to decode as string
+                    decode_from_string(&mut reader.as_slice())?
+                }
+            } else {
+                // definitely a string
+                decode_from_string(&mut reader.as_slice())?
+            };
+            Ok(Value::Uuid(uuid))
+        }
         Schema::Int => decode_int(reader),
         Schema::Date => zag_i32(reader).map(Value::Date),
         Schema::TimeMillis => zag_i32(reader).map(Value::TimeMillis),
@@ -317,6 +363,7 @@ mod tests {
     use apache_avro_test_helper::TestResult;
     use pretty_assertions::assert_eq;
     use std::collections::HashMap;
+    use uuid::Uuid;
 
     #[test]
     fn test_decode_array_without_size() -> TestResult {
@@ -815,4 +862,42 @@ mod tests {
 
         Ok(())
     }
+
+    #[test]
+    fn avro_3926_encode_decode_uuid_to_string() -> TestResult {
+        use crate::encode::encode;
+
+        let schema = Schema::String;
+        let value = Value::Uuid(Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?);
+
+        let mut buffer = Vec::new();
+        encode(&value, &schema, &mut buffer).expect(&success(&value, &schema));
+
+        let result = decode(&Schema::Uuid, &mut &buffer[..])?;
+        assert_eq!(result, value);
+
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3926_encode_decode_uuid_to_fixed() -> TestResult {
+        use crate::encode::encode;
+
+        let schema = Schema::Fixed(FixedSchema {
+            size: 16,
+            name: "uuid".into(),
+            aliases: None,
+            doc: None,
+            attributes: Default::default(),
+        });
+        let value = Value::Uuid(Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?);
+
+        let mut buffer = Vec::new();
+        encode(&value, &schema, &mut buffer).expect(&success(&value, &schema));
+
+        let result = decode(&Schema::Uuid, &mut &buffer[..])?;
+        assert_eq!(result, value);
+
+        Ok(())
+    }
 }
diff --git a/lang/rust/avro/src/encode.rs b/lang/rust/avro/src/encode.rs
index 23f94664c..390b09164 100644
--- a/lang/rust/avro/src/encode.rs
+++ b/lang/rust/avro/src/encode.rs
@@ -125,12 +125,27 @@ pub(crate) fn encode_internal<S: Borrow<Schema>>(
             let slice: [u8; 12] = duration.into();
             buffer.extend_from_slice(&slice);
         }
-        Value::Uuid(uuid) => encode_bytes(
-            // we need the call .to_string() to properly convert ASCII to UTF-8
-            #[allow(clippy::unnecessary_to_owned)]
-            &uuid.to_string(),
-            buffer,
-        ),
+        Value::Uuid(uuid) => match *schema {
+            Schema::String => encode_bytes(
+                // we need the call .to_string() to properly convert ASCII to UTF-8
+                #[allow(clippy::unnecessary_to_owned)]
+                &uuid.to_string(),
+                buffer,
+            ),
+            Schema::Fixed(FixedSchema { size, .. }) => {
+                let bytes = uuid.as_bytes();
+                if bytes.len() != size {
+                    return Err(Error::EncodeUuidAsFixedError(bytes.len(), size));
+                }
+                encode_bytes(bytes, buffer)
+            }
+            _ => {
+                return Err(Error::EncodeValueAsSchemaError {
+                    value_kind: ValueKind::Uuid,
+                    supported_schema: vec![SchemaKind::Uuid, SchemaKind::Fixed],
+                });
+            }
+        },
         Value::BigDecimal(bg) => {
             let buf: Vec<u8> = serialize_big_decimal(bg);
             buffer.extend_from_slice(buf.as_slice());
diff --git a/lang/rust/avro/src/error.rs b/lang/rust/avro/src/error.rs
index 36c8d94a9..bf7a9574c 100644
--- a/lang/rust/avro/src/error.rs
+++ b/lang/rust/avro/src/error.rs
@@ -89,6 +89,15 @@ pub enum Error {
     #[error("Failed to convert &str to UUID")]
     ConvertStrToUuid(#[source] uuid::Error),
 
+    #[error("Failed to convert Fixed bytes to UUID. It must be exactly 16 bytes, got {0}")]
+    ConvertFixedToUuid(usize),
+
+    #[error("Failed to convert Fixed bytes to UUID")]
+    ConvertSliceToUuid(#[source] uuid::Error),
+
+    #[error("Failed to encode Fixed bytes to UUID: {0} -> {1}")]
+    EncodeUuidAsFixedError(usize, usize),
+
     #[error("Map key is not a string; key type is {0:?}")]
     MapKeyType(ValueKind),
 
diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs
index f4c063df6..15cb86734 100644
--- a/lang/rust/avro/src/schema.rs
+++ b/lang/rust/avro/src/schema.rs
@@ -1404,8 +1404,24 @@ impl Parser {
                     return try_convert_to_logical_type(
                         "uuid",
                         parse_as_native_complex(complex, self, enclosing_namespace)?,
-                        &[SchemaKind::String],
-                        |_| -> AvroResult<Schema> { Ok(Schema::Uuid) },
+                        &[SchemaKind::String, SchemaKind::Fixed],
+                        |schema| match schema {
+                            Schema::String => Ok(Schema::Uuid),
+                            Schema::Fixed(FixedSchema { size, .. }) if size == 16 => {
+                                Ok(Schema::Uuid)
+                            }
+                            Schema::Fixed(FixedSchema { size, .. }) => {
+                                warn!("Ignoring uuid logical type for a Fixed schema because its size ({size:?}) is not 16! Schema: {:?}", schema);
+                                Ok(schema)
+                            }
+                            _ => {
+                                warn!(
+                                    "Ignoring invalid uuid logical type for schema: {:?}",
+                                    schema
+                                );
+                                Ok(schema)
+                            }
+                        },
                     );
                 }
                 "date" => {
@@ -2367,6 +2383,7 @@ pub mod derive {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use apache_avro_test_helper::logger::{assert_logged, assert_not_logged};
     use apache_avro_test_helper::TestResult;
     use pretty_assertions::assert_eq;
     use serde_json::json;
@@ -6299,7 +6316,7 @@ mod tests {
     }
 
     #[test]
-    fn test_avro_3896_uuid_schema() -> TestResult {
+    fn avro_3896_uuid_schema_for_string() -> TestResult {
         // string uuid, represents as native logical type.
         let schema = json!(
         {
@@ -6310,8 +6327,11 @@ mod tests {
         let parse_result = Schema::parse(&schema)?;
         assert_eq!(parse_result, Schema::Uuid);
 
-        // uuid logical type is not supported for SchemaKind::Fixed, so it is parsed as Schema::Fixed
-        // and the `logicalType` is preserved as an attribute.
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3926_uuid_schema_for_fixed_with_size_16() -> TestResult {
         let schema = json!(
         {
             "type": "fixed",
@@ -6320,19 +6340,37 @@ mod tests {
             "logicalType": "uuid"
         });
         let parse_result = Schema::parse(&schema)?;
+        assert_eq!(parse_result, Schema::Uuid);
+        assert_not_logged(
+            r#"Ignoring uuid logical type for a Fixed schema because its size (6) is not 16! Schema: Fixed(FixedSchema { name: Name { name: "FixedUUID", namespace: None }, aliases: None, doc: None, size: 6, attributes: {"logicalType": String("uuid")} })"#,
+        );
+
+        Ok(())
+    }
+
+    #[test]
+    fn avro_3926_uuid_schema_for_fixed_with_size_different_than_16() -> TestResult {
+        let schema = json!(
+        {
+            "type": "fixed",
+            "name": "FixedUUID",
+            "size": 6,
+            "logicalType": "uuid"
+        });
+        let parse_result = Schema::parse(&schema)?;
         assert_eq!(
             parse_result,
             Schema::Fixed(FixedSchema {
                 name: Name::new("FixedUUID")?,
-                doc: None,
                 aliases: None,
-                size: 16,
-                attributes: BTreeMap::from([(
-                    "logicalType".to_string(),
-                    Value::String(String::from("uuid")),
-                )]),
+                doc: None,
+                size: 6,
+                attributes: BTreeMap::from([("logicalType".to_string(), "uuid".into())]),
             })
         );
+        assert_logged(
+            r#"Ignoring uuid logical type for a Fixed schema because its size (6) is not 16! Schema: Fixed(FixedSchema { name: Name { name: "FixedUUID", namespace: None }, aliases: None, doc: None, size: 6, attributes: {"logicalType": String("uuid")} })"#,
+        );
 
         Ok(())
     }