You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by mg...@apache.org on 2024/01/04 20:08:36 UTC
(avro) 01/01: AVRO-3926: [Rust] Allow UUID to serialize to Fixed[16]
This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch avro-3926-allow-serde-uuid-to-fixed-16
in repository https://gitbox.apache.org/repos/asf/avro.git
commit 3b2982401b91532f0691ead58d71002ea85c6480
Author: Martin Tzvetanov Grigorov <mg...@apache.org>
AuthorDate: Thu Jan 4 22:07:23 2024 +0200
AVRO-3926: [Rust] Allow UUID to serialize to Fixed[16]
Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
---
lang/rust/avro/src/decode.rs | 103 +++++++++++++++++++++++++++++++++++++++----
lang/rust/avro/src/encode.rs | 27 +++++++++---
lang/rust/avro/src/error.rs | 9 ++++
lang/rust/avro/src/schema.rs | 60 ++++++++++++++++++++-----
4 files changed, 173 insertions(+), 26 deletions(-)
diff --git a/lang/rust/avro/src/decode.rs b/lang/rust/avro/src/decode.rs
index bf8477fb7..3171a9426 100644
--- a/lang/rust/avro/src/decode.rs
+++ b/lang/rust/avro/src/decode.rs
@@ -19,6 +19,7 @@ use crate::{
bigdecimal::deserialize_big_decimal,
decimal::Decimal,
duration::Duration,
+ encode::encode_long,
schema::{
DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema, ResolvedSchema,
Schema,
@@ -121,15 +122,60 @@ pub(crate) fn decode_internal<R: Read, S: Borrow<Schema>>(
value => Err(Error::BytesValue(value.into())),
}
}
- Schema::Uuid => Ok(Value::Uuid(
- Uuid::from_str(
- match decode_internal(&Schema::String, names, enclosing_namespace, reader)? {
- Value::String(ref s) => s,
- value => return Err(Error::GetUuidFromStringValue(value.into())),
- },
- )
- .map_err(Error::ConvertStrToUuid)?,
- )),
+ Schema::Uuid => {
+ let len = decode_len(reader)?;
+ let mut bytes = vec![0u8; len];
+ reader.read_exact(&mut bytes).map_err(Error::ReadIntoBuf)?;
+
+ // use a Vec to be able re-read the bytes more than once if needed
+ let mut reader = Vec::with_capacity(len + 1);
+ encode_long(len as i64, &mut reader);
+ reader.extend_from_slice(&bytes);
+
+ let decode_from_string = |reader| match decode_internal(
+ &Schema::String,
+ names,
+ enclosing_namespace,
+ reader,
+ )? {
+ Value::String(ref s) => Uuid::from_str(s).map_err(Error::ConvertStrToUuid),
+ value => Err(Error::GetUuidFromStringValue(value.into())),
+ };
+
+ let uuid: Uuid = if len == 16 {
+ // most probably a Fixed schema
+ let fixed_result = decode_internal(
+ &Schema::Fixed(FixedSchema {
+ size: 16,
+ name: "uuid".into(),
+ aliases: None,
+ doc: None,
+ attributes: Default::default(),
+ }),
+ names,
+ enclosing_namespace,
+ &mut bytes.as_slice(),
+ );
+ if fixed_result.is_ok() {
+ match fixed_result? {
+ Value::Fixed(ref size, ref bytes) => {
+ if *size != 16 {
+ return Err(Error::ConvertFixedToUuid(*size));
+ }
+ Uuid::from_slice(bytes).map_err(Error::ConvertSliceToUuid)?
+ }
+ _ => decode_from_string(&mut reader.as_slice())?,
+ }
+ } else {
+ // try to decode as string
+ decode_from_string(&mut reader.as_slice())?
+ }
+ } else {
+ // definitely a string
+ decode_from_string(&mut reader.as_slice())?
+ };
+ Ok(Value::Uuid(uuid))
+ }
Schema::Int => decode_int(reader),
Schema::Date => zag_i32(reader).map(Value::Date),
Schema::TimeMillis => zag_i32(reader).map(Value::TimeMillis),
@@ -317,6 +363,7 @@ mod tests {
use apache_avro_test_helper::TestResult;
use pretty_assertions::assert_eq;
use std::collections::HashMap;
+ use uuid::Uuid;
#[test]
fn test_decode_array_without_size() -> TestResult {
@@ -815,4 +862,42 @@ mod tests {
Ok(())
}
+
+ #[test]
+ fn avro_3926_encode_decode_uuid_to_string() -> TestResult {
+ use crate::encode::encode;
+
+ let schema = Schema::String;
+ let value = Value::Uuid(Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?);
+
+ let mut buffer = Vec::new();
+ encode(&value, &schema, &mut buffer).expect(&success(&value, &schema));
+
+ let result = decode(&Schema::Uuid, &mut &buffer[..])?;
+ assert_eq!(result, value);
+
+ Ok(())
+ }
+
+ #[test]
+ fn avro_3926_encode_decode_uuid_to_fixed() -> TestResult {
+ use crate::encode::encode;
+
+ let schema = Schema::Fixed(FixedSchema {
+ size: 16,
+ name: "uuid".into(),
+ aliases: None,
+ doc: None,
+ attributes: Default::default(),
+ });
+ let value = Value::Uuid(Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000")?);
+
+ let mut buffer = Vec::new();
+ encode(&value, &schema, &mut buffer).expect(&success(&value, &schema));
+
+ let result = decode(&Schema::Uuid, &mut &buffer[..])?;
+ assert_eq!(result, value);
+
+ Ok(())
+ }
}
diff --git a/lang/rust/avro/src/encode.rs b/lang/rust/avro/src/encode.rs
index 23f94664c..390b09164 100644
--- a/lang/rust/avro/src/encode.rs
+++ b/lang/rust/avro/src/encode.rs
@@ -125,12 +125,27 @@ pub(crate) fn encode_internal<S: Borrow<Schema>>(
let slice: [u8; 12] = duration.into();
buffer.extend_from_slice(&slice);
}
- Value::Uuid(uuid) => encode_bytes(
- // we need the call .to_string() to properly convert ASCII to UTF-8
- #[allow(clippy::unnecessary_to_owned)]
- &uuid.to_string(),
- buffer,
- ),
+ Value::Uuid(uuid) => match *schema {
+ Schema::String => encode_bytes(
+ // we need the call .to_string() to properly convert ASCII to UTF-8
+ #[allow(clippy::unnecessary_to_owned)]
+ &uuid.to_string(),
+ buffer,
+ ),
+ Schema::Fixed(FixedSchema { size, .. }) => {
+ let bytes = uuid.as_bytes();
+ if bytes.len() != size {
+ return Err(Error::EncodeUuidAsFixedError(bytes.len(), size));
+ }
+ encode_bytes(bytes, buffer)
+ }
+ _ => {
+ return Err(Error::EncodeValueAsSchemaError {
+ value_kind: ValueKind::Uuid,
+ supported_schema: vec![SchemaKind::Uuid, SchemaKind::Fixed],
+ });
+ }
+ },
Value::BigDecimal(bg) => {
let buf: Vec<u8> = serialize_big_decimal(bg);
buffer.extend_from_slice(buf.as_slice());
diff --git a/lang/rust/avro/src/error.rs b/lang/rust/avro/src/error.rs
index 36c8d94a9..bf7a9574c 100644
--- a/lang/rust/avro/src/error.rs
+++ b/lang/rust/avro/src/error.rs
@@ -89,6 +89,15 @@ pub enum Error {
#[error("Failed to convert &str to UUID")]
ConvertStrToUuid(#[source] uuid::Error),
+ #[error("Failed to convert Fixed bytes to UUID. It must be exactly 16 bytes, got {0}")]
+ ConvertFixedToUuid(usize),
+
+ #[error("Failed to convert Fixed bytes to UUID")]
+ ConvertSliceToUuid(#[source] uuid::Error),
+
+ #[error("Failed to encode Fixed bytes to UUID: {0} -> {1}")]
+ EncodeUuidAsFixedError(usize, usize),
+
#[error("Map key is not a string; key type is {0:?}")]
MapKeyType(ValueKind),
diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs
index f4c063df6..15cb86734 100644
--- a/lang/rust/avro/src/schema.rs
+++ b/lang/rust/avro/src/schema.rs
@@ -1404,8 +1404,24 @@ impl Parser {
return try_convert_to_logical_type(
"uuid",
parse_as_native_complex(complex, self, enclosing_namespace)?,
- &[SchemaKind::String],
- |_| -> AvroResult<Schema> { Ok(Schema::Uuid) },
+ &[SchemaKind::String, SchemaKind::Fixed],
+ |schema| match schema {
+ Schema::String => Ok(Schema::Uuid),
+ Schema::Fixed(FixedSchema { size, .. }) if size == 16 => {
+ Ok(Schema::Uuid)
+ }
+ Schema::Fixed(FixedSchema { size, .. }) => {
+ warn!("Ignoring uuid logical type for a Fixed schema because its size ({size:?}) is not 16! Schema: {:?}", schema);
+ Ok(schema)
+ }
+ _ => {
+ warn!(
+ "Ignoring invalid uuid logical type for schema: {:?}",
+ schema
+ );
+ Ok(schema)
+ }
+ },
);
}
"date" => {
@@ -2367,6 +2383,7 @@ pub mod derive {
#[cfg(test)]
mod tests {
use super::*;
+ use apache_avro_test_helper::logger::{assert_logged, assert_not_logged};
use apache_avro_test_helper::TestResult;
use pretty_assertions::assert_eq;
use serde_json::json;
@@ -6299,7 +6316,7 @@ mod tests {
}
#[test]
- fn test_avro_3896_uuid_schema() -> TestResult {
+ fn avro_3896_uuid_schema_for_string() -> TestResult {
// string uuid, represents as native logical type.
let schema = json!(
{
@@ -6310,8 +6327,11 @@ mod tests {
let parse_result = Schema::parse(&schema)?;
assert_eq!(parse_result, Schema::Uuid);
- // uuid logical type is not supported for SchemaKind::Fixed, so it is parsed as Schema::Fixed
- // and the `logicalType` is preserved as an attribute.
+ Ok(())
+ }
+
+ #[test]
+ fn avro_3926_uuid_schema_for_fixed_with_size_16() -> TestResult {
let schema = json!(
{
"type": "fixed",
@@ -6320,19 +6340,37 @@ mod tests {
"logicalType": "uuid"
});
let parse_result = Schema::parse(&schema)?;
+ assert_eq!(parse_result, Schema::Uuid);
+ assert_not_logged(
+ r#"Ignoring uuid logical type for a Fixed schema because its size (6) is not 16! Schema: Fixed(FixedSchema { name: Name { name: "FixedUUID", namespace: None }, aliases: None, doc: None, size: 6, attributes: {"logicalType": String("uuid")} })"#,
+ );
+
+ Ok(())
+ }
+
+ #[test]
+ fn avro_3926_uuid_schema_for_fixed_with_size_different_than_16() -> TestResult {
+ let schema = json!(
+ {
+ "type": "fixed",
+ "name": "FixedUUID",
+ "size": 6,
+ "logicalType": "uuid"
+ });
+ let parse_result = Schema::parse(&schema)?;
assert_eq!(
parse_result,
Schema::Fixed(FixedSchema {
name: Name::new("FixedUUID")?,
- doc: None,
aliases: None,
- size: 16,
- attributes: BTreeMap::from([(
- "logicalType".to_string(),
- Value::String(String::from("uuid")),
- )]),
+ doc: None,
+ size: 6,
+ attributes: BTreeMap::from([("logicalType".to_string(), "uuid".into())]),
})
);
+ assert_logged(
+ r#"Ignoring uuid logical type for a Fixed schema because its size (6) is not 16! Schema: Fixed(FixedSchema { name: Name { name: "FixedUUID", namespace: None }, aliases: None, doc: None, size: 6, attributes: {"logicalType": String("uuid")} })"#,
+ );
Ok(())
}