You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by mg...@apache.org on 2022/10/06 08:48:36 UTC
[avro] branch avro-3631/fix-fixed-serialization updated: AVRO-3631: Add serde serialize_with functions
This is an automated email from the ASF dual-hosted git repository.
mgrigorov pushed a commit to branch avro-3631/fix-fixed-serialization
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/avro-3631/fix-fixed-serialization by this push:
new 7c9e938eb AVRO-3631: Add serde serialize_with functions
7c9e938eb is described below
commit 7c9e938eb67f1812fb3be1e341987003ef2f359f
Author: Martin Tzvetanov Grigorov <mg...@apache.org>
AuthorDate: Thu Oct 6 11:45:50 2022 +0300
AVRO-3631: Add serde serialize_with functions
Those should be used for hinting the serialization process how to serialize a byte array to Value::(Bytes|Fixed)
Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
---
lang/rust/Cargo.lock | 1 +
lang/rust/avro/Cargo.toml | 2 +
lang/rust/avro/src/lib.rs | 2 +-
lang/rust/avro/src/ser.rs | 135 +++++++++++++++++++++++++++++++++++++++-----
lang/rust/avro/src/types.rs | 31 ++++++++--
lang/rust/avro/tests/ser.rs | 37 ++++++++++++
6 files changed, 186 insertions(+), 22 deletions(-)
diff --git a/lang/rust/Cargo.lock b/lang/rust/Cargo.lock
index 721cf9514..ab229bf9c 100644
--- a/lang/rust/Cargo.lock
+++ b/lang/rust/Cargo.lock
@@ -50,6 +50,7 @@ dependencies = [
"pretty_assertions",
"quad-rand",
"rand",
+ "ref_thread_local",
"regex",
"serde",
"serde_bytes",
diff --git a/lang/rust/avro/Cargo.toml b/lang/rust/avro/Cargo.toml
index de9528b17..6a29bcd50 100644
--- a/lang/rust/avro/Cargo.toml
+++ b/lang/rust/avro/Cargo.toml
@@ -75,6 +75,8 @@ uuid = { default-features = false, version = "1.1.2", features = ["serde", "std"
xz2 = { default-features = false, version = "0.1.7", optional = true }
zerocopy = { default-features = false, version = "0.6.1" }
zstd = { default-features = false, version = "0.11.2+zstd.1.5.2", optional = true }
+ref_thread_local = { default-features = false, version = "0.1.1" }
+
[target.'cfg(target_arch = "wasm32")'.dependencies]
quad-rand = { default-features = false, version = "0.2.1" }
diff --git a/lang/rust/avro/src/lib.rs b/lang/rust/avro/src/lib.rs
index a6b06d74a..9a4110abb 100644
--- a/lang/rust/avro/src/lib.rs
+++ b/lang/rust/avro/src/lib.rs
@@ -746,7 +746,7 @@ pub use reader::{
from_avro_datum, read_marker, GenericSingleObjectReader, Reader, SpecificSingleObjectReader,
};
pub use schema::{AvroSchema, Schema};
-pub use ser::to_value;
+pub use ser::{avro_serialize_bytes, avro_serialize_fixed, to_value};
pub use util::max_allocation_bytes;
pub use writer::{to_avro_datum, GenericSingleObjectWriter, SpecificSingleObjectWriter, Writer};
diff --git a/lang/rust/avro/src/ser.rs b/lang/rust/avro/src/ser.rs
index af286640d..15c7afb3b 100644
--- a/lang/rust/avro/src/ser.rs
+++ b/lang/rust/avro/src/ser.rs
@@ -17,9 +17,25 @@
//! Logic for serde-compatible serialization.
use crate::{types::Value, Error};
+use ref_thread_local::ref_thread_local;
+use ref_thread_local::RefThreadLocal;
use serde::{ser, Serialize};
use std::{collections::HashMap, iter::once};
+ref_thread_local! {
+ /// A thread local that is used to decide how to serialize
+ /// a byte array into Avro `types::Value`.
+ ///
+ /// Depends on the fact that serde's serialization process is single-threaded!
+ static managed BYTES_TYPE: BytesType = BytesType::Bytes;
+}
+
+/// A hint helping in the serialization of a byte arrays (&[u8], [u8; N])
+enum BytesType {
+ Bytes,
+ Fixed,
+}
+
#[derive(Clone, Default)]
pub struct Serializer {}
@@ -174,7 +190,10 @@ impl<'b> ser::Serializer for &'b mut Serializer {
}
fn serialize_bytes(self, v: &[u8]) -> Result<Self::Ok, Self::Error> {
- Ok(Value::Fixed(v.len(), v.to_owned()))
+ match *BYTES_TYPE.borrow() {
+ BytesType::Bytes => Ok(Value::Bytes(v.to_owned())),
+ BytesType::Fixed => Ok(Value::Fixed(v.len(), v.to_owned())),
+ }
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
@@ -473,7 +492,7 @@ impl<'a> ser::SerializeStructVariant for StructVariantSerializer<'a> {
}
}
-/// Interpret a serializeable instance as a `Value`.
+/// Interpret a serializable instance as a `Value`.
///
/// This conversion can fail if the value is not valid as per the Avro specification.
/// e.g: HashMap with non-string keys
@@ -482,6 +501,42 @@ pub fn to_value<S: Serialize>(value: S) -> Result<Value, Error> {
value.serialize(&mut serializer)
}
+/// A function that could be used by #[serde(serialize_with = ...)] to give a
+/// hint to Avro's `Serializer` how to serialize a byte array like `[u8; N]` to
+/// `Value::Fixed`
+#[allow(dead_code)]
+pub fn avro_serialize_fixed<S>(value: &[u8], serializer: S) -> Result<S::Ok, S::Error>
+where
+ S: ser::Serializer,
+{
+ serialize_bytes_type(value, serializer, BytesType::Fixed)
+}
+
+/// A function that could be used by #[serde(serialize_with = ...)] to give a
+/// hint to Avro's `Serializer` how to serialize a byte array like `&[u8]` to
+/// `Value::Bytes`
+#[allow(dead_code)]
+pub fn avro_serialize_bytes<S>(value: &[u8], serializer: S) -> Result<S::Ok, S::Error>
+where
+ S: ser::Serializer,
+{
+ serialize_bytes_type(value, serializer, BytesType::Bytes)
+}
+
+fn serialize_bytes_type<S>(
+ value: &[u8],
+ serializer: S,
+ bytes_type: BytesType,
+) -> Result<S::Ok, S::Error>
+where
+ S: ser::Serializer,
+{
+ *BYTES_TYPE.borrow_mut() = bytes_type;
+ let res = serializer.serialize_bytes(value);
+ *BYTES_TYPE.borrow_mut() = BytesType::Bytes;
+ res
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -1002,20 +1057,70 @@ mod tests {
#[test]
fn avro_3631_test_to_value_fixed_field() {
#[derive(Debug, Serialize, Deserialize)]
- struct TestStructFixedField {
- #[serde(with = "serde_bytes")]
- field: [u8; 6],
+ struct TestStructFixedField<'a> {
+ // will be serialized as Value::Array<Vec<Value::Int>>
+ array_field: &'a [u8],
+
+ // will be serialized as Value::Fixed
+ #[serde(serialize_with = "avro_serialize_fixed")]
+ fixed_field: [u8; 6],
+ #[serde(serialize_with = "avro_serialize_fixed")]
+ fixed_field2: &'a [u8],
+
+ // will be serialized as Value::Bytes
+ #[serde(serialize_with = "avro_serialize_bytes")]
+ bytes_field: &'a [u8],
+ #[serde(serialize_with = "avro_serialize_bytes")]
+ bytes_field2: [u8; 6],
+
+ // will be serialized as Value::Array<Vec<Value::Int>>
+ vec_field: Vec<u8>,
}
- let test = TestStructFixedField { field: [1; 6] };
- let expected = Value::Record(vec![(
- "field".to_owned(),
- Value::Fixed(6, Vec::from(test.field.clone())),
- )]);
- assert_eq!(
- expected,
- to_value(test).unwrap(),
- "error serializing fixed array"
- );
+ let test = TestStructFixedField {
+ array_field: &[1, 11, 111],
+ bytes_field: &[2, 22, 222],
+ bytes_field2: [2; 6],
+ fixed_field: [1; 6],
+ fixed_field2: &[6, 66],
+ vec_field: vec![3, 33],
+ };
+ let expected = Value::Record(vec![
+ (
+ "array_field".to_owned(),
+ Value::Array(
+ test.array_field
+ .iter()
+ .map(|i| Value::Int(*i as i32))
+ .collect(),
+ ),
+ ),
+ (
+ "fixed_field".to_owned(),
+ Value::Fixed(6, Vec::from(test.fixed_field.clone())),
+ ),
+ (
+ "fixed_field2".to_owned(),
+ Value::Fixed(2, Vec::from(test.fixed_field2.clone())),
+ ),
+ (
+ "bytes_field".to_owned(),
+ Value::Bytes(Vec::from(test.bytes_field.clone())),
+ ),
+ (
+ "bytes_field2".to_owned(),
+ Value::Bytes(Vec::from(test.bytes_field2.clone())),
+ ),
+ (
+ "vec_field".to_owned(),
+ Value::Array(
+ test.vec_field
+ .iter()
+ .map(|i| Value::Int(*i as i32))
+ .collect(),
+ ),
+ ),
+ ]);
+ assert_eq!(expected, to_value(test).unwrap());
}
}
diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs
index 2b5dc56cd..a946b6c6d 100644
--- a/lang/rust/avro/src/types.rs
+++ b/lang/rust/avro/src/types.rs
@@ -535,7 +535,10 @@ impl Value {
}
})
}
- (_v, _s) => Some("Unsupported value-schema combination".to_string()),
+ (v, s) => Some(format!(
+ "Unsupported value-schema combination: \nValue: {:?},\nSchema: {:?}",
+ v, s
+ )),
}
}
@@ -2397,12 +2400,20 @@ Field with name '"b"' is not a member of the map items"#,
#[test]
fn avro_3631_test_serialize_fixed_fields() {
#[derive(Debug, Serialize, Deserialize)]
- struct TestStructFixedField {
+ struct TestStructFixedField<'a> {
+ bytes_field: &'a [u8],
+ vec_field: Vec<u8>,
#[serde(with = "serde_bytes")]
- field: [u8; 6],
+ fixed_field: [u8; 6],
+ // #[serde(with = "serde_bytes")]
+ // #[serde(with = "serde_bytes")]
}
- let test = TestStructFixedField { field: [1; 6] };
+ let test = TestStructFixedField {
+ bytes_field: &[1, 2, 3],
+ fixed_field: [1; 6],
+ vec_field: vec![2, 3, 4],
+ };
let value: Value = to_value(test).unwrap();
let schema = Schema::parse_str(
r#"
@@ -2411,9 +2422,17 @@ Field with name '"b"' is not a member of the map items"#,
"name": "TestStructFixedField",
"fields": [
{
- "name": "field",
+ "name": "bytes_field",
+ "type": "bytes"
+ },
+ {
+ "name": "vec_field",
+ "type": "bytes"
+ },
+ {
+ "name": "fixed_field",
"type": {
- "name": "field",
+ "name": "fixed_field",
"type": "fixed",
"size": 6
}
diff --git a/lang/rust/avro/tests/ser.rs b/lang/rust/avro/tests/ser.rs
new file mode 100644
index 000000000..30717e480
--- /dev/null
+++ b/lang/rust/avro/tests/ser.rs
@@ -0,0 +1,37 @@
+use apache_avro::to_value;
+use apache_avro::types::Value;
+use serde::{Deserialize, Serialize};
+
+#[test]
+fn avro_3631_visibility_of_avro_serialize_bytes_type() {
+ use apache_avro::{avro_serialize_bytes, avro_serialize_fixed};
+
+ #[derive(Debug, Serialize, Deserialize)]
+ struct TestStructFixedField<'a> {
+ // will be serialized as Value::Bytes
+ #[serde(serialize_with = "avro_serialize_bytes")]
+ bytes_field: &'a [u8],
+
+ // will be serialized as Value::Fixed
+ #[serde(serialize_with = "avro_serialize_fixed")]
+ fixed_field: [u8; 6],
+ }
+
+ let test = TestStructFixedField {
+ bytes_field: &[2, 22, 222],
+ fixed_field: [1; 6],
+ };
+
+ let expected = Value::Record(vec![
+ (
+ "bytes_field".to_owned(),
+ Value::Bytes(Vec::from(test.bytes_field.clone())),
+ ),
+ (
+ "fixed_field".to_owned(),
+ Value::Fixed(6, Vec::from(test.fixed_field.clone())),
+ ),
+ ]);
+
+ assert_eq!(expected, to_value(test).unwrap());
+}