You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/21 22:48:14 UTC

[arrow-rs] branch master updated: Don't Skip Serializing Empty Metadata (#3082) (#3126)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 528a07283 Don't Skip Serializing Empty Metadata (#3082) (#3126)
528a07283 is described below

commit 528a0728359946436454678625630ad55e6499fb
Author: askoa <11...@users.noreply.github.com>
AuthorDate: Mon Nov 21 17:48:08 2022 -0500

    Don't Skip Serializing Empty Metadata (#3082) (#3126)
    
    * remove skip_serializing_if to please postcard
    
    * add serde postcard roundtrip tests
    
    * fix formatting issues
    
    * use bincode for serialization tests
    
    Co-authored-by: askoa <as...@local>
---
 arrow-schema/Cargo.toml      |  1 +
 arrow-schema/src/datatype.rs |  8 ++++----
 arrow-schema/src/field.rs    | 37 +++++++++++++++++++++++++++++++++----
 arrow-schema/src/schema.rs   |  4 ----
 4 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/arrow-schema/Cargo.toml b/arrow-schema/Cargo.toml
index 3b809f23e..d88632d10 100644
--- a/arrow-schema/Cargo.toml
+++ b/arrow-schema/Cargo.toml
@@ -45,3 +45,4 @@ default = []
 
 [dev-dependencies]
 serde_json = "1.0"
+bincode = { version = "1.3.3", default-features = false }
diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs
index acf369145..572d6f67d 100644
--- a/arrow-schema/src/datatype.rs
+++ b/arrow-schema/src/datatype.rs
@@ -415,11 +415,11 @@ mod tests {
         assert_eq!(
             "{\"Struct\":[\
              {\"name\":\"first_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{\"k\":\"v\"}},\
-             {\"name\":\"last_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\
+             {\"name\":\"last_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{}},\
              {\"name\":\"address\",\"data_type\":{\"Struct\":\
-             [{\"name\":\"street\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\
-             {\"name\":\"zip\",\"data_type\":\"UInt16\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}\
-             ]},\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}]}",
+             [{\"name\":\"street\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{}},\
+             {\"name\":\"zip\",\"data_type\":\"UInt16\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{}}\
+             ]},\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false,\"metadata\":{}}]}",
             serialized
         );
 
diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index cd9024747..9eed03ed2 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -35,10 +35,6 @@ pub struct Field {
     dict_id: i64,
     dict_is_ordered: bool,
     /// A map of key-value pairs containing additional custom meta data.
-    #[cfg_attr(
-        feature = "serde",
-        serde(skip_serializing_if = "HashMap::is_empty", default)
-    )]
     metadata: HashMap<String, String>,
 }
 
@@ -654,4 +650,37 @@ mod test {
         assert!(!field1.contains(&field2));
         assert!(!field2.contains(&field1));
     }
+
+    #[cfg(feature = "serde")]
+    fn assert_binary_serde_round_trip(field: Field) {
+        let serialized = bincode::serialize(&field).unwrap();
+        let deserialized: Field = bincode::deserialize(&serialized).unwrap();
+        assert_eq!(field, deserialized)
+    }
+
+    #[cfg(feature = "serde")]
+    #[test]
+    fn test_field_without_metadata_serde() {
+        let field = Field::new("name", DataType::Boolean, true);
+        assert_binary_serde_round_trip(field)
+    }
+
+    #[cfg(feature = "serde")]
+    #[test]
+    fn test_field_with_empty_metadata_serde() {
+        let field =
+            Field::new("name", DataType::Boolean, false).with_metadata(HashMap::new());
+
+        assert_binary_serde_round_trip(field)
+    }
+
+    #[cfg(feature = "serde")]
+    #[test]
+    fn test_field_with_nonempty_metadata_serde() {
+        let mut metadata = HashMap::new();
+        metadata.insert("hi".to_owned(), "".to_owned());
+        let field = Field::new("name", DataType::Boolean, false).with_metadata(metadata);
+
+        assert_binary_serde_round_trip(field)
+    }
 }
diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs
index 8ff40866d..e45cedfb6 100644
--- a/arrow-schema/src/schema.rs
+++ b/arrow-schema/src/schema.rs
@@ -34,10 +34,6 @@ pub type SchemaRef = std::sync::Arc<Schema>;
 pub struct Schema {
     pub fields: Vec<Field>,
     /// A map of key-value pairs containing additional meta data.
-    #[cfg_attr(
-        feature = "serde",
-        serde(skip_serializing_if = "HashMap::is_empty", default)
-    )]
     pub metadata: HashMap<String, String>,
 }