You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2022/11/15 19:33:16 UTC
[arrow-rs] branch master updated: Remove Option from `Field::metadata` (#3091)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 8bb2917ee Remove Option from `Field::metadata` (#3091)
8bb2917ee is described below
commit 8bb2917ee7c22c71cd71368cbe4dec4335e7d8f5
Author: askoa <11...@users.noreply.github.com>
AuthorDate: Tue Nov 15 14:33:10 2022 -0500
Remove Option from `Field::metadata` (#3091)
* Remove Option from field metadata
* fix test issues
* fix clippy warnings
* fix test issues
* fix test issues
* use default for BTreeMap initialization
* empty commit
Co-authored-by: askoa <as...@local>
---
arrow-array/src/builder/struct_builder.rs | 2 +-
arrow-integration-test/src/field.rs | 10 +-
arrow-integration-test/src/lib.rs | 171 +++++++++++++++---------------
arrow-ipc/src/convert.rs | 30 +++---
arrow-schema/src/datatype.rs | 6 +-
arrow-schema/src/field.rs | 75 ++++++-------
arrow-schema/src/schema.rs | 62 +++++------
parquet/src/arrow/arrow_reader/mod.rs | 2 +-
parquet/src/arrow/schema/complex.rs | 10 +-
9 files changed, 176 insertions(+), 192 deletions(-)
diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs
index 69c092c03..1cb04aa6f 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -396,7 +396,7 @@ mod tests {
#[test]
#[should_panic(
- expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: None }) is not currently supported"
+ expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) is not currently supported"
)]
fn test_struct_array_builder_from_schema_unsupported_type() {
let mut fields = vec![Field::new("f1", DataType::Int16, false)];
diff --git a/arrow-integration-test/src/field.rs b/arrow-integration-test/src/field.rs
index 9b1a8f5f9..5b5863557 100644
--- a/arrow-integration-test/src/field.rs
+++ b/arrow-integration-test/src/field.rs
@@ -53,7 +53,7 @@ pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
// Referenced example file: testing/data/arrow-ipc-stream/integration/1.0.0-littleendian/generated_custom_metadata.json.gz
let metadata = match map.get("metadata") {
Some(&Value::Array(ref values)) => {
- let mut res: BTreeMap<String, String> = BTreeMap::new();
+ let mut res: BTreeMap<String, String> = BTreeMap::default();
for value in values {
match value.as_object() {
Some(map) => {
@@ -87,12 +87,12 @@ pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
}
}
}
- Some(res)
+ res
}
// We also support map format, because Schema's metadata supports this.
// See https://github.com/apache/arrow/pull/5907
Some(&Value::Object(ref values)) => {
- let mut res: BTreeMap<String, String> = BTreeMap::new();
+ let mut res: BTreeMap<String, String> = BTreeMap::default();
for (k, v) in values {
if let Some(str_value) = v.as_str() {
res.insert(k.clone(), str_value.to_string().clone());
@@ -103,14 +103,14 @@ pub fn field_from_json(json: &serde_json::Value) -> Result<Field> {
)));
}
}
- Some(res)
+ res
}
Some(_) => {
return Err(ArrowError::ParseError(
"Field `metadata` is not json array".to_string(),
));
}
- _ => None,
+ _ => BTreeMap::default(),
};
// if data_type is a struct or list, get its children
diff --git a/arrow-integration-test/src/lib.rs b/arrow-integration-test/src/lib.rs
index d0db4b4b9..75b76af1e 100644
--- a/arrow-integration-test/src/lib.rs
+++ b/arrow-integration-test/src/lib.rs
@@ -83,10 +83,10 @@ pub struct ArrowJsonField {
impl From<&Field> for ArrowJsonField {
fn from(field: &Field) -> Self {
- let metadata_value = match field.metadata() {
- Some(kv_list) => {
+ let metadata_value = match field.metadata().is_empty() {
+ false => {
let mut array = Vec::new();
- for (k, v) in kv_list {
+ for (k, v) in field.metadata() {
let mut kv_map = SJMap::new();
kv_map.insert(k.clone(), Value::String(v.clone()));
array.push(Value::Object(kv_map));
@@ -1120,90 +1120,87 @@ mod tests {
let micros_tz = Some("UTC".to_string());
let nanos_tz = Some("Africa/Johannesburg".to_string());
- let schema =
- Schema::new(vec![
- Field::new("bools-with-metadata-map", DataType::Boolean, true)
- .with_metadata(Some(
- [("k".to_string(), "v".to_string())]
- .iter()
- .cloned()
- .collect(),
- )),
- Field::new("bools-with-metadata-vec", DataType::Boolean, true)
- .with_metadata(Some(
- [("k2".to_string(), "v2".to_string())]
- .iter()
- .cloned()
- .collect(),
- )),
- Field::new("bools", DataType::Boolean, true),
- Field::new("int8s", DataType::Int8, true),
- Field::new("int16s", DataType::Int16, true),
- Field::new("int32s", DataType::Int32, true),
- Field::new("int64s", DataType::Int64, true),
- Field::new("uint8s", DataType::UInt8, true),
- Field::new("uint16s", DataType::UInt16, true),
- Field::new("uint32s", DataType::UInt32, true),
- Field::new("uint64s", DataType::UInt64, true),
- Field::new("float32s", DataType::Float32, true),
- Field::new("float64s", DataType::Float64, true),
- Field::new("date_days", DataType::Date32, true),
- Field::new("date_millis", DataType::Date64, true),
- Field::new("time_secs", DataType::Time32(TimeUnit::Second), true),
- Field::new("time_millis", DataType::Time32(TimeUnit::Millisecond), true),
- Field::new("time_micros", DataType::Time64(TimeUnit::Microsecond), true),
- Field::new("time_nanos", DataType::Time64(TimeUnit::Nanosecond), true),
- Field::new("ts_secs", DataType::Timestamp(TimeUnit::Second, None), true),
- Field::new(
- "ts_millis",
- DataType::Timestamp(TimeUnit::Millisecond, None),
- true,
- ),
- Field::new(
- "ts_micros",
- DataType::Timestamp(TimeUnit::Microsecond, None),
- true,
- ),
- Field::new(
- "ts_nanos",
- DataType::Timestamp(TimeUnit::Nanosecond, None),
- true,
- ),
- Field::new(
- "ts_secs_tz",
- DataType::Timestamp(TimeUnit::Second, secs_tz.clone()),
- true,
- ),
- Field::new(
- "ts_millis_tz",
- DataType::Timestamp(TimeUnit::Millisecond, millis_tz.clone()),
- true,
- ),
- Field::new(
- "ts_micros_tz",
- DataType::Timestamp(TimeUnit::Microsecond, micros_tz.clone()),
- true,
- ),
- Field::new(
- "ts_nanos_tz",
- DataType::Timestamp(TimeUnit::Nanosecond, nanos_tz.clone()),
- true,
- ),
- Field::new("utf8s", DataType::Utf8, true),
- Field::new(
- "lists",
- DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
- true,
- ),
- Field::new(
- "structs",
- DataType::Struct(vec![
- Field::new("int32s", DataType::Int32, true),
- Field::new("utf8s", DataType::Utf8, true),
- ]),
- true,
- ),
- ]);
+ let schema = Schema::new(vec![
+ Field::new("bools-with-metadata-map", DataType::Boolean, true).with_metadata(
+ [("k".to_string(), "v".to_string())]
+ .iter()
+ .cloned()
+ .collect(),
+ ),
+ Field::new("bools-with-metadata-vec", DataType::Boolean, true).with_metadata(
+ [("k2".to_string(), "v2".to_string())]
+ .iter()
+ .cloned()
+ .collect(),
+ ),
+ Field::new("bools", DataType::Boolean, true),
+ Field::new("int8s", DataType::Int8, true),
+ Field::new("int16s", DataType::Int16, true),
+ Field::new("int32s", DataType::Int32, true),
+ Field::new("int64s", DataType::Int64, true),
+ Field::new("uint8s", DataType::UInt8, true),
+ Field::new("uint16s", DataType::UInt16, true),
+ Field::new("uint32s", DataType::UInt32, true),
+ Field::new("uint64s", DataType::UInt64, true),
+ Field::new("float32s", DataType::Float32, true),
+ Field::new("float64s", DataType::Float64, true),
+ Field::new("date_days", DataType::Date32, true),
+ Field::new("date_millis", DataType::Date64, true),
+ Field::new("time_secs", DataType::Time32(TimeUnit::Second), true),
+ Field::new("time_millis", DataType::Time32(TimeUnit::Millisecond), true),
+ Field::new("time_micros", DataType::Time64(TimeUnit::Microsecond), true),
+ Field::new("time_nanos", DataType::Time64(TimeUnit::Nanosecond), true),
+ Field::new("ts_secs", DataType::Timestamp(TimeUnit::Second, None), true),
+ Field::new(
+ "ts_millis",
+ DataType::Timestamp(TimeUnit::Millisecond, None),
+ true,
+ ),
+ Field::new(
+ "ts_micros",
+ DataType::Timestamp(TimeUnit::Microsecond, None),
+ true,
+ ),
+ Field::new(
+ "ts_nanos",
+ DataType::Timestamp(TimeUnit::Nanosecond, None),
+ true,
+ ),
+ Field::new(
+ "ts_secs_tz",
+ DataType::Timestamp(TimeUnit::Second, secs_tz.clone()),
+ true,
+ ),
+ Field::new(
+ "ts_millis_tz",
+ DataType::Timestamp(TimeUnit::Millisecond, millis_tz.clone()),
+ true,
+ ),
+ Field::new(
+ "ts_micros_tz",
+ DataType::Timestamp(TimeUnit::Microsecond, micros_tz.clone()),
+ true,
+ ),
+ Field::new(
+ "ts_nanos_tz",
+ DataType::Timestamp(TimeUnit::Nanosecond, nanos_tz.clone()),
+ true,
+ ),
+ Field::new("utf8s", DataType::Utf8, true),
+ Field::new(
+ "lists",
+ DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
+ true,
+ ),
+ Field::new(
+ "structs",
+ DataType::Struct(vec![
+ Field::new("int32s", DataType::Int32, true),
+ Field::new("utf8s", DataType::Utf8, true),
+ ]),
+ true,
+ ),
+ ]);
let bools_with_metadata_map =
BooleanArray::from(vec![Some(true), None, Some(false)]);
diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs
index 8d01c58b6..a9dda6f2a 100644
--- a/arrow-ipc/src/convert.rs
+++ b/arrow-ipc/src/convert.rs
@@ -86,18 +86,16 @@ impl<'a> From<crate::Field<'a>> for Field {
)
};
- let mut metadata = None;
+ let mut metadata_map = BTreeMap::default();
if let Some(list) = field.custom_metadata() {
- let mut metadata_map = BTreeMap::default();
for kv in list {
if let (Some(k), Some(v)) = (kv.key(), kv.value()) {
metadata_map.insert(k.to_string(), v.to_string());
}
}
- metadata = Some(metadata_map);
}
- arrow_field.with_metadata(metadata)
+ arrow_field.with_metadata(metadata_map)
}
}
@@ -424,19 +422,17 @@ pub(crate) fn build_field<'a>(
) -> WIPOffset<crate::Field<'a>> {
// Optional custom metadata.
let mut fb_metadata = None;
- if let Some(metadata) = field.metadata() {
- if !metadata.is_empty() {
- let mut kv_vec = vec![];
- for (k, v) in metadata {
- let kv_args = crate::KeyValueArgs {
- key: Some(fbb.create_string(k.as_str())),
- value: Some(fbb.create_string(v.as_str())),
- };
- let kv_offset = crate::KeyValue::create(fbb, &kv_args);
- kv_vec.push(kv_offset);
- }
- fb_metadata = Some(fbb.create_vector(&kv_vec));
+ if !field.metadata().is_empty() {
+ let mut kv_vec = vec![];
+ for (k, v) in field.metadata() {
+ let kv_args = crate::KeyValueArgs {
+ key: Some(fbb.create_string(k.as_str())),
+ value: Some(fbb.create_string(v.as_str())),
+ };
+ let kv_offset = crate::KeyValue::create(fbb, &kv_args);
+ kv_vec.push(kv_offset);
}
+ fb_metadata = Some(fbb.create_vector(&kv_vec));
};
let fb_field_name = fbb.create_string(field.name().as_str());
@@ -822,7 +818,7 @@ mod tests {
.collect();
let schema = Schema::new_with_metadata(
vec![
- Field::new("uint8", DataType::UInt8, false).with_metadata(Some(field_md)),
+ Field::new("uint8", DataType::UInt8, false).with_metadata(field_md),
Field::new("uint16", DataType::UInt16, true),
Field::new("uint32", DataType::UInt32, false),
Field::new("uint64", DataType::UInt64, true),
diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs
index 759fc3964..90ae42942 100644
--- a/arrow-schema/src/datatype.rs
+++ b/arrow-schema/src/datatype.rs
@@ -387,12 +387,12 @@ mod tests {
let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect();
// Non-empty map: should be converted as JSON obj { ... }
- let first_name = Field::new("first_name", DataType::Utf8, false)
- .with_metadata(Some(field_metadata));
+ let first_name =
+ Field::new("first_name", DataType::Utf8, false).with_metadata(field_metadata);
// Empty map: should be omitted.
let last_name = Field::new("last_name", DataType::Utf8, false)
- .with_metadata(Some(BTreeMap::default()));
+ .with_metadata(BTreeMap::default());
let person = DataType::Struct(vec![
first_name,
diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index 4d13f523f..ee6ece862 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -35,8 +35,11 @@ pub struct Field {
dict_id: i64,
dict_is_ordered: bool,
/// A map of key-value pairs containing additional custom meta data.
- #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
- metadata: Option<BTreeMap<String, String>>,
+ #[cfg_attr(
+ feature = "serde",
+ serde(skip_serializing_if = "BTreeMap::is_empty", default)
+ )]
+ metadata: BTreeMap<String, String>,
}
// Auto-derive `PartialEq` traits will pull `dict_id` and `dict_is_ordered`
@@ -89,7 +92,7 @@ impl Field {
nullable,
dict_id: 0,
dict_is_ordered: false,
- metadata: None,
+ metadata: BTreeMap::default(),
}
}
@@ -107,33 +110,30 @@ impl Field {
nullable,
dict_id,
dict_is_ordered,
- metadata: None,
+ metadata: BTreeMap::default(),
}
}
/// Sets the `Field`'s optional custom metadata.
/// The metadata is set as `None` for empty map.
#[inline]
- pub fn set_metadata(&mut self, metadata: Option<BTreeMap<String, String>>) {
- // To make serde happy, convert Some(empty_map) to None.
- self.metadata = None;
- if let Some(v) = metadata {
- if !v.is_empty() {
- self.metadata = Some(v);
- }
+ pub fn set_metadata(&mut self, metadata: BTreeMap<String, String>) {
+ self.metadata = BTreeMap::default();
+ if !metadata.is_empty() {
+ self.metadata = metadata;
}
}
/// Sets the metadata of this `Field` to be `metadata` and returns self
- pub fn with_metadata(mut self, metadata: Option<BTreeMap<String, String>>) -> Self {
+ pub fn with_metadata(mut self, metadata: BTreeMap<String, String>) -> Self {
self.set_metadata(metadata);
self
}
/// Returns the immutable reference to the `Field`'s optional custom metadata.
#[inline]
- pub const fn metadata(&self) -> Option<&BTreeMap<String, String>> {
- self.metadata.as_ref()
+ pub const fn metadata(&self) -> &BTreeMap<String, String> {
+ &self.metadata
}
/// Returns an immutable reference to the `Field`'s name.
@@ -278,11 +278,11 @@ impl Field {
)));
}
// merge metadata
- match (self.metadata(), from.metadata()) {
- (Some(self_metadata), Some(from_metadata)) => {
- let mut merged = self_metadata.clone();
- for (key, from_value) in from_metadata {
- if let Some(self_value) = self_metadata.get(key) {
+ match (self.metadata().is_empty(), from.metadata().is_empty()) {
+ (false, false) => {
+ let mut merged = self.metadata().clone();
+ for (key, from_value) in from.metadata() {
+ if let Some(self_value) = self.metadata.get(key) {
if self_value != from_value {
return Err(ArrowError::SchemaError(format!(
"Fail to merge field due to conflicting metadata data value for key {}.
@@ -293,10 +293,10 @@ impl Field {
merged.insert(key.clone(), from_value.clone());
}
}
- self.set_metadata(Some(merged));
+ self.set_metadata(merged);
}
- (None, Some(from_metadata)) => {
- self.set_metadata(Some(from_metadata.clone()));
+ (true, false) => {
+ self.set_metadata(from.metadata().clone());
}
_ => {}
}
@@ -415,12 +415,12 @@ impl Field {
// self need to be nullable or both of them are not nullable
&& (self.nullable || !other.nullable)
// make sure self.metadata is a superset of other.metadata
- && match (&self.metadata, &other.metadata) {
- (_, None) => true,
- (None, Some(_)) => false,
- (Some(self_meta), Some(other_meta)) => {
- other_meta.iter().all(|(k, v)| {
- match self_meta.get(k) {
+ && match (&self.metadata.is_empty(), &other.metadata.is_empty()) {
+ (_, true) => true,
+ (true, false) => false,
+ (false, false) => {
+ other.metadata().iter().all(|(k, v)| {
+ match self.metadata().get(k) {
Some(s) => s == v,
None => false
}
@@ -538,10 +538,10 @@ mod test {
#[test]
fn test_contains_reflexivity() {
let mut field = Field::new("field1", DataType::Float16, false);
- field.set_metadata(Some(BTreeMap::from([
+ field.set_metadata(BTreeMap::from([
(String::from("k0"), String::from("v0")),
(String::from("k1"), String::from("v1")),
- ])));
+ ]));
assert!(field.contains(&field))
}
@@ -550,23 +550,14 @@ mod test {
let child_field = Field::new("child1", DataType::Float16, false);
let mut field1 = Field::new("field1", DataType::Struct(vec![child_field]), false);
- field1.set_metadata(Some(BTreeMap::from([(
- String::from("k1"),
- String::from("v1"),
- )])));
+ field1.set_metadata(BTreeMap::from([(String::from("k1"), String::from("v1"))]));
let mut field2 = Field::new("field1", DataType::Struct(vec![]), true);
- field2.set_metadata(Some(BTreeMap::from([(
- String::from("k2"),
- String::from("v2"),
- )])));
+ field2.set_metadata(BTreeMap::from([(String::from("k2"), String::from("v2"))]));
field2.try_merge(&field1).unwrap();
let mut field3 = Field::new("field1", DataType::Struct(vec![]), false);
- field3.set_metadata(Some(BTreeMap::from([(
- String::from("k3"),
- String::from("v3"),
- )])));
+ field3.set_metadata(BTreeMap::from([(String::from("k3"), String::from("v3"))]));
field3.try_merge(&field2).unwrap();
assert!(field2.contains(&field1));
diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs
index 60fe3c6ca..519a8e089 100644
--- a/arrow-schema/src/schema.rs
+++ b/arrow-schema/src/schema.rs
@@ -419,12 +419,12 @@ mod tests {
assert_ne!(schema2, schema4);
assert_ne!(schema3, schema4);
- let f = Field::new("c1", DataType::Utf8, false).with_metadata(Some(
+ let f = Field::new("c1", DataType::Utf8, false).with_metadata(
[("foo".to_string(), "bar".to_string())]
.iter()
.cloned()
.collect(),
- ));
+ );
let schema5 = Schema::new(vec![
f,
Field::new("c2", DataType::Float64, true),
@@ -437,13 +437,13 @@ mod tests {
fn create_schema_string() {
let schema = person_schema();
assert_eq!(schema.to_string(),
- "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: Some({\"k\": \"v\"}) }, \
- Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \
+ "Field { name: \"first_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {\"k\": \"v\"} }, \
+ Field { name: \"last_name\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
Field { name: \"address\", data_type: Struct([\
- Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \
- Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }\
- ]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None }, \
- Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: None }")
+ Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
+ Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }\
+ ]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, \
+ Field { name: \"interests\", data_type: Dictionary(Int32, Utf8), nullable: true, dict_id: 123, dict_is_ordered: true, metadata: {} }")
}
#[test]
@@ -462,8 +462,8 @@ mod tests {
assert_eq!(first_name.dict_is_ordered(), None);
let metadata = first_name.metadata();
- assert!(metadata.is_some());
- let md = metadata.as_ref().unwrap();
+ assert!(!metadata.is_empty());
+ let md = &metadata;
assert_eq!(md.len(), 1);
let key = md.get("k");
assert!(key.is_some());
@@ -524,8 +524,8 @@ mod tests {
fn person_schema() -> Schema {
let kv_array = [("k".to_string(), "v".to_string())];
let field_metadata: BTreeMap<String, String> = kv_array.iter().cloned().collect();
- let first_name = Field::new("first_name", DataType::Utf8, false)
- .with_metadata(Some(field_metadata));
+ let first_name =
+ Field::new("first_name", DataType::Utf8, false).with_metadata(field_metadata);
Schema::new(vec![
first_name,
@@ -556,16 +556,14 @@ mod tests {
.iter()
.cloned()
.collect();
- let f1 = Field::new("first_name", DataType::Utf8, false)
- .with_metadata(Some(metadata1));
+ let f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata1);
let metadata2: BTreeMap<String, String> =
[("foo".to_string(), "baz".to_string())]
.iter()
.cloned()
.collect();
- let f2 = Field::new("first_name", DataType::Utf8, false)
- .with_metadata(Some(metadata2));
+ let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata2);
assert!(
Schema::try_merge(vec![Schema::new(vec![f1]), Schema::new(vec![f2])])
@@ -579,34 +577,30 @@ mod tests {
.iter()
.cloned()
.collect();
- let f2 = Field::new("first_name", DataType::Utf8, false)
- .with_metadata(Some(metadata2));
+ let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(metadata2);
assert!(f1.try_merge(&f2).is_ok());
- assert!(f1.metadata().is_some());
- assert_eq!(
- f1.metadata().as_ref().unwrap(),
- f2.metadata().as_ref().unwrap()
- );
+ assert!(!f1.metadata().is_empty());
+ assert_eq!(f1.metadata(), f2.metadata());
// 3. Some + Some
- let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some(
+ let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(
[("foo".to_string(), "bar".to_string())]
.iter()
.cloned()
.collect(),
- ));
- let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some(
+ );
+ let f2 = Field::new("first_name", DataType::Utf8, false).with_metadata(
[("foo2".to_string(), "bar2".to_string())]
.iter()
.cloned()
.collect(),
- ));
+ );
assert!(f1.try_merge(&f2).is_ok());
- assert!(f1.metadata().is_some());
+ assert!(!f1.metadata().is_empty());
assert_eq!(
- f1.metadata().cloned().unwrap(),
+ f1.metadata().clone(),
[
("foo".to_string(), "bar".to_string()),
("foo2".to_string(), "bar2".to_string())
@@ -617,17 +611,17 @@ mod tests {
);
// 4. Some + None.
- let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(Some(
+ let mut f1 = Field::new("first_name", DataType::Utf8, false).with_metadata(
[("foo".to_string(), "bar".to_string())]
.iter()
.cloned()
.collect(),
- ));
+ );
let f2 = Field::new("first_name", DataType::Utf8, false);
assert!(f1.try_merge(&f2).is_ok());
- assert!(f1.metadata().is_some());
+ assert!(!f1.metadata().is_empty());
assert_eq!(
- f1.metadata().cloned().unwrap(),
+ f1.metadata().clone(),
[("foo".to_string(), "bar".to_string())]
.iter()
.cloned()
@@ -638,7 +632,7 @@ mod tests {
let mut f1 = Field::new("first_name", DataType::Utf8, false);
let f2 = Field::new("first_name", DataType::Utf8, false);
assert!(f1.try_merge(&f2).is_ok());
- assert!(f1.metadata().is_none());
+ assert!(f1.metadata().is_empty());
}
#[test]
diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index 35b70a048..a720d439c 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -2021,7 +2021,7 @@ mod tests {
.collect();
let schema_with_metadata =
- Arc::new(Schema::new(vec![field.with_metadata(Some(metadata))]));
+ Arc::new(Schema::new(vec![field.with_metadata(metadata)]));
assert_ne!(schema_with_metadata, schema_without_metadata);
diff --git a/parquet/src/arrow/schema/complex.rs b/parquet/src/arrow/schema/complex.rs
index 2334a5601..4ff9c7a39 100644
--- a/parquet/src/arrow/schema/complex.rs
+++ b/parquet/src/arrow/schema/complex.rs
@@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.
+use std::collections::BTreeMap;
+
use crate::arrow::schema::primitive::convert_primitive;
use crate::arrow::ProjectionMask;
use crate::basic::{ConvertedType, Repetition};
@@ -343,13 +345,17 @@ impl Visitor {
(Some(key), Some(value)) => {
let key_field = convert_field(map_key, &key, arrow_key);
let value_field = convert_field(map_value, &value, arrow_value);
+ let field_metadata = match arrow_map {
+ Some(field) => field.metadata().clone(),
+ _ => BTreeMap::default(),
+ };
let map_field = Field::new(
map_key_value.name(),
DataType::Struct(vec![key_field, value_field]),
false, // The inner map field is always non-nullable (#1697)
)
- .with_metadata(arrow_map.and_then(|f| f.metadata().cloned()));
+ .with_metadata(field_metadata);
Ok(Some(ParquetField {
rep_level,
@@ -539,7 +545,7 @@ fn convert_field(
_ => Field::new(name, data_type, nullable),
};
- field.with_metadata(hint.metadata().cloned())
+ field.with_metadata(hint.metadata().clone())
}
None => Field::new(name, data_type, nullable),
}