You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2022/04/07 21:07:51 UTC
[arrow-rs] branch master updated: Add `new_from_strings` to create `MapArrays` (#1507)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 252a983b1 Add `new_from_strings` to create `MapArrays` (#1507)
252a983b1 is described below
commit 252a983b1aa250452ac47818e3a57b6e8eaaa601
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Thu Apr 7 14:07:45 2022 -0700
Add `new_from_strings` to create `MapArrays` (#1507)
* Add new_from_strings
* Fix clippy
* Update arrow/src/array/array_map.rs
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
* Fix typo too
* For review
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
arrow/src/array/array_map.rs | 93 +++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 92 insertions(+), 1 deletion(-)
diff --git a/arrow/src/array/array_map.rs b/arrow/src/array/array_map.rs
index 704c11149..045d647ad 100644
--- a/arrow/src/array/array_map.rs
+++ b/arrow/src/array/array_map.rs
@@ -15,15 +15,18 @@
// specific language governing permissions and limitations
// under the License.
+use crate::array::{StringArray, StructArray};
+use crate::buffer::Buffer;
use std::any::Any;
use std::fmt;
use std::mem;
+use std::sync::Arc;
use super::make_array;
use super::{
array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, ArrayRef,
};
-use crate::datatypes::{ArrowNativeType, DataType};
+use crate::datatypes::{ArrowNativeType, DataType, Field, ToByteSlice};
use crate::error::ArrowError;
/// A nested array type where each record is a key-value map.
@@ -152,6 +155,44 @@ impl MapArray {
value_offsets,
})
}
+
+    /// Creates a [`MapArray`] with string keys from flattened keys and values.
+    ///
+    /// `keys` and `values` hold the entries of every map back to back, and
+    /// `entry_offsets` marks where each map starts and ends within them: map
+    /// `i` covers entries `entry_offsets[i]..entry_offsets[i + 1]`, so the
+    /// resulting array has `entry_offsets.len() - 1` elements.
+    ///
+    /// The entries are stored as a struct with a non-nullable `Utf8` field
+    /// named `keys` and a field named `values` that is marked nullable only
+    /// when `values` contains at least one null.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ArrowError::InvalidArgumentError`] if `entry_offsets` is empty
+    /// or if the number of keys differs from the number of values, and
+    /// propagates any error reported while building the underlying array data.
+    pub fn new_from_strings<'a>(
+        keys: impl Iterator<Item = &'a str>,
+        values: &dyn Array,
+        entry_offsets: &[u32],
+    ) -> Result<Self, ArrowError> {
+        // N maps need N + 1 offsets, so an empty slice cannot describe any
+        // array; reject it here instead of underflowing in `len() - 1` below.
+        if entry_offsets.is_empty() {
+            return Err(ArrowError::InvalidArgumentError(
+                "entry_offsets must contain at least one offset".to_string(),
+            ));
+        }
+
+        let entry_offsets_buffer = Buffer::from(entry_offsets.to_byte_slice());
+        let keys_data = StringArray::from_iter_values(keys);
+
+        // Every entry is one key paired with one value, so both children of
+        // the entries struct must have the same length.
+        if keys_data.len() != values.len() {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "keys and values must have the same length, got {} keys and {} values",
+                keys_data.len(),
+                values.len()
+            )));
+        }
+
+        let keys_field = Field::new("keys", DataType::Utf8, false);
+        let values_field = Field::new(
+            "values",
+            values.data_type().clone(),
+            values.null_count() > 0,
+        );
+
+        let entry_struct = StructArray::from(vec![
+            (keys_field, Arc::new(keys_data) as ArrayRef),
+            (values_field, make_array(values.data().clone())),
+        ]);
+
+        let map_data_type = DataType::Map(
+            Box::new(Field::new(
+                "entries",
+                entry_struct.data_type().clone(),
+                true,
+            )),
+            false,
+        );
+        let map_data = ArrayData::builder(map_data_type)
+            .len(entry_offsets.len() - 1)
+            .add_buffer(entry_offsets_buffer)
+            .add_child_data(entry_struct.data().clone())
+            .build()?;
+
+        Ok(MapArray::from(map_data))
+    }
}
impl Array for MapArray {
@@ -428,4 +469,54 @@ mod tests {
map_array.value(map_array.len());
}
+
+    #[test]
+    fn test_new_from_strings() {
+        // Eight flattened entries which the offsets below group into three maps:
+        // [[a, b, c], [d, e, f], [g, h]]
+        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
+        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);
+        let entry_offsets = [0, 3, 6, 8];
+
+        let map_array =
+            MapArray::new_from_strings(keys.iter().copied(), &values_data, &entry_offsets)
+                .unwrap();
+
+        // The flattened values child must round-trip unchanged.
+        let flat_values = map_array.values();
+        let flat_values = flat_values
+            .as_any()
+            .downcast_ref::<UInt32Array>()
+            .unwrap();
+        assert_eq!(&values_data, flat_values);
+
+        // Shape of the map array itself.
+        assert_eq!(DataType::UInt32, map_array.value_type());
+        assert_eq!(3, map_array.len());
+        assert_eq!(0, map_array.null_count());
+        assert_eq!(6, map_array.value_offsets()[2]);
+        assert_eq!(2, map_array.value_length(2));
+
+        // The first map is expected to hold the first three key/value pairs.
+        let expected_first = StructArray::from(vec![
+            (
+                Field::new("keys", DataType::Utf8, false),
+                Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef,
+            ),
+            (
+                Field::new("values", DataType::UInt32, false),
+                Arc::new(UInt32Array::from(vec![0u32, 10, 20])) as ArrayRef,
+            ),
+        ]);
+
+        let first_checked = StructArray::from(map_array.value(0).data().clone());
+        assert_eq!(expected_first, first_checked);
+
+        // SAFETY: index 0 is in bounds; the array length was asserted to be 3 above.
+        let first_unchecked = unsafe { map_array.value_unchecked(0) };
+        assert_eq!(
+            &expected_first,
+            first_unchecked
+                .as_any()
+                .downcast_ref::<StructArray>()
+                .unwrap()
+        );
+
+        // No validity bitmap was supplied, so every slot must report as valid.
+        for idx in 0..3 {
+            assert!(map_array.is_valid(idx));
+            assert!(!map_array.is_null(idx));
+        }
+    }
}