You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/08/21 10:33:16 UTC
[arrow-rs] branch master updated: Support binary data type in `build_struct_array`. (#702)
This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 8308615 Support binary data type in `build_struct_array`. (#702)
8308615 is described below
commit 8308615d40e14caa5cdbee118ecc2f46696b920f
Author: Yuan Zhou <my...@gmail.com>
AuthorDate: Sat Aug 21 18:33:11 2021 +0800
Support binary data type in `build_struct_array`. (#702)
* Support binary data type in `build_struct_array`.
* Modify test case.
* cargo fmt
Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
arrow/src/array/array.rs | 2 +-
arrow/src/json/reader.rs | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs
index 4702179..5504c4a 100644
--- a/arrow/src/array/array.rs
+++ b/arrow/src/array/array.rs
@@ -668,7 +668,7 @@ mod tests {
"entry",
DataType::Struct(vec![
Field::new("key", DataType::Utf8, false),
- Field::new("key", DataType::Int32, true),
+ Field::new("value", DataType::Int32, true),
]),
false,
)),
diff --git a/arrow/src/json/reader.rs b/arrow/src/json/reader.rs
index 4912c5e..9592b59 100644
--- a/arrow/src/json/reader.rs
+++ b/arrow/src/json/reader.rs
@@ -1225,6 +1225,14 @@ impl Decoder {
})
.collect::<StringArray>(),
) as ArrayRef),
+ DataType::Binary => Ok(Arc::new(
+ rows.iter()
+ .map(|row| {
+ let maybe_value = row.get(field.name());
+ maybe_value.and_then(|value| value.as_str())
+ })
+ .collect::<BinaryArray>(),
+ ) as ArrayRef),
DataType::List(ref list_field) => {
match list_field.data_type() {
DataType::Dictionary(ref key_ty, _) => {
@@ -3141,6 +3149,38 @@ mod tests {
}
#[test]
+ fn test_json_read_binary_structs() {
+ let schema = Schema::new(vec![Field::new("c1", DataType::Binary, true)]);
+ let decoder = Decoder::new(Arc::new(schema), 1024, None);
+ let batch = decoder
+ .next_batch(
+ &mut vec![
+ Ok(serde_json::json!({
+ "c1": "₁₂₃",
+ })),
+ Ok(serde_json::json!({
+ "c1": "foo",
+ })),
+ ]
+ .into_iter(),
+ )
+ .unwrap()
+ .unwrap();
+ let data = batch.columns().iter().collect::<Vec<_>>();
+
+ let schema = Schema::new(vec![Field::new("c1", DataType::Binary, true)]);
+ let binary_values = BinaryArray::from(vec!["₁₂₃".as_bytes(), "foo".as_bytes()]);
+ let expected_batch =
+ RecordBatch::try_new(Arc::new(schema), vec![Arc::new(binary_values)])
+ .unwrap();
+ let expected_data = expected_batch.columns().iter().collect::<Vec<_>>();
+
+ assert_eq!(data, expected_data);
+ assert_eq!(batch.num_columns(), 1);
+ assert_eq!(batch.num_rows(), 2);
+ }
+
+ #[test]
fn test_json_iterator() {
let builder = ReaderBuilder::new().infer_schema(None).with_batch_size(5);
let reader: Reader<File> = builder