You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by al...@apache.org on 2021/08/21 10:33:16 UTC

[arrow-rs] branch master updated: Support binary data type in `build_struct_array`. (#702)

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 8308615  Support binary data type in `build_struct_array`. (#702)
8308615 is described below

commit 8308615d40e14caa5cdbee118ecc2f46696b920f
Author: Yuan Zhou <my...@gmail.com>
AuthorDate: Sat Aug 21 18:33:11 2021 +0800

    Support binary data type in `build_struct_array`. (#702)
    
    * Support binary data type in `build_struct_array`.
    
    * Modify test case.
    
    * cargo fmt
    
    Co-authored-by: Andrew Lamb <an...@nerdnetworks.org>
---
 arrow/src/array/array.rs |  2 +-
 arrow/src/json/reader.rs | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs
index 4702179..5504c4a 100644
--- a/arrow/src/array/array.rs
+++ b/arrow/src/array/array.rs
@@ -668,7 +668,7 @@ mod tests {
                 "entry",
                 DataType::Struct(vec![
                     Field::new("key", DataType::Utf8, false),
-                    Field::new("key", DataType::Int32, true),
+                    Field::new("value", DataType::Int32, true),
                 ]),
                 false,
             )),
diff --git a/arrow/src/json/reader.rs b/arrow/src/json/reader.rs
index 4912c5e..9592b59 100644
--- a/arrow/src/json/reader.rs
+++ b/arrow/src/json/reader.rs
@@ -1225,6 +1225,14 @@ impl Decoder {
                             })
                             .collect::<StringArray>(),
                     ) as ArrayRef),
+                    DataType::Binary => Ok(Arc::new(
+                        rows.iter()
+                            .map(|row| {
+                                let maybe_value = row.get(field.name());
+                                maybe_value.and_then(|value| value.as_str())
+                            })
+                            .collect::<BinaryArray>(),
+                    ) as ArrayRef),
                     DataType::List(ref list_field) => {
                         match list_field.data_type() {
                             DataType::Dictionary(ref key_ty, _) => {
@@ -3141,6 +3149,38 @@ mod tests {
     }
 
     #[test]
+    fn test_json_read_binary_structs() { // JSON string values -> nullable Binary column "c1"
+        let schema = Schema::new(vec![Field::new("c1", DataType::Binary, true)]);
+        let decoder = Decoder::new(Arc::new(schema), 1024, None); // NOTE(review): presumably batch size / projection — confirm
+        let batch = decoder
+            .next_batch(
+                &mut vec![
+                    Ok(serde_json::json!({
+                        "c1": "₁₂₃", // non-ASCII input: multi-byte in UTF-8
+                    })),
+                    Ok(serde_json::json!({
+                        "c1": "foo", // plain ASCII input
+                    })),
+                ]
+                .into_iter(),
+            )
+            .unwrap() // NOTE(review): presumably unwraps the decode Result — confirm
+            .unwrap(); // NOTE(review): presumably unwraps Option<RecordBatch> — confirm
+        let data = batch.columns().iter().collect::<Vec<_>>();
+
+        let schema = Schema::new(vec![Field::new("c1", DataType::Binary, true)]);
+        let binary_values = BinaryArray::from(vec!["₁₂₃".as_bytes(), "foo".as_bytes()]);
+        let expected_batch = // expected column holds the UTF-8 bytes of the same two strings
+            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(binary_values)])
+                .unwrap();
+        let expected_data = expected_batch.columns().iter().collect::<Vec<_>>();
+
+        assert_eq!(data, expected_data); // decoded columns equal the hand-built Binary column
+        assert_eq!(batch.num_columns(), 1);
+        assert_eq!(batch.num_rows(), 2);
+    }
+
+    #[test]
     fn test_json_iterator() {
         let builder = ReaderBuilder::new().infer_schema(None).with_batch_size(5);
         let reader: Reader<File> = builder