You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/25 14:29:44 UTC

[arrow-rs] branch master updated: add support empty array (#4114)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new f3b4a73de  add support empty array (#4114)
f3b4a73de is described below

commit f3b4a73de2e732445513257edf9d1395f4d9e624
Author: zhenxing jiang <ji...@gmail.com>
AuthorDate: Tue Apr 25 22:29:38 2023 +0800

     add support empty array (#4114)
    
    Co-authored-by: Raphael Taylor-Davies <17...@users.noreply.github.com>
---
 arrow-json/src/reader/mod.rs        | 61 ++++++++++++++++++++++++++++++++++++-
 arrow-json/src/reader/null_array.rs | 35 +++++++++++++++++++++
 2 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index 4abcb1ea7..9541e0372 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -146,6 +146,7 @@ use crate::reader::boolean_array::BooleanArrayDecoder;
 use crate::reader::decimal_array::DecimalArrayDecoder;
 use crate::reader::list_array::ListArrayDecoder;
 use crate::reader::map_array::MapArrayDecoder;
+use crate::reader::null_array::NullArrayDecoder;
 use crate::reader::primitive_array::PrimitiveArrayDecoder;
 use crate::reader::string_array::StringArrayDecoder;
 use crate::reader::struct_array::StructArrayDecoder;
@@ -156,6 +157,7 @@ mod boolean_array;
 mod decimal_array;
 mod list_array;
 mod map_array;
+mod null_array;
 mod primitive_array;
 mod schema;
 mod serializer;
@@ -580,6 +582,7 @@ fn make_decoder(
 ) -> Result<Box<dyn ArrayDecoder>, ArrowError> {
     downcast_integer! {
         data_type => (primitive_decoder, data_type),
+        DataType::Null => Ok(Box::<NullArrayDecoder>::default()),
         DataType::Float32 => primitive_decoder!(Float32Type, data_type),
         DataType::Float64 => primitive_decoder!(Float64Type, data_type),
         DataType::Timestamp(TimeUnit::Second, None) => {
@@ -647,7 +650,7 @@ mod tests {
     use arrow_buffer::{ArrowNativeType, Buffer};
     use arrow_cast::display::{ArrayFormatter, FormatOptions};
     use arrow_data::ArrayDataBuilder;
-    use arrow_schema::{DataType, Field, Schema};
+    use arrow_schema::{DataType, Field, FieldRef, Schema};
 
     use crate::reader::infer_json_schema;
     use crate::ReaderBuilder;
@@ -1602,6 +1605,62 @@ mod tests {
         assert!(!cc.is_valid(5));
     }
 
+    #[test]
+    fn test_empty_json_arrays() {
+        let json_content = r#"
+            {"items": []}
+            {"items": null}
+            {}
+            "#;
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "items",
+            DataType::List(FieldRef::new(Field::new("item", DataType::Null, true))),
+            true,
+        )]));
+
+        let batches = do_read(json_content, 1024, false, schema);
+        assert_eq!(batches.len(), 1);
+
+        let col1 = batches[0].column(0).as_list::<i32>();
+        assert_eq!(col1.null_count(), 2);
+        assert!(col1.value(0).is_empty());
+        assert_eq!(col1.value(0).data_type(), &DataType::Null);
+        assert!(col1.is_null(1));
+        assert!(col1.is_null(2));
+    }
+
+    #[test]
+    fn test_nested_empty_json_arrays() {
+        let json_content = r#"
+            {"items": [[],[]]}
+            {"items": [[null, null],[null]]}
+            "#;
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "items",
+            DataType::List(FieldRef::new(Field::new(
+                "item",
+                DataType::List(FieldRef::new(Field::new("item", DataType::Null, true))),
+                true,
+            ))),
+            true,
+        )]));
+
+        let batches = do_read(json_content, 1024, false, schema);
+        assert_eq!(batches.len(), 1);
+
+        let col1 = batches[0].column(0).as_list::<i32>();
+        assert_eq!(col1.null_count(), 0);
+        assert_eq!(col1.value(0).len(), 2);
+        assert!(col1.value(0).as_list::<i32>().value(0).is_empty());
+        assert!(col1.value(0).as_list::<i32>().value(1).is_empty());
+
+        assert_eq!(col1.value(1).len(), 2);
+        assert_eq!(col1.value(1).as_list::<i32>().value(0).len(), 2);
+        assert_eq!(col1.value(1).as_list::<i32>().value(1).len(), 1);
+    }
+
     #[test]
     fn test_nested_list_json_arrays() {
         let c_field =
diff --git a/arrow-json/src/reader/null_array.rs b/arrow-json/src/reader/null_array.rs
new file mode 100644
index 000000000..4270045fb
--- /dev/null
+++ b/arrow-json/src/reader/null_array.rs
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::reader::tape::{Tape, TapeElement};
+use crate::reader::ArrayDecoder;
+use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_schema::{ArrowError, DataType};
+
+#[derive(Default)]
+pub struct NullArrayDecoder {}
+
+impl ArrayDecoder for NullArrayDecoder {
+    fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
+        for p in pos {
+            if !matches!(tape.get(*p), TapeElement::Null) {
+                return Err(tape.error(*p, "null"));
+            }
+        }
+        ArrayDataBuilder::new(DataType::Null).len(pos.len()).build()
+    }
+}