You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/04/25 14:29:44 UTC
[arrow-rs] branch master updated: add support empty array (#4114)
This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new f3b4a73de add support empty array (#4114)
f3b4a73de is described below
commit f3b4a73de2e732445513257edf9d1395f4d9e624
Author: zhenxing jiang <ji...@gmail.com>
AuthorDate: Tue Apr 25 22:29:38 2023 +0800
add support empty array (#4114)
Co-authored-by: Raphael Taylor-Davies <17...@users.noreply.github.com>
---
arrow-json/src/reader/mod.rs | 61 ++++++++++++++++++++++++++++++++++++-
arrow-json/src/reader/null_array.rs | 35 +++++++++++++++++++++
2 files changed, 95 insertions(+), 1 deletion(-)
diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index 4abcb1ea7..9541e0372 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -146,6 +146,7 @@ use crate::reader::boolean_array::BooleanArrayDecoder;
use crate::reader::decimal_array::DecimalArrayDecoder;
use crate::reader::list_array::ListArrayDecoder;
use crate::reader::map_array::MapArrayDecoder;
+use crate::reader::null_array::NullArrayDecoder;
use crate::reader::primitive_array::PrimitiveArrayDecoder;
use crate::reader::string_array::StringArrayDecoder;
use crate::reader::struct_array::StructArrayDecoder;
@@ -156,6 +157,7 @@ mod boolean_array;
mod decimal_array;
mod list_array;
mod map_array;
+mod null_array;
mod primitive_array;
mod schema;
mod serializer;
@@ -580,6 +582,7 @@ fn make_decoder(
) -> Result<Box<dyn ArrayDecoder>, ArrowError> {
downcast_integer! {
data_type => (primitive_decoder, data_type),
+ DataType::Null => Ok(Box::<NullArrayDecoder>::default()),
DataType::Float32 => primitive_decoder!(Float32Type, data_type),
DataType::Float64 => primitive_decoder!(Float64Type, data_type),
DataType::Timestamp(TimeUnit::Second, None) => {
@@ -647,7 +650,7 @@ mod tests {
use arrow_buffer::{ArrowNativeType, Buffer};
use arrow_cast::display::{ArrayFormatter, FormatOptions};
use arrow_data::ArrayDataBuilder;
- use arrow_schema::{DataType, Field, Schema};
+ use arrow_schema::{DataType, Field, FieldRef, Schema};
use crate::reader::infer_json_schema;
use crate::ReaderBuilder;
@@ -1602,6 +1605,62 @@ mod tests {
assert!(!cc.is_valid(5));
}
+ #[test]
+ fn test_empty_json_arrays() {
+ let json_content = r#"
+ {"items": []}
+ {"items": null}
+ {}
+ "#;
+
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ "items",
+ DataType::List(FieldRef::new(Field::new("item", DataType::Null, true))),
+ true,
+ )]));
+
+ let batches = do_read(json_content, 1024, false, schema);
+ assert_eq!(batches.len(), 1);
+
+ let col1 = batches[0].column(0).as_list::<i32>();
+ assert_eq!(col1.null_count(), 2);
+ assert!(col1.value(0).is_empty());
+ assert_eq!(col1.value(0).data_type(), &DataType::Null);
+ assert!(col1.is_null(1));
+ assert!(col1.is_null(2));
+ }
+
+ #[test]
+ fn test_nested_empty_json_arrays() {
+ let json_content = r#"
+ {"items": [[],[]]}
+ {"items": [[null, null],[null]]}
+ "#;
+
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ "items",
+ DataType::List(FieldRef::new(Field::new(
+ "item",
+ DataType::List(FieldRef::new(Field::new("item", DataType::Null, true))),
+ true,
+ ))),
+ true,
+ )]));
+
+ let batches = do_read(json_content, 1024, false, schema);
+ assert_eq!(batches.len(), 1);
+
+ let col1 = batches[0].column(0).as_list::<i32>();
+ assert_eq!(col1.null_count(), 0);
+ assert_eq!(col1.value(0).len(), 2);
+ assert!(col1.value(0).as_list::<i32>().value(0).is_empty());
+ assert!(col1.value(0).as_list::<i32>().value(1).is_empty());
+
+ assert_eq!(col1.value(1).len(), 2);
+ assert_eq!(col1.value(1).as_list::<i32>().value(0).len(), 2);
+ assert_eq!(col1.value(1).as_list::<i32>().value(1).len(), 1);
+ }
+
#[test]
fn test_nested_list_json_arrays() {
let c_field =
diff --git a/arrow-json/src/reader/null_array.rs b/arrow-json/src/reader/null_array.rs
new file mode 100644
index 000000000..4270045fb
--- /dev/null
+++ b/arrow-json/src/reader/null_array.rs
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::reader::tape::{Tape, TapeElement};
+use crate::reader::ArrayDecoder;
+use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_schema::{ArrowError, DataType};
+
+#[derive(Default)]
+pub struct NullArrayDecoder {}
+
+impl ArrayDecoder for NullArrayDecoder {
+ fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
+ for p in pos {
+ if !matches!(tape.get(*p), TapeElement::Null) {
+ return Err(tape.error(*p, "null"));
+ }
+ }
+ ArrayDataBuilder::new(DataType::Null).len(pos.len()).build()
+ }
+}