You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by tu...@apache.org on 2023/12/19 11:34:39 UTC

(arrow-rs) branch master updated: Fix inferring object after field was null. (#5216)

This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new c0b805597c Fix inferring object after field was null. (#5216)
c0b805597c is described below

commit c0b805597c0c4d29f240809063e3e474e42dbf38
Author: Kamil Skalski <ka...@gmail.com>
AuthorDate: Tue Dec 19 12:34:33 2023 +0100

    Fix inferring object after field was null. (#5216)
---
 arrow-json/src/reader/schema.rs | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/arrow-json/src/reader/schema.rs b/arrow-json/src/reader/schema.rs
index 97f1a0f295..ace7b0ea5c 100644
--- a/arrow-json/src/reader/schema.rs
+++ b/arrow-json/src/reader/schema.rs
@@ -455,7 +455,7 @@ fn collect_field_types_from_object(
                 set_object_scalar_field_type(field_types, k, DataType::Utf8)?;
             }
             Value::Object(inner_map) => {
-                if !field_types.contains_key(k) {
+                if let InferredType::Any = field_types.get(k).unwrap_or(&InferredType::Any) {
                     field_types.insert(k.to_string(), InferredType::Object(HashMap::new()));
                 }
                 match field_types.get_mut(k).unwrap() {
@@ -719,4 +719,24 @@ mod tests {
         ]);
         assert_eq!(inferred_schema, schema);
     }
+
+    #[test]
+    fn test_infer_from_null_then_object() {
+        let data = r#"
+            {"obj":null}
+            {"obj":{"foo":1}}
+        "#;
+        let (inferred_schema, _) =
+            infer_json_schema_from_seekable(Cursor::new(data), None).expect("infer");
+        let schema = Schema::new(vec![Field::new(
+            "obj",
+            DataType::Struct(
+                [Field::new("foo", DataType::Int64, true)]
+                    .into_iter()
+                    .collect(),
+            ),
+            true,
+        )]);
+        assert_eq!(inferred_schema, schema);
+    }
 }