You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by wj...@apache.org on 2023/04/28 16:50:51 UTC

[arrow] branch main updated: GH-35297: [C++][IPC] Fix schema deserialization of map field (#35298)

This is an automated email from the ASF dual-hosted git repository.

wjones127 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 87280b9447 GH-35297: [C++][IPC] Fix schema deserialization of map field (#35298)
87280b9447 is described below

commit 87280b9447f5ec278137a496c16cd61dbbb7ca4c
Author: Gang Wu <us...@gmail.com>
AuthorDate: Sat Apr 29 00:50:39 2023 +0800

    GH-35297: [C++][IPC] Fix schema deserialization of map field (#35298)
    
    ### Rationale for this change
    
    The arrow schema deserialization from flatbuffer does not preserve nullable and metadata of sub-fields in the map type.
    
    ### What changes are included in this PR?
    
    Fix map type deserialization by creating map type using field objects instead of type objects for sub-types.
    
    ### Are these changes tested?
    
    Add several new test cases in the arrow/ipc/read_write_test.cc.
    
    ### Are there any user-facing changes?
    
    No.
    * Closes: #35297
    
    Authored-by: Gang Wu <us...@gmail.com>
    Signed-off-by: Will Jones <wi...@gmail.com>
---
 cpp/src/arrow/ipc/metadata_internal.cc |  4 ++--
 cpp/src/arrow/ipc/read_write_test.cc   | 34 +++++++++++++++++++++++++++++++++-
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/ipc/metadata_internal.cc b/cpp/src/arrow/ipc/metadata_internal.cc
index a9f90a0724..1394516ecd 100644
--- a/cpp/src/arrow/ipc/metadata_internal.cc
+++ b/cpp/src/arrow/ipc/metadata_internal.cc
@@ -367,8 +367,8 @@ Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
         return Status::Invalid("Map's keys must be non-nullable");
       } else {
         auto map = static_cast<const flatbuf::Map*>(type_data);
-        *out = std::make_shared<MapType>(children[0]->type()->field(0)->type(),
-                                         children[0]->type()->field(1)->type(),
+        *out = std::make_shared<MapType>(children[0]->type()->field(0)->WithName("key"),
+                                         children[0]->type()->field(1)->WithName("value"),
                                          map->keysSorted());
       }
       return Status::OK();
diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc
index fe821fbf12..7de81eff7a 100644
--- a/cpp/src/arrow/ipc/read_write_test.cc
+++ b/cpp/src/arrow/ipc/read_write_test.cc
@@ -236,7 +236,7 @@ class TestSchemaMetadata : public ::testing::Test {
     io::BufferReader reader(buffer);
     DictionaryMemo in_memo;
     ASSERT_OK_AND_ASSIGN(auto actual_schema, ReadSchema(&reader, &in_memo));
-    AssertSchemaEqual(schema, *actual_schema);
+    AssertSchemaEqual(schema, *actual_schema, /* check_metadata= */ true);
   }
 };
 
@@ -259,6 +259,15 @@ TEST_F(TestSchemaMetadata, PrimitiveFields) {
   CheckSchemaRoundtrip(schema);
 }
 
+TEST_F(TestSchemaMetadata, PrimitiveFieldsWithKeyValueMetadata) {
+  auto f1 = field("f1", std::make_shared<Int64Type>(), false,
+                  key_value_metadata({"k1"}, {"v1"}));
+  auto f2 = field("f2", std::make_shared<StringType>(), true,
+                  key_value_metadata({"k2"}, {"v2"}));
+  Schema schema({f1, f2});
+  CheckSchemaRoundtrip(schema);
+}
+
 TEST_F(TestSchemaMetadata, NestedFields) {
   auto type = list(int32());
   auto f0 = field("f0", type);
@@ -271,6 +280,29 @@ TEST_F(TestSchemaMetadata, NestedFields) {
   CheckSchemaRoundtrip(schema);
 }
 
+// Verify that nullable=false is well-preserved for child fields of map type.
+TEST_F(TestSchemaMetadata, MapField) {
+  auto key = field("key", int32(), false);
+  auto item = field("value", int32(), false);
+  auto f0 = field("f0", std::make_shared<MapType>(key, item));
+  Schema schema({f0});
+  CheckSchemaRoundtrip(schema);
+}
+
+// Verify that key value metadata is well-preserved for child fields of nested type.
+TEST_F(TestSchemaMetadata, NestedFieldsWithKeyValueMetadata) {
+  auto metadata = key_value_metadata({"foo"}, {"bar"});
+  auto inner_field = field("inner", int32(), false, metadata);
+  auto f0 = field("f0", list(inner_field), false, metadata);
+  auto f1 = field("f1", struct_({inner_field}), false, metadata);
+  auto f2 = field("f2",
+                  std::make_shared<MapType>(field("key", int32(), false, metadata),
+                                            field("value", int32(), false, metadata)),
+                  false, metadata);
+  Schema schema({f0, f1, f2});
+  CheckSchemaRoundtrip(schema);
+}
+
 TEST_F(TestSchemaMetadata, DictionaryFields) {
   {
     auto dict_type = dictionary(int8(), int32(), true /* ordered */);