You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by wj...@apache.org on 2023/04/28 16:50:51 UTC
[arrow] branch main updated: GH-35297: [C++][IPC] Fix schema deserialization of map field (#35298)
This is an automated email from the ASF dual-hosted git repository.
wjones127 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 87280b9447 GH-35297: [C++][IPC] Fix schema deserialization of map field (#35298)
87280b9447 is described below
commit 87280b9447f5ec278137a496c16cd61dbbb7ca4c
Author: Gang Wu <us...@gmail.com>
AuthorDate: Sat Apr 29 00:50:39 2023 +0800
GH-35297: [C++][IPC] Fix schema deserialization of map field (#35298)
### Rationale for this change
The arrow schema deserialization from flatbuffer does not preserve nullable and metadata of sub-fields in the map type.
### What changes are included in this PR?
Fix map type deserialization by creating map type using field objects instead of type objects for sub-types.
### Are these changes tested?
Add several new test cases in the arrow/ipc/read_write_test.cc.
### Are there any user-facing changes?
No.
* Closes: #35297
Authored-by: Gang Wu <us...@gmail.com>
Signed-off-by: Will Jones <wi...@gmail.com>
---
cpp/src/arrow/ipc/metadata_internal.cc | 4 ++--
cpp/src/arrow/ipc/read_write_test.cc | 34 +++++++++++++++++++++++++++++++++-
2 files changed, 35 insertions(+), 3 deletions(-)
diff --git a/cpp/src/arrow/ipc/metadata_internal.cc b/cpp/src/arrow/ipc/metadata_internal.cc
index a9f90a0724..1394516ecd 100644
--- a/cpp/src/arrow/ipc/metadata_internal.cc
+++ b/cpp/src/arrow/ipc/metadata_internal.cc
@@ -367,8 +367,8 @@ Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data,
return Status::Invalid("Map's keys must be non-nullable");
} else {
auto map = static_cast<const flatbuf::Map*>(type_data);
- *out = std::make_shared<MapType>(children[0]->type()->field(0)->type(),
- children[0]->type()->field(1)->type(),
+ *out = std::make_shared<MapType>(children[0]->type()->field(0)->WithName("key"),
+ children[0]->type()->field(1)->WithName("value"),
map->keysSorted());
}
return Status::OK();
diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc
index fe821fbf12..7de81eff7a 100644
--- a/cpp/src/arrow/ipc/read_write_test.cc
+++ b/cpp/src/arrow/ipc/read_write_test.cc
@@ -236,7 +236,7 @@ class TestSchemaMetadata : public ::testing::Test {
io::BufferReader reader(buffer);
DictionaryMemo in_memo;
ASSERT_OK_AND_ASSIGN(auto actual_schema, ReadSchema(&reader, &in_memo));
- AssertSchemaEqual(schema, *actual_schema);
+ AssertSchemaEqual(schema, *actual_schema, /* check_metadata= */ true);
}
};
@@ -259,6 +259,15 @@ TEST_F(TestSchemaMetadata, PrimitiveFields) {
CheckSchemaRoundtrip(schema);
}
+TEST_F(TestSchemaMetadata, PrimitiveFieldsWithKeyValueMetadata) {
+ auto f1 = field("f1", std::make_shared<Int64Type>(), false,
+ key_value_metadata({"k1"}, {"v1"}));
+ auto f2 = field("f2", std::make_shared<StringType>(), true,
+ key_value_metadata({"k2"}, {"v2"}));
+ Schema schema({f1, f2});
+ CheckSchemaRoundtrip(schema);
+}
+
TEST_F(TestSchemaMetadata, NestedFields) {
auto type = list(int32());
auto f0 = field("f0", type);
@@ -271,6 +280,29 @@ TEST_F(TestSchemaMetadata, NestedFields) {
CheckSchemaRoundtrip(schema);
}
+// Verify that nullable=false is well-preserved for child fields of map type.
+TEST_F(TestSchemaMetadata, MapField) {
+ auto key = field("key", int32(), false);
+ auto item = field("value", int32(), false);
+ auto f0 = field("f0", std::make_shared<MapType>(key, item));
+ Schema schema({f0});
+ CheckSchemaRoundtrip(schema);
+}
+
+// Verify that key value metadata is well-preserved for child fields of nested type.
+TEST_F(TestSchemaMetadata, NestedFieldsWithKeyValueMetadata) {
+ auto metadata = key_value_metadata({"foo"}, {"bar"});
+ auto inner_field = field("inner", int32(), false, metadata);
+ auto f0 = field("f0", list(inner_field), false, metadata);
+ auto f1 = field("f1", struct_({inner_field}), false, metadata);
+ auto f2 = field("f2",
+ std::make_shared<MapType>(field("key", int32(), false, metadata),
+ field("value", int32(), false, metadata)),
+ false, metadata);
+ Schema schema({f0, f1, f2});
+ CheckSchemaRoundtrip(schema);
+}
+
TEST_F(TestSchemaMetadata, DictionaryFields) {
{
auto dict_type = dictionary(int8(), int32(), true /* ordered */);