You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2020/08/11 15:57:51 UTC
[arrow] 22/22: ARROW-9598: [C++][Parquet] Fix writing nullable structs
This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch maint-1.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git
commit b24b7e9f9664e9b1799a26cf6fd11f406747ab4f
Author: Micah Kornfield <em...@gmail.com>
AuthorDate: Mon Aug 10 15:33:10 2020 -0500
ARROW-9598: [C++][Parquet] Fix writing nullable structs
Traverse the node hierarchy to ensure we capture the right value count.
Closes #7862 from emkornfield/verify_parquetfg
Authored-by: Micah Kornfield <em...@gmail.com>
Signed-off-by: Wes McKinney <we...@apache.org>
---
cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 17 +++++++++++++++++
cpp/src/parquet/column_writer.cc | 9 ++++++---
2 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index 661ce7b..476d82f 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -2344,6 +2344,23 @@ TEST(ArrowReadWrite, SimpleStructRoundTrip) {
2);
}
+TEST(ArrowReadWrite, SingleColumnNullableStruct) {
+ auto links =
+ field("Links",
+ ::arrow::struct_({field("Backward", ::arrow::int64(), /*nullable=*/true)}));
+
+ auto links_id_array = ::arrow::ArrayFromJSON(links->type(),
+ "[null, "
+ "{\"Backward\": 10}"
+ "]");
+
+ CheckSimpleRoundtrip(
+ ::arrow::Table::Make(std::make_shared<::arrow::Schema>(
+ std::vector<std::shared_ptr<::arrow::Field>>{links}),
+ {links_id_array}),
+ 3);
+}
+
// Disabled until implementation can be finished.
TEST(TestArrowReadWrite, DISABLED_CanonicalNestedRoundTrip) {
auto doc_id = field("DocId", ::arrow::int64(), /*nullable=*/false);
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index f9cf37c..6cb0bae 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -1138,8 +1138,12 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter<
if (descr_->max_definition_level() > 0) {
// Minimal definition level for which spaced values are written
int16_t min_spaced_def_level = descr_->max_definition_level();
- if (descr_->schema_node()->is_optional()) {
- min_spaced_def_level--;
+ const ::parquet::schema::Node* node = descr_->schema_node().get();
+ while (node != nullptr && !node->is_repeated()) {
+ if (node->is_optional()) {
+ min_spaced_def_level--;
+ }
+ node = node->parent();
}
for (int64_t i = 0; i < num_levels; ++i) {
if (def_levels[i] == descr_->max_definition_level()) {
@@ -1149,7 +1153,6 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter<
++spaced_values_to_write;
}
}
-
WriteDefinitionLevels(num_levels, def_levels);
} else {
// Required field, write all values