You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2020/08/11 15:57:51 UTC

[arrow] 22/22: ARROW-9598: [C++][Parquet] Fix writing nullable structs

This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch maint-1.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit b24b7e9f9664e9b1799a26cf6fd11f406747ab4f
Author: Micah Kornfield <em...@gmail.com>
AuthorDate: Mon Aug 10 15:33:10 2020 -0500

    ARROW-9598: [C++][Parquet] Fix writing nullable structs
    
    Traverse the node hierarchy to ensure we capture the right value count.
    
    Closes #7862 from emkornfield/verify_parquetfg
    
    Authored-by: Micah Kornfield <em...@gmail.com>
    Signed-off-by: Wes McKinney <we...@apache.org>
---
 cpp/src/parquet/arrow/arrow_reader_writer_test.cc | 17 +++++++++++++++++
 cpp/src/parquet/column_writer.cc                  |  9 ++++++---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index 661ce7b..476d82f 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -2344,6 +2344,23 @@ TEST(ArrowReadWrite, SimpleStructRoundTrip) {
       2);
 }
 
+TEST(ArrowReadWrite, SingleColumnNullableStruct) {  // Added with the ARROW-9598 fix.
+  auto links =  // Field "Links": a struct with one nullable int64 child, "Backward".
+      field("Links",
+            ::arrow::struct_({field("Backward", ::arrow::int64(), /*nullable=*/true)}));
+
+  auto links_id_array = ::arrow::ArrayFromJSON(links->type(),
+                                               "[null, "  // row 0: struct itself is null
+                                               "{\"Backward\": 10}"  // row 1: Backward = 10
+                                               "]");
+
+  CheckSimpleRoundtrip(  // Single-column table built from the field and array above.
+      ::arrow::Table::Make(std::make_shared<::arrow::Schema>(
+                               std::vector<std::shared_ptr<::arrow::Field>>{links}),
+                           {links_id_array}),
+      3);  // NOTE(review): presumably the row-group size for the helper -- confirm.
+}
+
 // Disabled until implementation can be finished.
 TEST(TestArrowReadWrite, DISABLED_CanonicalNestedRoundTrip) {
   auto doc_id = field("DocId", ::arrow::int64(), /*nullable=*/false);
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index f9cf37c..6cb0bae 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -1138,8 +1138,12 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter<
     if (descr_->max_definition_level() > 0) {
       // Minimal definition level for which spaced values are written
       int16_t min_spaced_def_level = descr_->max_definition_level();
-      if (descr_->schema_node()->is_optional()) {
-        min_spaced_def_level--;
+      const ::parquet::schema::Node* node = descr_->schema_node().get();
+      while (node != nullptr && !node->is_repeated()) {
+        if (node->is_optional()) {
+          min_spaced_def_level--;
+        }
+        node = node->parent();
       }
       for (int64_t i = 0; i < num_levels; ++i) {
         if (def_levels[i] == descr_->max_definition_level()) {
@@ -1149,7 +1153,6 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter<
           ++spaced_values_to_write;
         }
       }
-
       WriteDefinitionLevels(num_levels, def_levels);
     } else {
       // Required field, write all values