You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/08/16 07:22:25 UTC

[GitHub] [arrow] emkornfield opened a new pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

emkornfield opened a new pull request #7973:
URL: https://github.com/apache/arrow/pull/7973


   level
   
   - Also add additional check that list annotated fields aren't repeated
   - Adds unit test for SchemaManifest::Make to verify rep/def/ancestor
   levels.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] emkornfield commented on pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
emkornfield commented on pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#issuecomment-675585382


   > Is it better to create an issue in PARQUET instead of ARROW?
   
   It is a fine line. Generally, I use PARQUET for issues related to core Parquet reading (i.e. not Arrow-specific) and ARROW when the functionality is used for the Parquet->Arrow path.


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] github-actions[bot] commented on pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
github-actions[bot] commented on pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#issuecomment-674492185


   https://issues.apache.org/jira/browse/ARROW-8493


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] emkornfield commented on a change in pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
emkornfield commented on a change in pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#discussion_r478100513



##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) non null) not
+  // null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},  // optional child struct
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // repeated field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // innter field

Review comment:
       done

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.
+  // This is only ever >1 for descendents of
+  // FixedSizeList.
+  int32_t null_slot_usage = 1;
+
+  // The definition level at which the value for the field
+  // is considered not null (definition levels greater than
+  // or equal to this value indicate a not-null
+  // value for the field). For list fields, definition levels
+  // greater than or equal to this field indicate a present,
+  // possibly null, element.
+  int16_t def_level = 0;
+
+  // The repetition level corresponding to this element
+  // or the closest repeated ancestor.  Any repetition
+  // level less than this indicates either a new list OR
+  // an empty list (which is determined in conjunction
+  // with definition_level).
+  int16_t rep_level = 0;
+
+  // The definition level indicating the level at which the closest
+  // repeated ancestor was not empty.  This is used to discriminate

Review comment:
       I don't think I understand this question. Since this mentions definition level, this is really a Parquet construct.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) non null) not
+  // null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},  // optional child struct
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // repeated field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // innter field
+
+  // Arrow schema: list(struct(child_list: list(struct(f0: bool f1: bool no-required )))
+  // not null) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          LogicalType::List())}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  // Def_level=2 is handled together with def_level=3
+                  // when decoding.  Def_level=2 indicates a present but empty
+                  // list.  Def_level=3 indicates a present element in the
+                  // list.
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // list field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/4, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/5, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // f0 bool field

Review comment:
       I agree, nice catch, there was in fact a bug in the linkages.

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repitition_level,

Review comment:
       fixed.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level encoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());
+    std::string expected("LIST-annotated groups must not be repeated.");
+    EXPECT_EQ(error.message().substr(0, expected.size()), expected);
+  }

Review comment:
       Yes, it should be named "list". "unseen", I think, references something that isn't modelled on the Arrow side.

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.
+  // This is only ever >1 for descendents of
+  // FixedSizeList.
+  int32_t null_slot_usage = 1;
+
+  // The definition level at which the value for the field
+  // is considered not null (definition levels greater than
+  // or equal to indicate this value indicate a not-null
+  // value for the field). For list fields definition levels
+  // greater then or equal to this field indicate a present
+  // , possibly null, element.
+  int16_t def_level = 0;
+
+  // The repetition level corresponding to this element
+  // or the closest repeated ancestor.  Any repetition
+  // level less than this indicates either a new list OR
+  // an empty list (which is determined in conjunction
+  // definition_level).

Review comment:
       definition levels in general.  We only really care about where the repetition level increases, in which case the def_level of the node should be sufficient to determine this.

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.
+  // This is only ever >1 for descendents of
+  // FixedSizeList.
+  int32_t null_slot_usage = 1;
+
+  // The definition level at which the value for the field
+  // is considered not null (definition levels greater than
+  // or equal to indicate this value indicate a not-null

Review comment:
       yes. fixed.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level necoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());
+    std::string expected("LIST-annotated groups must not be repeated.");
+    EXPECT_EQ(error.message().substr(0, expected.size()), expected);
+  }

Review comment:
       I think you are right; this documentation is off.  I think I mostly copied and pasted from existing code.

##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -477,12 +484,11 @@ Status ListToSchemaField(const GroupNode& group, int16_t current_def_level,
   const Node& list_node = *group.field(0);
 
   if (!list_node.is_repeated()) {
-    return Status::NotImplemented(
+    return Status::Invalid(
         "Non-repeated nodes in a LIST-annotated group are not supported.");
   }
 
-  ++current_def_level;
-  ++current_rep_level;
+  int16_t repeated_ancesor_def_level = current_levels.IncrementRepeated();

Review comment:
       yes.  fixed.

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.

Review comment:
       Not quite; I tried to reword.  This reflects how many slots are used in the Arrow array when Parquet has an undefined but present element (i.e. null in Arrow).  For fixed-size lists, a null fixed-size list forces all of its non-nested children to have N null slots, where N is the length of the fixed-size list.  Nested fixed-size lists increase this number multiplicatively.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) non null) not

Review comment:
       I don't believe the outer struct is nullable.  See comment below.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) non null) not
+  // null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},  // optional child struct
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // repeated field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // innter field
+
+  // Arrow schema: list(struct(child_list: list(struct(f0: bool f1: bool no-required )))
+  // not null) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          LogicalType::List())}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  // Def_level=2 is handled together with def_level=3
+                  // When decoding.  Def_level=2 indicate present but empty
+                  // list.  def_level=3 indicates a present element in the
+                  // list.
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // list field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/4, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/5, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  // Def_level=2 is handled together with def_level=3
+                  // When decoding.  Def_level=2 indicate present but empty
+                  // list.  def_level=3 indicates a present element in the
+                  // list.
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // list field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/4, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/4, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Arrow schema: list(struct(child_list: list(bool not null)) not null) not null
+  // Legacy 2-level necoding (required for backwards compatibility.  See

Review comment:
       done.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {

Review comment:
       Added a few more, let me know if you would like to see more.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) not null) not
+  // null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},  // optional child struct
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // repeated field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // innter field
+
+  // Arrow schema: list(struct(child_list: list(struct(f0: bool f1: bool no-required )))
+  // not null) not null

Review comment:
       Hmm, I don't think the outer struct is nullable. "parent" ends up mapping to two Arrow fields: list and struct.  They share the same def level, so if there is an element in the list, the struct must be present.  But I could be misreading this.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] emkornfield commented on pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
emkornfield commented on pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#issuecomment-680594450


   @pitrou does the updated PR look OK?


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] pitrou commented on pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
pitrou commented on pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#issuecomment-677763342


   Not this PR, but I'd appreciate if you could explain this comment in `schema.h`:
   ```c++
     // The definition level indicating the level at which the closest
     // repeated ancestor was not empty.  This is used to discrimate
     // between a value less than |definition_level|
     // being null or excluded entirely.
     // TODO(ARROW-8493): Populate this value.
   ```
   
   What is the difference between a null and an excluded value here? I thought null values were simply not encoded at all in Parquet, thus "excluded".


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] emkornfield commented on a change in pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
emkornfield commented on a change in pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#discussion_r471123867



##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,235 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}  //  primitive field
+                          ));
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,

Review comment:
       Need to double check this one




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] emkornfield commented on a change in pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
emkornfield commented on a change in pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#discussion_r474409559



##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -410,21 +410,66 @@ ::arrow::Result<std::shared_ptr<ArrowType>> GetTypeForNode(
   return storage_type;
 }
 
-Status NodeToSchemaField(const Node& node, int16_t max_def_level, int16_t max_rep_level,
+struct LevelInfo {
+  int16_t def_level = 0;
+  int16_t rep_level = 0;
+  int16_t repeated_ancestor_def_level = 0;

Review comment:
       Consolidated into one in level_conversions, where a follow-up PR will use it as an argument to the levels there.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));

Review comment:
       the latter. added comment.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));

Review comment:
       yes.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));

Review comment:
       yes.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level necoding

Review comment:
       The non-legacy form is the three-level encoding tested above.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));

Review comment:
       Yes.  The recommended standard for list types is 3-level encoding, which has an outer group indicating nullability of the "list" field, a repeated child group indicating its repeatedness, and an inner group indicating nullability of the elements.  Reference: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level necoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner struct field

Review comment:
       yes.  fixed.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));

Review comment:
       ConvertedType is legacy, correct, this was copy-pasta and should be fixed now.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level encoding

Review comment:
       Legacy because it isn't recommended.  I added a pointer to https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#nested-types

##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -606,23 +652,27 @@ Status NodeToSchemaField(const Node& node, int16_t current_def_level,
     if (node.is_repeated()) {
       // One-level list encoding, e.g.
       // a: repeated int32;
+      int16_t repeated_ancestor_def_level = current_levels.IncrementRepeated();
       out->children.resize(1);
       auto child_field = ::arrow::field(node.name(), type, /*nullable=*/false);
-      RETURN_NOT_OK(PopulateLeaf(column_index, child_field, current_def_level,
-                                 current_rep_level, ctx, out, &out->children[0]));
+      RETURN_NOT_OK(PopulateLeaf(column_index, child_field, current_levels, ctx, out,
+                                 &out->children[0]));
 
       out->field = ::arrow::field(node.name(), ::arrow::list(child_field),
                                   /*nullable=*/false, FieldIdMetadata(node.field_id()));
       // Is this right?

Review comment:
       probably not but I found it entertaining.  removed.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level encoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner bool field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());
+    std::string expected("LIST-annotated groups must not be repeated.");
+    EXPECT_EQ(error.message().substr(0, expected.size()), expected);

Review comment:
       I was originally using testing::HasSubstr, but it had linking issues on Windows (I opened a JIRA for this) and I didn't want to spend the time investigating.  I think it might be the same with StartsWith.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level necoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());

Review comment:
       done.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter field

Review comment:
       updated with a comment.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.

Review comment:
       Tried to rephrase. Def level 2 and def level 3 are both used to discriminate states of the same field, `child_list`, which is a nullable list. When decoding, `def_level == 2` indicates a non-null but empty list, and `def_level == 3` indicates that an element is present in the list.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level encoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());
+    std::string expected("LIST-annotated groups must not be repeated.");
+    EXPECT_EQ(error.message().substr(0, expected.size()), expected);
+  }

Review comment:
       From the spec:
   
   > The outer-most level must be a group annotated with LIST that contains a single field named list. The repetition of this level must be either optional or required and determines whether the list is nullable.
   
   I read that as meaning it should never be repeated. (I didn't see any exceptions for the 2-level encoding, but maybe we should clarify on parquet-dev@?)

##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -410,21 +410,66 @@ ::arrow::Result<std::shared_ptr<ArrowType>> GetTypeForNode(
   return storage_type;
 }
 
-Status NodeToSchemaField(const Node& node, int16_t max_def_level, int16_t max_rep_level,
+struct LevelInfo {
+  int16_t def_level = 0;
+  int16_t rep_level = 0;
+  int16_t repeated_ancestor_def_level = 0;
+
+  /// Copies current levels to the schema field.
+  void Populate(SchemaField* out) {
+    out->definition_level = def_level;
+    out->repetition_level = rep_level;
+    out->repeated_ancestor_definition_level = repeated_ancestor_def_level;
+  }
+
+  /// Increments levels according to the cardinality of node.
+  void Increment(const Node& node) {
+    if (node.is_repeated()) {
+      IncrementRepeated();
+      return;
+    }
+    if (node.is_optional()) {
+      IncrementOptional();
+      return;
+    }
+  }
+
+  /// Increments level for an optional node.
+  void IncrementOptional() { def_level++; }
+
+  /// Increments levels for the repeated node.  Returns
+  /// the previous ancestor_list_def_level.
+  int16_t IncrementRepeated() {
+    int16_t last_repeated_ancestor = repeated_ancestor_def_level;
+
+    // Repeated fields add both a repetition and definition level. This is used
+    // to distinguish between an empty list and a list with an item in it.
+    ++rep_level;
+    ++def_level;
+    // For levels >= current_def_level it indicates the list was

Review comment:
       The comment should say `repeated_ancestor_def_level`, not `current_def_level`.

##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -554,41 +605,36 @@ Status GroupToSchemaField(const GroupNode& node, int16_t current_def_level,
     // repeated group $NAME {
     //   r/o TYPE[0] f0
     //   r/o TYPE[1] f1
-    // }

Review comment:
       Overzealous delete, I think. Added back.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] pitrou commented on pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
pitrou commented on pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#issuecomment-677762828


   "Start populating" means it's not fully populated yet? Or not always?


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] emkornfield commented on pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
emkornfield commented on pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#issuecomment-676927791


   CC @wesm or @pitrou 


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] pitrou commented on a change in pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
pitrou commented on a change in pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#discussion_r474112279



##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));

Review comment:
       This is `list(list(boolean))` in Arrow terms? Perhaps add comments as above?

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // def_level=2 is skipped because it represents a null list.

Review comment:
       Hmm... what is a null list?
   What does it mean to have a def level that doesn't map to anything?

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner field

Review comment:
       This would be `list(struct(child: list(boolean)))`?

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));

Review comment:
       Is it `struct(child: struct(inner: boolean)) not null`?

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));

Review comment:
       Is it `struct(child: struct(inner: boolean))`?

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level encoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());

Review comment:
       `ASSERT_RAISES(Invalid, error)`

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level encoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());
+    std::string expected("LIST-annotated groups must not be repeated.");
+    EXPECT_EQ(error.message().substr(0, expected.size()), expected);
+  }

Review comment:
       Why is it an error to have a repeated LIST-annotated group?

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field

Review comment:
       Do we want to test that it's actually mapped to an Arrow `list(boolean)`? Or is that done elsewhere in the tests?
   (or is it `list(boolean not null)`?)

##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -410,21 +410,66 @@ ::arrow::Result<std::shared_ptr<ArrowType>> GetTypeForNode(
   return storage_type;
 }
 
-Status NodeToSchemaField(const Node& node, int16_t max_def_level, int16_t max_rep_level,
+struct LevelInfo {
+  int16_t def_level = 0;
+  int16_t rep_level = 0;
+  int16_t repeated_ancestor_def_level = 0;
+
+  /// Copies current levels to the schema field.
+  void Populate(SchemaField* out) {
+    out->definition_level = def_level;
+    out->repetition_level = rep_level;
+    out->repeated_ancestor_definition_level = repeated_ancestor_def_level;
+  }
+
+  /// Increments levels according to the cardinality of node.
+  void Increment(const Node& node) {
+    if (node.is_repeated()) {
+      IncrementRepeated();
+      return;
+    }
+    if (node.is_optional()) {
+      IncrementOptional();
+      return;
+    }
+  }
+
+  /// Increments level for an optional node.
+  void IncrementOptional() { def_level++; }
+
+  /// Increments levels for the repeated node.  Returns
+  /// the previous ancestor_list_def_level.
+  int16_t IncrementRepeated() {
+    int16_t last_repeated_ancestor = repeated_ancestor_def_level;
+
+    // Repeated fields add both a repetition and definition level. This is used
+    // to distinguish between an empty list and a list with an item in it.
+    ++rep_level;
+    ++def_level;
+    // For levels >= current_def_level it indicates the list was

Review comment:
       What is "current_def_level"?

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));

Review comment:
       Hmm... so the `ConvertedType::LIST` says that `child_list` is semantically a list even though it cannot be repeated, right? Is there a reason we don't use `LogicalType` instead? (AFAIU, `ConvertedType` is legacy?)

##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -606,23 +652,27 @@ Status NodeToSchemaField(const Node& node, int16_t current_def_level,
     if (node.is_repeated()) {
       // One-level list encoding, e.g.
       // a: repeated int32;
+      int16_t repeated_ancestor_def_level = current_levels.IncrementRepeated();
       out->children.resize(1);
       auto child_field = ::arrow::field(node.name(), type, /*nullable=*/false);
-      RETURN_NOT_OK(PopulateLeaf(column_index, child_field, current_def_level,
-                                 current_rep_level, ctx, out, &out->children[0]));
+      RETURN_NOT_OK(PopulateLeaf(column_index, child_field, current_levels, ctx, out,
+                                 &out->children[0]));
 
       out->field = ::arrow::field(node.name(), ::arrow::list(child_field),
                                   /*nullable=*/false, FieldIdMetadata(node.field_id()));
       // Is this right?

Review comment:
       Do we need to keep this comment?

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level encoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner struct field

Review comment:
       You mean "bool field"?

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level encoding

Review comment:
       Can you elaborate why it's legacy? What would be the non-legacy encoding?

##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -554,41 +605,36 @@ Status GroupToSchemaField(const GroupNode& node, int16_t current_def_level,
     // repeated group $NAME {
     //   r/o TYPE[0] f0
     //   r/o TYPE[1] f1
-    // }

Review comment:
       Why?

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level necoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());
+    std::string expected("LIST-annotated groups must not be repeated.");
+    EXPECT_EQ(error.message().substr(0, expected.size()), expected);

Review comment:
       Or something like:
   ```c++
     EXPECT_THAT(
         error.message(),
         testing::StartsWith(
             "LIST-annotated groups must not be repeated"));
   ```

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));

Review comment:
       Or is it `struct(child: struct(inner: boolean not null))`?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] pitrou commented on pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
pitrou commented on pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#issuecomment-677763466


   (also, perhaps need to update the TODO in that comment)


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] emkornfield commented on a change in pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
emkornfield commented on a change in pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#discussion_r474391446



##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field

Review comment:
       This is tested above.  There is also a JIRA open to ensure we have complete type coverage in conversion and we can add more tests if the ones already present aren't sufficient.  Added a comment that this is `non-null list(non-null bool)`.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] kiszk commented on pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
kiszk commented on pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#issuecomment-675087462


   Is it better to create an issue in PARQUET instead of ARROW?


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] pitrou commented on a change in pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
pitrou commented on a change in pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#discussion_r474106190



##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -410,21 +410,66 @@ ::arrow::Result<std::shared_ptr<ArrowType>> GetTypeForNode(
   return storage_type;
 }
 
-Status NodeToSchemaField(const Node& node, int16_t max_def_level, int16_t max_rep_level,
+struct LevelInfo {
+  int16_t def_level = 0;
+  int16_t rep_level = 0;
+  int16_t repeated_ancestor_def_level = 0;

Review comment:
       Structures with these three fields seem to be defined in multiple places now. Perhaps we should simply use `LevelInfo` as a plain datatype everywhere instead of copying all three fields by hand every time?

##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -410,21 +410,66 @@ ::arrow::Result<std::shared_ptr<ArrowType>> GetTypeForNode(
   return storage_type;
 }
 
-Status NodeToSchemaField(const Node& node, int16_t max_def_level, int16_t max_rep_level,
+struct LevelInfo {
+  int16_t def_level = 0;
+  int16_t rep_level = 0;
+  int16_t repeated_ancestor_def_level = 0;

Review comment:
       Structures with these three fields seem to be defined in multiple places now. Perhaps we should simply use `LevelInfo` as a plain datatype everywhere instead of copying all three fields by hand every time?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] nealrichardson closed pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
nealrichardson closed pull request #7973:
URL: https://github.com/apache/arrow/pull/7973


   


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [arrow] pitrou commented on a change in pull request #7973: ARROW-8493: [C++][Parquet] Start populating repeated ancestor defintion

Posted by GitBox <gi...@apache.org>.
pitrou commented on a change in pull request #7973:
URL: https://github.com/apache/arrow/pull/7973#discussion_r477352114



##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.
+  // This is only ever >1 for descendents of
+  // FixedSizeList.
+  int32_t null_slot_usage = 1;
+
+  // The definition level at which the value for the field
+  // is considered not null (definition levels greater than
+  // or equal to indicate this value indicate a not-null
+  // value for the field). For list fields definition levels
+  // greater then or equal to this field indicate a present
+  // , possibly null, element.
+  int16_t def_level = 0;
+
+  // The repetition level corresponding to this element
+  // or the closest repeated ancestor.  Any repetition
+  // level less than this indicates either a new list OR
+  // an empty list (which is determined in conjunction
+  // definition_level).
+  int16_t rep_level = 0;
+
+  // The definition level indicating the level at which the closest
+  // repeated ancestor was not empty.  This is used to discriminate
+  // between a value less than |definition_level|
+  // being null or excluded entirely.
+  // For instance if we have an arrow schema like:
+  // list(struct(f0: int)).  Then then there are the following
+  // definition levels:
+  // 0 = null list
+  // 1 = present but empty list.
+  // 2 = a null value in the list
+  // 3 = a non null struct but null integer.
+  // 4 = a present integer.

Review comment:
       +1, thanks for this example!

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.
+  // This is only ever >1 for descendents of
+  // FixedSizeList.
+  int32_t null_slot_usage = 1;
+
+  // The definition level at which the value for the field
+  // is considered not null (definition levels greater than
+  // or equal to indicate this value indicate a not-null
+  // value for the field). For list fields definition levels
+  // greater then or equal to this field indicate a present
+  // , possibly null, element.
+  int16_t def_level = 0;
+
+  // The repetition level corresponding to this element
+  // or the closest repeated ancestor.  Any repetition
+  // level less than this indicates either a new list OR
+  // an empty list (which is determined in conjunction
+  // definition_level).
+  int16_t rep_level = 0;
+
+  // The definition level indicating the level at which the closest
+  // repeated ancestor was not empty.  This is used to discriminate

Review comment:
       Do you mean logical ancestor (in Arrow terms)? Or physical ancestor (in Parquet nesting)?

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.
+  // This is only ever >1 for descendents of
+  // FixedSizeList.
+  int32_t null_slot_usage = 1;
+
+  // The definition level at which the value for the field
+  // is considered not null (definition levels greater than
+  // or equal to indicate this value indicate a not-null

Review comment:
       "or equal to this value"?

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.
+  // This is only ever >1 for descendents of
+  // FixedSizeList.
+  int32_t null_slot_usage = 1;
+
+  // The definition level at which the value for the field
+  // is considered not null (definition levels greater than
+  // or equal to indicate this value indicate a not-null
+  // value for the field). For list fields definition levels
+  // greater then or equal to this field indicate a present
+  // , possibly null, element.
+  int16_t def_level = 0;
+
+  // The repetition level corresponding to this element
+  // or the closest repeated ancestor.  Any repetition
+  // level less than this indicates either a new list OR
+  // an empty list (which is determined in conjunction
+  // definition_level).

Review comment:
       Do you mean "def_level"? Or "repeated_ancestor_def_level" perhaps?

##########
File path: cpp/src/parquet/arrow/schema.cc
##########
@@ -477,12 +484,11 @@ Status ListToSchemaField(const GroupNode& group, int16_t current_def_level,
   const Node& list_node = *group.field(0);
 
   if (!list_node.is_repeated()) {
-    return Status::NotImplemented(
+    return Status::Invalid(
         "Non-repeated nodes in a LIST-annotated group are not supported.");
   }
 
-  ++current_def_level;
-  ++current_rep_level;
+  int16_t repeated_ancesor_def_level = current_levels.IncrementRepeated();

Review comment:
       "ancestor"?

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) non null) not
+  // null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},  // optional child struct
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // repeated field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // innter field

Review comment:
       "inner"

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) non null) not
+  // null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},  // optional child struct
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // repeated field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // innter field
+
+  // Arrow schema: list(struct(child_list: list(struct(f0: bool f1: bool no-required )))
+  // not null) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          LogicalType::List())}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  // Def_level=2 is handled together with def_level=3
+                  // when decoding.  Def_level=2 indicates a present but empty
+                  // list.  def_level=3 indicates a present element in the
+                  // list.
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // list field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/4, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/5, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // f0 bool field

Review comment:
       Hmm, ok, now I don't understand why 4 LevelInfo are returned. I would expect 5:
   * for outer `list`
   * for outer `list(struct)`
   * for `list(struct(child: list))`
   * for `list(struct(child: list(struct)))`
   * for `list(struct(child: list(struct: f0)))`
   

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repitition_level,

Review comment:
       "repetition_level"

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.
+  // This is only ever >1 for descendents of
+  // FixedSizeList.
+  int32_t null_slot_usage = 1;
+
+  // The definition level at which the value for the field
+  // is considered not null (definition levels greater than
+  // or equal to this value indicate a not-null
+  // value for the field). For list fields, definition levels
+  // greater than or equal to this field indicate a present,
+  // possibly null, element.
+  int16_t def_level = 0;
+
+  // The repetition level corresponding to this element
+  // or the closest repeated ancestor.  Any repetition
+  // level less than this indicates either a new list OR
+  // an empty list (which is determined in conjunction
+  // definition_level).
+  int16_t rep_level = 0;
+
+  // The definition level indicating the level at which the closest
+  // repeated ancestor was not empty.  This is used to discriminate
+  // between a value less than |definition_level|
+  // being null or excluded entirely.
+  // For instance if we have an arrow schema like:
+  // list(struct(f0: int)).  Then there are the following
+  // definition levels:
+  // 0 = null list
+  // 1 = present but empty list.
+  // 2 = a null value in the list
+  // 3 = a non null struct but null integer.
+  // 4 = a present integer.
+  // When reconstructing the struct and integer Array's
+  // repeated_ancestor_def_level would be 2.  Any

Review comment:
       The sentence is not clear here, could you rephrase?
   Do you mean something like:
   > When reconstructing the struct and integer, their `repeated_ancestor_def_level` would be 2.

##########
File path: cpp/src/parquet/level_conversion.h
##########
@@ -20,10 +20,117 @@
 #include <cstdint>
 
 #include "parquet/platform.h"
+#include "parquet/schema.h"
 
 namespace parquet {
 namespace internal {
 
+struct PARQUET_EXPORT LevelInfo {
+  LevelInfo()
+      : null_slot_usage(1), def_level(0), rep_level(0), repeated_ancestor_def_level(0) {}
+  LevelInfo(int32_t null_slots, int32_t definition_level, int32_t repitition_level,
+            int32_t repeated_ancestor_definition_level)
+      : null_slot_usage(null_slots),
+        def_level(definition_level),
+        rep_level(repitition_level),
+        repeated_ancestor_def_level(repeated_ancestor_definition_level) {}
+
+  bool operator==(const LevelInfo& b) const {
+    return null_slot_usage == b.null_slot_usage && def_level == b.def_level &&
+           rep_level == b.rep_level &&
+           repeated_ancestor_def_level == b.repeated_ancestor_def_level;
+  }
+
+  // How many slots a null element consumes.

Review comment:
       In other words, is it the number of definition levels per null element?
   (I assume "slot" doesn't mean "value slot" because nulls are not encoded in values)

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) non null) not

Review comment:
       The outer struct is nullable and there seem to be too many nesting levels, so I'd say
   ```
   list(struct(child: list(bool not null) not null)) not null
   ```

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {

Review comment:
       Sorry to ask more, but can we also have simpler tests with a single top-level list field?
   (e.g. `list(bool not null)`, `list(bool)`, `list(bool) not null`...)
   
   Once we have that, it's easier to reason about the more complicated ones.

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) non null) not
+  // null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},  // optional child struct
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // repeated field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // innter field
+
+  // Arrow schema: list(struct(child_list: list(struct(f0: bool f1: bool no-required )))
+  // not null) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          LogicalType::List())}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  // Def_level=2 is handled together with def_level=3
+                  // When decoding.  Def_level=2 indicate present but empty
+                  // list.  def_level=3 indicates a present element in the
+                  // list.
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // list field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/4, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/5, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  // Def_level=2 is handled together with def_level=3
+                  // When decoding.  Def_level=2 indicate present but empty
+                  // list.  def_level=3 indicates a present element in the
+                  // list.
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // list field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/4, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/4, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Arrow schema: list(struct(child_list: list(bool not null)) not null) not null
+  // Legacy 2-level necoding (required for backwards compatibility.  See

Review comment:
       Typo: "necoding" should be "encoding".

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1144,244 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+::arrow::Result<std::deque<LevelInfo>> RootToTreeLeafLevels(
+    const SchemaManifest& manifest, int column_number) {
+  std::deque<LevelInfo> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front(field->level_info);
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1,
+                                            /*def_level=*/0, /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1,
+                                            /*rep_level=*/0,
+                                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: list(bool not null) not null
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 0},  // List Field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  // Arrow schema: struct(child: struct(inner: boolean not null))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean ))
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+
+  // Arrow schema: struct(child: struct(inner: boolean)) not null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/0, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/0,
+                            /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  // Arrow schema: list(struct(child: struct(list(bool not null) not null)) non null) not
+  // null
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<LevelInfo> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(
+      levels,
+      ElementsAre(LevelInfo{/*null_slot_usage=*/1, /*def_level=*/1, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/2, /*rep_level=*/1,
+                            /*ancestor_list_def_level*/ 1},  // optional child struct
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 1},  // repeated field
+                  LevelInfo{/*null_slot_usage=*/1, /*def_level=*/3, /*rep_level=*/2,
+                            /*ancestor_list_def_level*/ 3}));  // innter field
+
+  // Arrow schema: list(struct(child_list: list(struct(f0: bool f1: bool no-required )))
+  // not null) not null

Review comment:
       The outer struct seems nullable. Also, can we use "not null" everywhere instead of "no-required"?

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level necoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_ldevl=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());
+    std::string expected("LIST-annotated groups must not be repeated.");
+    EXPECT_EQ(error.message().substr(0, expected.size()), expected);
+  }

Review comment:
       However, I see this kind of comment in `path_internal_test.cc`:
   ```
     // Translates to parquet schema:
     // optional group bag {
     //   repeated group [unseen] (List) {
     //       required int64 Entries;
     //   }
     // }
   ```
   Should the List annotation be on the top-level "bag" node, rather than on the middle "[unseen]" node?
   (Also, what does "[unseen]" mean? Shouldn't it be named "list" as per the Parquet spec?)

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level encoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());
+    std::string expected("LIST-annotated groups must not be repeated.");
+    EXPECT_EQ(error.message().substr(0, expected.size()), expected);
+  }

Review comment:
       Ok, I read the spec now, I understand better :-)

##########
File path: cpp/src/parquet/arrow/arrow_schema_test.cc
##########
@@ -1140,5 +1143,231 @@ TEST(TestFromParquetSchema, CorruptMetadata) {
   ASSERT_RAISES(IOError, FromParquetSchema(parquet_schema, props, &arrow_schema));
 }
 
+struct Levels {
+  int16_t def_level;
+  int16_t rep_level;
+  int16_t repeated_ancestor_def;
+  friend std::ostream& operator<<(std::ostream& os, const Levels& levels) {
+    // This print method is to silence valgrind issues.  What's printed
+    // is not important because all asserts happen directly on
+    // members.
+    os << "{def=" << levels.def_level << ", rep=" << levels.rep_level
+       << ", repeated_ancestor_def=" << levels.repeated_ancestor_def << "}";
+    return os;
+  }
+};
+
+bool operator==(const Levels& a, const Levels& b) {
+  return a.def_level == b.def_level && a.rep_level == b.rep_level &&
+         a.repeated_ancestor_def == b.repeated_ancestor_def;
+}
+
+::arrow::Result<std::deque<Levels>> RootToTreeLeafLevels(const SchemaManifest& manifest,
+                                                         int column_number) {
+  std::deque<Levels> out;
+  const SchemaField* field;
+  RETURN_NOT_OK(manifest.GetColumnField(column_number, &field));
+  while (field != nullptr) {
+    out.push_front({field->definition_level, field->repetition_level,
+                    field->repeated_ancestor_definition_level});
+    field = manifest.GetParent(field);
+  }
+  return out;
+}
+
+class TestLevels : public ::testing::Test {
+ public:
+  virtual void SetUp() {}
+
+  ::arrow::Status MaybeSetParquetSchema(const NodePtr& column) {
+    descriptor_.reset(new SchemaDescriptor());
+    manifest_.reset(new SchemaManifest());
+    descriptor_->Init(GroupNode::Make("root", Repetition::REQUIRED, {column}));
+    return SchemaManifest::Make(descriptor_.get(),
+                                std::shared_ptr<const ::arrow::KeyValueMetadata>(),
+                                ArrowReaderProperties(), manifest_.get());
+  }
+  void SetParquetSchema(const NodePtr& column) {
+    ASSERT_OK(MaybeSetParquetSchema(column));
+  }
+
+ protected:
+  std::unique_ptr<SchemaDescriptor> descriptor_;
+  std::unique_ptr<SchemaManifest> manifest_;
+};
+
+TEST_F(TestLevels, TestPrimitive) {
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REQUIRED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::OPTIONAL, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(
+      PrimitiveNode::Make("node_name", Repetition::REPEATED, ParquetType::BOOLEAN));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 0},  // List Field
+                          Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1}));  //  primitive field
+}
+
+TEST_F(TestLevels, TestSimpleGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REQUIRED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::OPTIONAL,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/3, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REQUIRED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::OPTIONAL, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels, ElementsAre(Levels{/*def_level=*/0, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/1, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0},
+                                  Levels{/*def_level=*/2, /*rep_level=*/0,
+                                         /*ancestor_list_def_level*/ 0}));
+}
+
+TEST_F(TestLevels, TestRepeatedGroups) {
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("inner", Repetition::REPEATED, ParquetType::BOOLEAN)})}));
+  ASSERT_OK_AND_ASSIGN(std::deque<Levels> levels,
+                       RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          Levels{/*def_level=*/2, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},  // optional child struct
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // repeated field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // innter field
+
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {GroupNode::Make(
+              "list", Repetition::REPEATED,
+              {GroupNode::Make(
+                  "element", Repetition::OPTIONAL,
+                  {PrimitiveNode::Make("f0", Repetition::OPTIONAL, ParquetType::BOOLEAN),
+                   PrimitiveNode::Make("f1", Repetition::REQUIRED,
+                                       ParquetType::BOOLEAN)})})},
+          ConvertedType::LIST)}));
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/5, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f0 bool field
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/1));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3},  // inner struct field
+
+                          Levels{/*def_level=*/4, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // f1 bool field
+
+  // Legacy 2-level encoding
+  SetParquetSchema(GroupNode::Make(
+      "parent", Repetition::REPEATED,
+      {GroupNode::Make(
+          "child_list", Repetition::OPTIONAL,
+          {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+          ConvertedType::LIST)}));
+
+  ASSERT_OK_AND_ASSIGN(levels, RootToTreeLeafLevels(*manifest_, /*column_number=*/0));
+  EXPECT_THAT(levels,
+              ElementsAre(Levels{/*def_level=*/1, /*rep_level=*/1,
+                                 /*ancestor_list_def_level*/ 1},
+                          // Def_level=2 is skipped because it represents a null list.
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 1},  // list field
+                          Levels{/*def_level=*/3, /*rep_level=*/2,
+                                 /*ancestor_list_def_level*/ 3}));  // inner struct field
+}
+
+TEST_F(TestLevels, ListErrors) {
+  {
+    ::arrow::Status error = MaybeSetParquetSchema(GroupNode::Make(
+        "child_list", Repetition::REPEATED,
+        {PrimitiveNode::Make("bool", Repetition::REPEATED, ParquetType::BOOLEAN)},
+        ConvertedType::LIST));
+    EXPECT_TRUE(error.IsInvalid());
+    std::string expected("LIST-annotated groups must not be repeated.");
+    EXPECT_EQ(error.message().substr(0, expected.size()), expected);

Review comment:
       I see, thank you.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org