You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2017/04/25 13:40:50 UTC

parquet-cpp git commit: PARQUET-963: Return NotImplemented when attempting to read a struct field

Repository: parquet-cpp
Updated Branches:
  refs/heads/master a8dee1fe9 -> 4beac90a3


PARQUET-963: Return NotImplemented when attempting to read a struct field

cc @xhochy @itaiin @advancedxy We are not yet able to correctly read structs. This showed up in ARROW-601, so this raises an exception for now.

Author: Wes McKinney <we...@twosigma.com>

Closes #308 from wesm/PARQUET-963 and squashes the following commits:

da42932 [Wes McKinney] Return NotImplemented when attempting to read a struct field


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/4beac90a
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/4beac90a
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/4beac90a

Branch: refs/heads/master
Commit: 4beac90a36283ee1d0b3f43aa322317d62add643
Parents: a8dee1f
Author: Wes McKinney <we...@twosigma.com>
Authored: Tue Apr 25 09:40:44 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue Apr 25 09:40:44 2017 -0400

----------------------------------------------------------------------
 src/parquet/arrow/arrow-reader-writer-test.cc | 18 +++---------------
 src/parquet/arrow/reader.cc                   | 15 ++++++++++-----
 2 files changed, 13 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/4beac90a/src/parquet/arrow/arrow-reader-writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc
index 7b63514..1d87606 100644
--- a/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -1113,26 +1113,14 @@ class TestNestedSchemaRead : public ::testing::Test {
 
 TEST_F(TestNestedSchemaRead, ReadIntoTableFull) {
   std::shared_ptr<Table> table;
-  ASSERT_OK_NO_THROW(reader_->ReadTable(&table));
-  ASSERT_EQ(table->num_rows(), 0);
-  ASSERT_EQ(table->num_columns(), 2);
-  ASSERT_EQ(table->schema()->field(0)->type()->num_children(), 2);
+  ASSERT_RAISES(NotImplemented, reader_->ReadTable(&table));
 }
 
 TEST_F(TestNestedSchemaRead, ReadTablePartial) {
   std::shared_ptr<Table> table;
 
-  // columns: {group1.leaf1, leaf3}
-  ASSERT_OK_NO_THROW(reader_->ReadTable({0, 2}, &table));
-  ASSERT_EQ(table->num_rows(), 0);
-  ASSERT_EQ(table->num_columns(), 2);
-  ASSERT_EQ(table->schema()->field(0)->type()->num_children(), 1);
-
-  // columns: {group1.leaf1, group1.leaf2}
-  ASSERT_OK_NO_THROW(reader_->ReadTable({0, 1}, &table));
-  ASSERT_EQ(table->num_rows(), 0);
-  ASSERT_EQ(table->num_columns(), 1);
-  ASSERT_EQ(table->schema()->field(0)->type()->num_children(), 2);
+  ASSERT_RAISES(NotImplemented, reader_->ReadTable({0, 2}, &table));
+  ASSERT_RAISES(NotImplemented, reader_->ReadTable({0, 1}, &table));
 
   // columns: {leaf3}
   ASSERT_OK_NO_THROW(reader_->ReadTable({2}, &table));

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/4beac90a/src/parquet/arrow/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc
index 852649a..cc3e3fa 100644
--- a/src/parquet/arrow/reader.cc
+++ b/src/parquet/arrow/reader.cc
@@ -722,13 +722,18 @@ Status ColumnReader::Impl::InitValidBits(int batch_size) {
 
 Status ColumnReader::Impl::WrapIntoListArray(const int16_t* def_levels,
     const int16_t* rep_levels, int64_t total_levels_read, std::shared_ptr<Array>* array) {
-  if (descr_->max_repetition_level() > 0) {
-    std::shared_ptr<::arrow::Schema> arrow_schema;
-    RETURN_NOT_OK(
-        FromParquetSchema(input_->schema(), {input_->column_index()}, &arrow_schema));
+  std::shared_ptr<::arrow::Schema> arrow_schema;
+  RETURN_NOT_OK(
+      FromParquetSchema(input_->schema(), {input_->column_index()}, &arrow_schema));
+  std::shared_ptr<Field> current_field = arrow_schema->field(0);
+
+  if (current_field->type()->id() == ::arrow::Type::STRUCT) {
+    return Status::NotImplemented(
+        "Structs are not yet supported.");
+  }
 
+  if (descr_->max_repetition_level() > 0) {
     // Walk downwards to extract nullability
-    std::shared_ptr<Field> current_field = arrow_schema->field(0);
     std::vector<bool> nullable;
     std::vector<std::shared_ptr<::arrow::Int32Builder>> offset_builders;
     std::vector<std::shared_ptr<::arrow::BooleanBuilder>> valid_bits_builders;