You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/12/24 22:49:30 UTC

[arrow] branch master updated: PARQUET-1481: [C++] Throw exception when encountering bad Thrift metadata in RecordReader

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new cfaea42  PARQUET-1481: [C++] Throw exception when encountering bad Thrift metadata in RecordReader
cfaea42 is described below

commit cfaea429d0f2d3d9baa2a10d6da759ffd0f9d7f8
Author: Wes McKinney <we...@apache.org>
AuthorDate: Mon Dec 24 16:49:22 2018 -0600

    PARQUET-1481: [C++] Throw exception when encountering bad Thrift metadata in RecordReader
    
    Author: Wes McKinney <we...@apache.org>
    
    Closes #3242 from wesm/PARQUET-1481 and squashes the following commits:
    
    b074227ba <Wes McKinney> Add test case with example corrupt data file
    59400a2f1 <Wes McKinney> Throw exception when encountering bad Thrift metadata in RecordReader
---
 cpp/src/parquet/arrow/arrow-reader-writer-test.cc | 29 ++++++++++++++++-------
 cpp/src/parquet/arrow/record_reader.cc            |  8 +++++--
 cpp/submodules/parquet-testing                    |  2 +-
 3 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
index 4e62a22..bb97632 100644
--- a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -2291,21 +2291,34 @@ TEST(TestImpalaConversion, ArrowTimestampToImpalaTimestamp) {
   ASSERT_EQ(expected, calculated);
 }
 
-TEST(TestArrowReaderAdHoc, Int96BadMemoryAccess) {
-  // PARQUET-995
+void TryReadDataFile(const std::string& testing_file_path, bool should_succeed = true) {
   std::string dir_string(test::get_data_dir());
   std::stringstream ss;
-  ss << dir_string << "/"
-     << "alltypes_plain.parquet";
+  ss << dir_string << "/" << testing_file_path;
   auto path = ss.str();
 
   auto pool = ::arrow::default_memory_pool();
 
   std::unique_ptr<FileReader> arrow_reader;
-  ASSERT_NO_THROW(
-      arrow_reader.reset(new FileReader(pool, ParquetFileReader::OpenFile(path, false))));
-  std::shared_ptr<::arrow::Table> table;
-  ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table));
+  try {
+    arrow_reader.reset(new FileReader(pool, ParquetFileReader::OpenFile(path, false)));
+    std::shared_ptr<::arrow::Table> table;
+    ASSERT_OK(arrow_reader->ReadTable(&table));
+  } catch (const ParquetException& e) {
+    if (should_succeed) {
+      FAIL() << "Exception thrown when reading file: " << e.what();
+    }
+  }
+}
+
+TEST(TestArrowReaderAdHoc, Int96BadMemoryAccess) {
+  // PARQUET-995
+  TryReadDataFile("alltypes_plain.parquet");
+}
+
+TEST(TestArrowReaderAdHoc, CorruptedSchema) {
+  // PARQUET-1481
+  TryReadDataFile("bad_data/PARQUET-1481.parquet", false /* should_succeed */);
 }
 
 class TestArrowReaderAdHocSparkAndHvr
diff --git a/cpp/src/parquet/arrow/record_reader.cc b/cpp/src/parquet/arrow/record_reader.cc
index d1bf2c5..4a988da 100644
--- a/cpp/src/parquet/arrow/record_reader.cc
+++ b/cpp/src/parquet/arrow/record_reader.cc
@@ -850,8 +850,12 @@ std::shared_ptr<RecordReader> RecordReader::Make(const ColumnDescriptor* descr,
     case Type::FIXED_LEN_BYTE_ARRAY:
       return std::shared_ptr<RecordReader>(
           new RecordReader(new TypedRecordReader<FLBAType>(descr, pool)));
-    default:
-      DCHECK(false);
+    default: {
+      // PARQUET-1481: This can occur if the file is corrupt
+      std::stringstream ss;
+      ss << "Invalid physical column type: " << static_cast<int>(descr->physical_type());
+      throw ParquetException(ss.str());
+    }
   }
   // Unreachable code, but supress compiler warning
   return nullptr;
diff --git a/cpp/submodules/parquet-testing b/cpp/submodules/parquet-testing
index 92a8e6c..8eb0213 160000
--- a/cpp/submodules/parquet-testing
+++ b/cpp/submodules/parquet-testing
@@ -1 +1 @@
-Subproject commit 92a8e6c2efdce1925c605d6313994db2c94478fb
+Subproject commit 8eb0213c491752c9bbb1b884fcbb21deb548e464