You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/12/24 22:49:30 UTC
[arrow] branch master updated: PARQUET-1481: [C++] Throw exception
when encountering bad Thrift metadata in RecordReader
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new cfaea42 PARQUET-1481: [C++] Throw exception when encountering bad Thrift metadata in RecordReader
cfaea42 is described below
commit cfaea429d0f2d3d9baa2a10d6da759ffd0f9d7f8
Author: Wes McKinney <we...@apache.org>
AuthorDate: Mon Dec 24 16:49:22 2018 -0600
PARQUET-1481: [C++] Throw exception when encountering bad Thrift metadata in RecordReader
Author: Wes McKinney <we...@apache.org>
Closes #3242 from wesm/PARQUET-1481 and squashes the following commits:
b074227ba <Wes McKinney> Add test case with example corrupt data file
59400a2f1 <Wes McKinney> Throw exception when encountering bad Thrift metadata in RecordReader
---
cpp/src/parquet/arrow/arrow-reader-writer-test.cc | 29 ++++++++++++++++-------
cpp/src/parquet/arrow/record_reader.cc | 8 +++++--
cpp/submodules/parquet-testing | 2 +-
3 files changed, 28 insertions(+), 11 deletions(-)
diff --git a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
index 4e62a22..bb97632 100644
--- a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -2291,21 +2291,34 @@ TEST(TestImpalaConversion, ArrowTimestampToImpalaTimestamp) {
ASSERT_EQ(expected, calculated);
}
-TEST(TestArrowReaderAdHoc, Int96BadMemoryAccess) {
- // PARQUET-995
+void TryReadDataFile(const std::string& testing_file_path, bool should_succeed = true) {
std::string dir_string(test::get_data_dir());
std::stringstream ss;
- ss << dir_string << "/"
- << "alltypes_plain.parquet";
+ ss << dir_string << "/" << testing_file_path;
auto path = ss.str();
auto pool = ::arrow::default_memory_pool();
std::unique_ptr<FileReader> arrow_reader;
- ASSERT_NO_THROW(
- arrow_reader.reset(new FileReader(pool, ParquetFileReader::OpenFile(path, false))));
- std::shared_ptr<::arrow::Table> table;
- ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table));
+ try {
+ arrow_reader.reset(new FileReader(pool, ParquetFileReader::OpenFile(path, false)));
+ std::shared_ptr<::arrow::Table> table;
+ ASSERT_OK(arrow_reader->ReadTable(&table));
+ } catch (const ParquetException& e) {
+ if (should_succeed) {
+ FAIL() << "Exception thrown when reading file: " << e.what();
+ }
+ }
+}
+
+TEST(TestArrowReaderAdHoc, Int96BadMemoryAccess) {
+ // PARQUET-995
+ TryReadDataFile("alltypes_plain.parquet");
+}
+
+TEST(TestArrowReaderAdHoc, CorruptedSchema) {
+ // PARQUET-1481
+ TryReadDataFile("bad_data/PARQUET-1481.parquet", false /* should_succeed */);
}
class TestArrowReaderAdHocSparkAndHvr
diff --git a/cpp/src/parquet/arrow/record_reader.cc b/cpp/src/parquet/arrow/record_reader.cc
index d1bf2c5..4a988da 100644
--- a/cpp/src/parquet/arrow/record_reader.cc
+++ b/cpp/src/parquet/arrow/record_reader.cc
@@ -850,8 +850,12 @@ std::shared_ptr<RecordReader> RecordReader::Make(const ColumnDescriptor* descr,
case Type::FIXED_LEN_BYTE_ARRAY:
return std::shared_ptr<RecordReader>(
new RecordReader(new TypedRecordReader<FLBAType>(descr, pool)));
- default:
- DCHECK(false);
+ default: {
+ // PARQUET-1481: This can occur if the file is corrupt
+ std::stringstream ss;
+ ss << "Invalid physical column type: " << static_cast<int>(descr->physical_type());
+ throw ParquetException(ss.str());
+ }
}
// Unreachable code, but supress compiler warning
return nullptr;
diff --git a/cpp/submodules/parquet-testing b/cpp/submodules/parquet-testing
index 92a8e6c..8eb0213 160000
--- a/cpp/submodules/parquet-testing
+++ b/cpp/submodules/parquet-testing
@@ -1 +1 @@
-Subproject commit 92a8e6c2efdce1925c605d6313994db2c94478fb
+Subproject commit 8eb0213c491752c9bbb1b884fcbb21deb548e464