You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2019/04/18 08:30:49 UTC
[arrow] branch master updated: ARROW-5177: [C++/Python] Check column index when reading Parquet column
This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 661d1cf ARROW-5177: [C++/Python] Check column index when reading Parquet column
661d1cf is described below
commit 661d1cf8cf7854af515b5640c8de7ce125b45b89
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Thu Apr 18 10:30:28 2019 +0200
ARROW-5177: [C++/Python] Check column index when reading Parquet column
Author: Antoine Pitrou <an...@python.org>
Closes #4163 from pitrou/ARROW-5177-parquet-read-column-invalid-index and squashes the following commits:
a3fdf2108 <Antoine Pitrou> ARROW-5177: Check column index when reading Parquet column
---
cpp/src/parquet/arrow/reader.cc | 7 +++++++
python/pyarrow/tests/test_parquet.py | 13 +++++++++++++
2 files changed, 20 insertions(+)
diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc
index f891682..e2143c0 100644
--- a/cpp/src/parquet/arrow/reader.cc
+++ b/cpp/src/parquet/arrow/reader.cc
@@ -380,6 +380,13 @@ FileReader::~FileReader() {}
Status FileReader::Impl::GetColumn(int i, FileColumnIteratorFactory iterator_factory,
std::unique_ptr<ColumnReader>* out) {
+ if (i < 0 || i >= this->num_columns()) {
+ return Status::Invalid("Column index out of bounds (got ", i,
+ ", should be "
+ "between 0 and ",
+ this->num_columns() - 1, ")");
+ }
+
std::unique_ptr<FileColumnIterator> input(iterator_factory(i, reader_.get()));
bool read_dict = reader_properties_.read_dictionary(i);
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 26d561f..3190b0a 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -2552,3 +2552,16 @@ def test_partitioned_dataset(tempdir):
partition_cols=['one', 'two'])
table = pq.ParquetDataset(path).read()
pq.write_table(table, path / "output.parquet")
+
+
+def test_read_column_invalid_index():
+ table = pa.Table.from_arrays([pa.array([4, 5]), pa.array(["foo", "bar"])],
+ ['ints', 'strs'])
+ bio = pa.BufferOutputStream()
+ pq.write_table(table, bio)
+ f = pq.ParquetFile(bio.getvalue())
+ assert f.reader.read_column(0).to_pylist() == [4, 5]
+ assert f.reader.read_column(1).to_pylist() == ["foo", "bar"]
+ for index in (-1, 2):
+ with pytest.raises((ValueError, IndexError)):
+ f.reader.read_column(index)