You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by am...@apache.org on 2022/04/13 10:31:02 UTC
[arrow] branch master updated: ARROW-16025: [Python][C++] Fix segmentation fault when closing ORCFileWriter
This is an automated email from the ASF dual-hosted git repository.
amolina pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ccaef092c2 ARROW-16025: [Python][C++] Fix segmentation fault when closing ORCFileWriter
ccaef092c2 is described below
commit ccaef092c297b8af5d375bc542d908cda8fd415e
Author: Raúl Cumplido <ra...@gmail.com>
AuthorDate: Wed Apr 13 12:30:53 2022 +0200
ARROW-16025: [Python][C++] Fix segmentation fault when closing ORCFileWriter
This PR fixes ARROW-16025 (Calling nonexistent method of pyarrow.orc.ORCWriter causes segfault).
The segmentation fault can be reproduced with the test:
```python
def test_wrong_usage_orc_writer(tempdir):
    """Calling a nonexistent method on an ORCWriter must raise, not segfault."""
    from pyarrow import orc
    path = str(tempdir / 'test.orc')
    # Nothing is written inside the context manager, so on exit the writer is
    # closed without ever having written any data.
    with orc.ORCWriter(path) as writer:
        with pytest.raises(AttributeError):
            writer.test()
```
The issue was that closing an `ORCFileWriter` that had never written anything called `writer_->close()` on a null writer, which caused the segmentation fault.
Closes #12816 from raulcd/ARROW-16025
Authored-by: Raúl Cumplido <ra...@gmail.com>
Signed-off-by: Alessandro Molina <am...@turbogears.org>
---
cpp/src/arrow/adapters/orc/adapter.cc | 4 +++-
cpp/src/arrow/adapters/orc/adapter_test.cc | 17 +++++++++++++++++
python/pyarrow/tests/test_orc.py | 21 +++++++++++++++++++++
3 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc
index 41bb7f25cb..7d3309e4e7 100644
--- a/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/cpp/src/arrow/adapters/orc/adapter.cc
@@ -837,7 +837,9 @@ class ORCFileWriter::Impl {
}
Status Close() {
- writer_->close();
+ if (writer_) {
+ writer_->close();
+ }
return Status::OK();
}
diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc
index 8d2ad777ca..6b9aa4740b 100644
--- a/cpp/src/arrow/adapters/orc/adapter_test.cc
+++ b/cpp/src/arrow/adapters/orc/adapter_test.cc
@@ -406,6 +406,23 @@ TEST(TestAdapterRead, ReadIntAndStringFileMultipleStripes) {
// Trivial
+class TestORCWriterTrivialNoWrite : public ::testing::Test {};
+TEST_F(TestORCWriterTrivialNoWrite, noWrite) {
+ EXPECT_OK_AND_ASSIGN(auto buffer_output_stream,
+ io::BufferOutputStream::Create(kDefaultSmallMemStreamSize / 16));
+ auto write_options = adapters::orc::WriteOptions();
+#ifdef ARROW_WITH_SNAPPY
+ write_options.compression = Compression::SNAPPY;
+#else
+ write_options.compression = Compression::UNCOMPRESSED;
+#endif
+ write_options.file_version = adapters::orc::FileVersion(0, 11);
+ write_options.compression_block_size = 32768;
+ write_options.row_index_stride = 5000;
+ EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open(
+ buffer_output_stream.get(), write_options));
+ ARROW_EXPECT_OK(writer->Close());
+}
class TestORCWriterTrivialNoConversion : public ::testing::Test {
public:
TestORCWriterTrivialNoConversion() {
diff --git a/python/pyarrow/tests/test_orc.py b/python/pyarrow/tests/test_orc.py
index abdd8bc11f..866cc01452 100644
--- a/python/pyarrow/tests/test_orc.py
+++ b/python/pyarrow/tests/test_orc.py
@@ -613,3 +613,24 @@ def test_column_selection(tempdir):
with pytest.raises(ValueError):
orc_file.read(columns=[5])
+
+
+def test_wrong_usage_orc_writer(tempdir):
+ from pyarrow import orc
+
+ path = str(tempdir / 'test.orc')
+ with orc.ORCWriter(path) as writer:
+ with pytest.raises(AttributeError):
+ writer.test()
+
+
+def test_orc_writer_with_null_arrays(tempdir):
+ from pyarrow import orc
+ import pyarrow as pa
+
+ path = str(tempdir / 'test.orc')
+ a = pa.array([1, None, 3, None])
+ b = pa.array([None, None, None, None])
+ table = pa.table({"int64": a, "utf8": b})
+ with pytest.raises(pa.ArrowNotImplementedError):
+ orc.write_table(table, path)