You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by am...@apache.org on 2022/04/13 10:31:02 UTC

[arrow] branch master updated: ARROW-16025: [Python][C++] Fix segmentation fault when closing ORCFileWriter

This is an automated email from the ASF dual-hosted git repository.

amolina pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new ccaef092c2 ARROW-16025: [Python][C++] Fix segmentation fault when closing ORCFileWriter
ccaef092c2 is described below

commit ccaef092c297b8af5d375bc542d908cda8fd415e
Author: Raúl Cumplido <ra...@gmail.com>
AuthorDate: Wed Apr 13 12:30:53 2022 +0200

    ARROW-16025: [Python][C++] Fix segmentation fault when closing ORCFileWriter
    
    This PR fixes ARROW-16025 (Calling nonexistent method of pyarrow.orc.ORCWriter causes segfault).
    
    The segmentation fault can be reproduced with the test:
    ```python
    def test_wrong_usage_orc_writer(tempdir):
        from pyarrow import orc
    
        path = str(tempdir / 'test.orc')
        with orc.ORCWriter(path) as writer:
            with pytest.raises(AttributeError):
                writer.test()
    ```
    
    The issue was that closing an `ORCFileWriter` that had never written anything called `writer_->close();` on a null `writer_`, causing the segmentation fault.
    
    Closes #12816 from raulcd/ARROW-16025
    
    Authored-by: Raúl Cumplido <ra...@gmail.com>
    Signed-off-by: Alessandro Molina <am...@turbogears.org>
---
 cpp/src/arrow/adapters/orc/adapter.cc      |  4 +++-
 cpp/src/arrow/adapters/orc/adapter_test.cc | 17 +++++++++++++++++
 python/pyarrow/tests/test_orc.py           | 21 +++++++++++++++++++++
 3 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc
index 41bb7f25cb..7d3309e4e7 100644
--- a/cpp/src/arrow/adapters/orc/adapter.cc
+++ b/cpp/src/arrow/adapters/orc/adapter.cc
@@ -837,7 +837,9 @@ class ORCFileWriter::Impl {
   }
 
   Status Close() {
-    writer_->close();
+    if (writer_) {
+      writer_->close();
+    }
     return Status::OK();
   }
 
diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc
index 8d2ad777ca..6b9aa4740b 100644
--- a/cpp/src/arrow/adapters/orc/adapter_test.cc
+++ b/cpp/src/arrow/adapters/orc/adapter_test.cc
@@ -406,6 +406,23 @@ TEST(TestAdapterRead, ReadIntAndStringFileMultipleStripes) {
 
 // Trivial
 
+class TestORCWriterTrivialNoWrite : public ::testing::Test {};
+TEST_F(TestORCWriterTrivialNoWrite, noWrite) {
+  EXPECT_OK_AND_ASSIGN(auto buffer_output_stream,
+                       io::BufferOutputStream::Create(kDefaultSmallMemStreamSize / 16));
+  auto write_options = adapters::orc::WriteOptions();
+#ifdef ARROW_WITH_SNAPPY
+  write_options.compression = Compression::SNAPPY;
+#else
+  write_options.compression = Compression::UNCOMPRESSED;
+#endif
+  write_options.file_version = adapters::orc::FileVersion(0, 11);
+  write_options.compression_block_size = 32768;
+  write_options.row_index_stride = 5000;
+  EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open(
+                                        buffer_output_stream.get(), write_options));
+  ARROW_EXPECT_OK(writer->Close());
+}
 class TestORCWriterTrivialNoConversion : public ::testing::Test {
  public:
   TestORCWriterTrivialNoConversion() {
diff --git a/python/pyarrow/tests/test_orc.py b/python/pyarrow/tests/test_orc.py
index abdd8bc11f..866cc01452 100644
--- a/python/pyarrow/tests/test_orc.py
+++ b/python/pyarrow/tests/test_orc.py
@@ -613,3 +613,24 @@ def test_column_selection(tempdir):
 
     with pytest.raises(ValueError):
         orc_file.read(columns=[5])
+
+
+def test_wrong_usage_orc_writer(tempdir):
+    from pyarrow import orc
+
+    path = str(tempdir / 'test.orc')
+    with orc.ORCWriter(path) as writer:
+        with pytest.raises(AttributeError):
+            writer.test()
+
+
+def test_orc_writer_with_null_arrays(tempdir):
+    from pyarrow import orc
+    import pyarrow as pa
+
+    path = str(tempdir / 'test.orc')
+    a = pa.array([1, None, 3, None])
+    b = pa.array([None, None, None, None])
+    table = pa.table({"int64": a, "utf8": b})
+    with pytest.raises(pa.ArrowNotImplementedError):
+        orc.write_table(table, path)