You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2021/05/17 11:37:10 UTC

[arrow] 04/13: ARROW-12622: [Python] Fix segfault in read_csv when not on main thread

This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch maint-4.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit dbf16e3de7fa4f03c61a9b4d9b2743fe77ad91c0
Author: David Li <li...@gmail.com>
AuthorDate: Mon May 3 16:53:16 2021 +0200

    ARROW-12622: [Python] Fix segfault in read_csv when not on main thread
    
    An uninitialized StopToken caused a segfault whenever read_csv was called with cancellation disabled or from a thread other than the main thread (e.g. when used in a Flight server). If there is a 4.0.1 release, I think this qualifies as a regression that should be fixed in it.
    
    Closes #10227 from lidavidm/arrow-12622
    
    Authored-by: David Li <li...@gmail.com>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 python/pyarrow/error.pxi         | 2 +-
 python/pyarrow/tests/test_csv.py | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/error.pxi b/python/pyarrow/error.pxi
index f9e45f2..2866848 100644
--- a/python/pyarrow/error.pxi
+++ b/python/pyarrow/error.pxi
@@ -188,8 +188,8 @@ cdef class SignalStopHandler:
                 if signal.getsignal(sig) not in (signal.SIG_DFL,
                                                  signal.SIG_IGN, None)]
 
+        self._stop_token = StopToken()
         if not self._signals.empty():
-            self._stop_token = StopToken()
             self._stop_token.init(GetResultValue(
                 SetSignalStopSource()).token())
             self._enabled = True
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 395f948..34ab556 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -942,6 +942,14 @@ class BaseTestCSVRead:
         assert isinstance(e, pa.ArrowCancelled)
         assert e.signum == signal.SIGINT
 
+    def test_cancellation_disabled(self):
+        # ARROW-12622: reader would segfault when the cancelling signal
+        # handler was not enabled (e.g. if disabled, or if not on the
+        # main thread)
+        t = threading.Thread(target=lambda: self.read_bytes(b"f64\n0.1"))
+        t.start()
+        t.join()
+
 
 class TestSerialCSVRead(BaseTestCSVRead, unittest.TestCase):