You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2021/05/17 11:37:10 UTC
[arrow] 04/13: ARROW-12622: [Python] Fix segfault in read_csv when not on main thread
This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch maint-4.0.x
in repository https://gitbox.apache.org/repos/asf/arrow.git
commit dbf16e3de7fa4f03c61a9b4d9b2743fe77ad91c0
Author: David Li <li...@gmail.com>
AuthorDate: Mon May 3 16:53:16 2021 +0200
ARROW-12622: [Python] Fix segfault in read_csv when not on main thread
An uninitialized StopToken caused a segfault whenever read_csv was called with cancellation disabled or from a thread other than the main thread (e.g. when used in a Flight server). If we release a 4.0.1, I think this qualifies as a regression that should be fixed there.
Closes #10227 from lidavidm/arrow-12622
Authored-by: David Li <li...@gmail.com>
Signed-off-by: Antoine Pitrou <an...@python.org>
---
python/pyarrow/error.pxi | 2 +-
python/pyarrow/tests/test_csv.py | 8 ++++++++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/python/pyarrow/error.pxi b/python/pyarrow/error.pxi
index f9e45f2..2866848 100644
--- a/python/pyarrow/error.pxi
+++ b/python/pyarrow/error.pxi
@@ -188,8 +188,8 @@ cdef class SignalStopHandler:
if signal.getsignal(sig) not in (signal.SIG_DFL,
signal.SIG_IGN, None)]
+ self._stop_token = StopToken()
if not self._signals.empty():
- self._stop_token = StopToken()
self._stop_token.init(GetResultValue(
SetSignalStopSource()).token())
self._enabled = True
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index 395f948..34ab556 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -942,6 +942,14 @@ class BaseTestCSVRead:
assert isinstance(e, pa.ArrowCancelled)
assert e.signum == signal.SIGINT
+ def test_cancellation_disabled(self):
+ # ARROW-12622: reader would segfault when the cancelling signal
+ # handler was not enabled (e.g. if disabled, or if not on the
+ # main thread)
+ t = threading.Thread(target=lambda: self.read_bytes(b"f64\n0.1"))
+ t.start()
+ t.join()
+
class TestSerialCSVRead(BaseTestCSVRead, unittest.TestCase):