You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2019/07/13 17:02:20 UTC

[arrow] 25/43: ARROW-5863: [Python] Use atexit module for extension type finalization to avoid segfault

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch maint-0.14.x
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit e698b052314a8253dc77dcbc57fba3ccb6eaf74d
Author: Wes McKinney <we...@apache.org>
AuthorDate: Mon Jul 8 16:27:51 2019 -0500

    ARROW-5863: [Python] Use atexit module for extension type finalization to avoid segfault
    
    As reported on JIRA, the following script provokes a segfault:
    
    ```
    #! /usr/bin/env python
    
    import pyarrow
    import sys
    del sys.modules['pyarrow.lib']
    ```
    
    For some reason, this does not trigger the destruction of the private `_ExtensionTypesInitializer` object; I am not sure why (Antoine may know). Using the atexit module instead seems to do the trick.
    
    Author: Wes McKinney <we...@apache.org>
    
    Closes #4824 from wesm/ARROW-5863 and squashes the following commits:
    
    ba60578fa <Wes McKinney> Use atexit module for extension type finalization
---
 python/pyarrow/types.pxi | 51 +++++++++++++++++++++++-------------------------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index 79e9713..0c3f8b0 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import atexit
 import re
 import warnings
 
@@ -589,32 +590,6 @@ cdef class UnknownExtensionType(ExtensionType):
         return self.serialized
 
 
-cdef class _ExtensionTypesInitializer:
-    #
-    # A private object that handles process-wide registration of the Python
-    # ExtensionType.
-    #
-
-    def __cinit__(self):
-        cdef:
-            DataType storage_type
-            shared_ptr[CExtensionType] cpy_ext_type
-
-        # Make a dummy C++ ExtensionType
-        storage_type = null()
-        check_status(CPyExtensionType.FromClass(storage_type.sp_type,
-                                                ExtensionType, &cpy_ext_type))
-        check_status(
-            RegisterPyExtensionType(<shared_ptr[CDataType]> cpy_ext_type))
-
-    def __dealloc__(self):
-        # This needs to be done explicitly before the Python interpreter is
-        # finalized.  If the C++ type is destroyed later in the process
-        # teardown stage, it will invoke CPython APIs such as Py_DECREF
-        # with a destroyed interpreter.
-        check_status(UnregisterPyExtensionType())
-
-
 cdef class Field:
     """
     A named field, with a data type, nullability, and optional metadata.
@@ -1863,4 +1838,26 @@ def is_float_value(object obj):
     return IsPyFloat(obj)
 
 
-_extension_types_initializer = _ExtensionTypesInitializer()
+def _register_py_extension_type():
+    cdef:
+        DataType storage_type
+        shared_ptr[CExtensionType] cpy_ext_type
+
+    # Make a dummy C++ ExtensionType
+    storage_type = null()
+    check_status(CPyExtensionType.FromClass(storage_type.sp_type,
+                                            ExtensionType, &cpy_ext_type))
+    check_status(
+        RegisterPyExtensionType(<shared_ptr[CDataType]> cpy_ext_type))
+
+
+def _unregister_py_extension_type():
+    # This needs to be done explicitly before the Python interpreter is
+    # finalized.  If the C++ type is destroyed later in the process
+    # teardown stage, it will invoke CPython APIs such as Py_DECREF
+    # with a destroyed interpreter.
+    check_status(UnregisterPyExtensionType())
+
+
+_register_py_extension_type()
+atexit.register(_unregister_py_extension_type)