You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2021/05/15 01:17:41 UTC

[tvm] branch main updated: [RUNTIME] Improve signal handling in python env. (#7919)

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new dfe4ceb  [RUNTIME] Improve signal handling in python env. (#7919)
dfe4ceb is described below

commit dfe4cebbdadab3d4e6e6ba3951276a51a4ffeaf6
Author: Tianqi Chen <tq...@users.noreply.github.com>
AuthorDate: Fri May 14 21:17:14 2021 -0400

    [RUNTIME] Improve signal handling in python env. (#7919)
    
    * [RUNTIME] Improve signal handling in python env.
    
    Python execution environment handles the signal by caching
    the signal in a state and invokes the handler when execution
    goes into the python interpreter.
    
    This model can cause problems when running a long-running
    c++ function, as a keyboard interrupt can only be caught at the end.
    
    Additionally, because python registers special signal handlers,
    socket operations can return EINTR that needs to be explicitly
    retried when the interrupt is not a KeyboardInterrupt.
    
    This PR adds the following changes to resolve these problems.
    
    - Allow execution env(python) to register CheckSignals function
      to the TVM runtime.
    - Add runtime::EnvCheckSignals to check the signal error.
    - Add retry when EINTR is encountered in socket.
    - Register the python C API functions in cython mode.
    
    To test out the EnvCheckSignals, run the following code
    
    ```python
    import tvm.testing
    tvm.testing.run_check_signal(10)
    ```
    
    Note that the C API functions are only registered in cython FFI mode
    because ctypes has problems invoking these functions. This, however,
    won't affect the correctness, but will defer the interrupt handling
    to function return sites.
    
    Co-authored-by: Andrew Reusch <ar...@octoml.ai>
    Co-authored-by: Robert Kimball <bo...@gmail.com>
    
    * Address comments
    
    * Alternative implementation that preserves python exception.
    
    * Address comments
    
    * Update check signals
    
    Co-authored-by: Andrew Reusch <ar...@octoml.ai>
    Co-authored-by: Robert Kimball <bo...@gmail.com>
---
 include/tvm/runtime/c_backend_api.h     | 14 ++++++
 include/tvm/runtime/c_runtime_api.h     | 67 +++++++++++++++-------------
 include/tvm/runtime/logging.h           | 18 ++++++++
 include/tvm/runtime/registry.h          | 44 +++++++++++++++++++
 python/tvm/_ffi/_cython/base.pxi        | 28 +++++++++++-
 python/tvm/_ffi/_cython/packed_func.pxi |  1 +
 python/tvm/_ffi/base.py                 |  2 +
 src/runtime/registry.cc                 | 77 +++++++++++++++++++++++++++++++++
 src/runtime/runtime_base.h              | 28 +++++++-----
 src/support/ffi_testing.cc              | 10 +++++
 src/support/socket.h                    | 57 +++++++++++++++++++++---
 11 files changed, 296 insertions(+), 50 deletions(-)

diff --git a/include/tvm/runtime/c_backend_api.h b/include/tvm/runtime/c_backend_api.h
index f74b2d3..bb6ff1d 100644
--- a/include/tvm/runtime/c_backend_api.h
+++ b/include/tvm/runtime/c_backend_api.h
@@ -61,6 +61,7 @@ typedef int (*TVMBackendPackedCFunc)(TVMValue* args, int* type_codes, int num_ar
  * \return 0 when no error is thrown, -1 when failure happens
  */
 TVM_DLL int TVMBackendGetFuncFromEnv(void* mod_node, const char* func_name, TVMFunctionHandle* out);
+
 /*!
  * \brief Backend function to register system-wide library symbol.
  *
@@ -100,6 +101,19 @@ TVM_DLL void* TVMBackendAllocWorkspace(int device_type, int device_id, uint64_t
 TVM_DLL int TVMBackendFreeWorkspace(int device_type, int device_id, void* ptr);
 
 /*!
+ * \brief Backend function to register execution environment(e.g. python)
+ *        specific C APIs.
+ *
+ * \note  We only register the C API function when absolutely necessary (e.g. when signal handler
+ *  cannot trap back into python). In most cases we should use the PackedFunc FFI.
+ *
+ * \param name The name of the symbol
+ * \param ptr The symbol address.
+ * \return 0 when no error is thrown, -1 when failure happens
+ */
+TVM_DLL int TVMBackendRegisterEnvCAPI(const char* name, void* ptr);
+
+/*!
  * \brief Environment for TVM parallel task.
  */
 typedef struct {
diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h
index 44dba4d..17d1ba2 100644
--- a/include/tvm/runtime/c_runtime_api.h
+++ b/include/tvm/runtime/c_runtime_api.h
@@ -32,6 +32,11 @@
  *  The common flow is:
  *   - Use TVMFuncListGlobalNames to get global function name
  *   - Use TVMFuncCall to call these functions.
+ *
+ *  Possible return values of the API functions:
+ *  * 0: success
+ *  * -1: the error can be retrieved through TVMGetLastError.
+ *  * -2: a frontend error occurred and was recorded in the frontend.
  */
 #ifndef TVM_RUNTIME_C_RUNTIME_API_H_
 #define TVM_RUNTIME_C_RUNTIME_API_H_
@@ -178,7 +183,7 @@ TVM_DLL void TVMAPISetLastError(const char* msg);
 /*!
  * \brief return str message of the last error
  *  all function in this file will return 0 when success
- *  and -1 when an error occurred,
+ *  and nonzero when an error occurred,
  *  TVMGetLastError can be called to retrieve the error
  *
  *  this function is threadsafe and can be called by different thread
@@ -191,7 +196,7 @@ TVM_DLL const char* TVMGetLastError(void);
  * \param format The format of the module.
  * \param out The result module
  *
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  * \note The resulting module do not contain import relation.
  *  It can be reconstructed by TVMModImport.
  */
@@ -203,7 +208,7 @@ TVM_DLL int TVMModLoadFromFile(const char* file_name, const char* format, TVMMod
  *
  * \param mod The module handle.
  * \param dep The dependent module to be imported.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMModImport(TVMModuleHandle mod, TVMModuleHandle dep);
 
@@ -213,7 +218,7 @@ TVM_DLL int TVMModImport(TVMModuleHandle mod, TVMModuleHandle dep);
  * \param func_name The name of the function.
  * \param query_imports Whether to query imported modules
  * \param out The result function, can be NULL if it is not available.
- * \return 0 when no error is thrown, -1 when failure happens
+ * \return 0 when no error is thrown, nonzero when failure happens
  */
 TVM_DLL int TVMModGetFunction(TVMModuleHandle mod, const char* func_name, int query_imports,
                               TVMFunctionHandle* out);
@@ -227,14 +232,14 @@ TVM_DLL int TVMModGetFunction(TVMModuleHandle mod, const char* func_name, int qu
  *  Or if this module is imported by another active module.
  *
  *  The all functions remains valid until TVMFuncFree is called.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMModFree(TVMModuleHandle mod);
 
 /*!
  * \brief Free the function when it is no longer needed.
  * \param func The function handle
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMFuncFree(TVMFunctionHandle func);
 
@@ -249,7 +254,7 @@ TVM_DLL int TVMFuncFree(TVMFunctionHandle func);
  * \param ret_val The return value.
  * \param ret_type_code the type code of return value.
  *
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  * \note TVM calls always exchanges with type bits=64, lanes=1
  *
  * \note API calls always exchanges with type bits=64, lanes=1
@@ -282,7 +287,7 @@ TVM_DLL int TVMCFuncSetReturn(TVMRetValueHandle ret, TVMValue* value, int* type_
  * \param code The type code to be translated.
  * \note This function will do a shallow copy when necessary.
  *
- * \return 0 when success, -1 when failure happens.
+ * \return 0 when success, nonzero when failure happens.
  */
 TVM_DLL int TVMCbArgToReturn(TVMValue* value, int* code);
 
@@ -326,7 +331,7 @@ typedef int (*TVMExtensionFuncDeclarer)(TVMFunctionHandle register_func_handle);
  * \param resource_handle The resource handle from front-end, can be NULL.
  * \param fin The finalizer on resource handle when the FunctionHandle get freed, can be NULL
  * \param out the result function handle.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMFuncCreateFromCFunc(TVMPackedCFunc func, void* resource_handle,
                                    TVMPackedCFuncFinalizer fin, TVMFunctionHandle* out);
@@ -357,7 +362,7 @@ TVM_DLL int TVMFuncGetGlobal(const char* name, TVMFunctionHandle* out);
  * \brief List all the globally registered function name
  * \param out_size The number of functions
  * \param out_array The array of function names.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMFuncListGlobalNames(int* out_size, const char*** out_array);
 
@@ -380,7 +385,7 @@ TVM_DLL int TVMFuncRemoveGlobal(const char* name);
  * \param device_type The device type.
  * \param device_id The device id.
  * \param out The output handle.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMArrayAlloc(const tvm_index_t* shape, int ndim, int dtype_code, int dtype_bits,
                           int dtype_lanes, int device_type, int device_id, TVMArrayHandle* out);
@@ -388,7 +393,7 @@ TVM_DLL int TVMArrayAlloc(const tvm_index_t* shape, int ndim, int dtype_code, in
 /*!
  * \brief Free the TVM Array.
  * \param handle The array handle to be freed.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMArrayFree(TVMArrayHandle handle);
 
@@ -397,7 +402,7 @@ TVM_DLL int TVMArrayFree(TVMArrayHandle handle);
  * \param handle The array handle.
  * \param data the data pointer
  * \param nbytes The number of bytes to copy.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMArrayCopyFromBytes(TVMArrayHandle handle, void* data, size_t nbytes);
 
@@ -406,7 +411,7 @@ TVM_DLL int TVMArrayCopyFromBytes(TVMArrayHandle handle, void* data, size_t nbyt
  * \param handle The array handle.
  * \param data the data pointer
  * \param nbytes The number of bytes to copy.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMArrayCopyToBytes(TVMArrayHandle handle, void* data, size_t nbytes);
 
@@ -415,7 +420,7 @@ TVM_DLL int TVMArrayCopyToBytes(TVMArrayHandle handle, void* data, size_t nbytes
  * \param from The array to be copied from.
  * \param to The target space.
  * \param stream The stream where the copy happens, can be NULL.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMArrayCopyFromTo(TVMArrayHandle from, TVMArrayHandle to, TVMStreamHandle stream);
 
@@ -424,7 +429,7 @@ TVM_DLL int TVMArrayCopyFromTo(TVMArrayHandle from, TVMArrayHandle to, TVMStream
  * with the DLManagedTensor.
  * \param from The source DLManagedTensor.
  * \param out The output array handle.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMArrayFromDLPack(DLManagedTensor* from, TVMArrayHandle* out);
 
@@ -433,7 +438,7 @@ TVM_DLL int TVMArrayFromDLPack(DLManagedTensor* from, TVMArrayHandle* out);
  * the array.
  * \param from The source array.
  * \param out The DLManagedTensor handle.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMArrayToDLPack(TVMArrayHandle from, DLManagedTensor** out);
 
@@ -449,7 +454,7 @@ TVM_DLL void TVMDLManagedTensorCallDeleter(DLManagedTensor* dltensor);
  * \param device_type The device type.
  * \param device_id The device id.
  * \param out The new stream handle.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMStreamCreate(int device_type, int device_id, TVMStreamHandle* out);
 
@@ -459,7 +464,7 @@ TVM_DLL int TVMStreamCreate(int device_type, int device_id, TVMStreamHandle* out
  * \param device_type The device type.
  * \param device_id The device id.
  * \param stream The stream to be freed.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMStreamFree(int device_type, int device_id, TVMStreamHandle stream);
 
@@ -472,7 +477,7 @@ TVM_DLL int TVMStreamFree(int device_type, int device_id, TVMStreamHandle stream
  * \param device_type The device type.
  * \param device_id The device id.
  * \param handle The stream handle.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMSetStream(int device_type, int device_id, TVMStreamHandle handle);
 
@@ -482,7 +487,7 @@ TVM_DLL int TVMSetStream(int device_type, int device_id, TVMStreamHandle handle)
  * \param device_type The device type.
  * \param device_id The device id.
  * \param stream The stream to be synchronized.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMSynchronize(int device_type, int device_id, TVMStreamHandle stream);
 
@@ -493,7 +498,7 @@ TVM_DLL int TVMSynchronize(int device_type, int device_id, TVMStreamHandle strea
  * \param device_id The device id.
  * \param src The source stream to synchronize.
  * \param dst The destination stream to synchronize.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMStreamStreamSynchronize(int device_type, int device_id, TVMStreamHandle src,
                                        TVMStreamHandle dst);
@@ -503,7 +508,7 @@ TVM_DLL int TVMStreamStreamSynchronize(int device_type, int device_id, TVMStream
  *
  * \param obj The object handle.
  * \param out_tindex the output type index.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMObjectGetTypeIndex(TVMObjectHandle obj, unsigned* out_tindex);
 
@@ -511,7 +516,7 @@ TVM_DLL int TVMObjectGetTypeIndex(TVMObjectHandle obj, unsigned* out_tindex);
  * \brief Convert type key to type index.
  * \param type_key The key of the type.
  * \param out_tindex the corresponding type index.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMObjectTypeKey2Index(const char* type_key, unsigned* out_tindex);
 
@@ -520,7 +525,7 @@ TVM_DLL int TVMObjectTypeKey2Index(const char* type_key, unsigned* out_tindex);
  *
  * \param obj The object handle.
  * \note Internally we increase the reference counter of the object.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMObjectRetain(TVMObjectHandle obj);
 
@@ -530,7 +535,7 @@ TVM_DLL int TVMObjectRetain(TVMObjectHandle obj);
  * \param obj The object handle.
  * \note Internally we decrease the reference counter of the object.
  *       The object will be freed when every reference to the object are removed.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMObjectFree(TVMObjectHandle obj);
 
@@ -549,7 +554,7 @@ TVM_DLL int TVMByteArrayFree(TVMByteArray* arr);
  * \param type_hint The type of elements. Only needed by certain backends such
  *                   as nbytes & alignment are sufficient for most backends.
  * \param out_data The allocated device pointer.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMDeviceAllocDataSpace(DLDevice dev, size_t nbytes, size_t alignment,
                                     DLDataType type_hint, void** out_data);
@@ -565,7 +570,7 @@ TVM_DLL int TVMDeviceAllocDataSpace(DLDevice dev, size_t nbytes, size_t alignmen
  * \param mem_scope The memory scope of the tensor,
  *        can be nullptr, which indicate the default global DRAM
  * \param out_data The allocated device pointer.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMDeviceAllocDataSpaceWithScope(DLDevice dev, int ndim, const int64_t* shape,
                                              DLDataType dtype, const char* mem_scope,
@@ -575,7 +580,7 @@ TVM_DLL int TVMDeviceAllocDataSpaceWithScope(DLDevice dev, int ndim, const int64
  * \brief Free a data space on device.
  * \param dev The device to perform operation.
  * \param ptr The data space.
- * \return 0 when success, -1 when failure happens
+ * \return 0 when success, nonzero when failure happens
  */
 TVM_DLL int TVMDeviceFreeDataSpace(DLDevice dev, void* ptr);
 
@@ -586,7 +591,7 @@ TVM_DLL int TVMDeviceFreeDataSpace(DLDevice dev, void* ptr);
  * \param from The source tensor.
  * \param to The target tensor.
  * \param stream Optional stream object.
- * \return 0 when success, -1 when failure happens.
+ * \return 0 when success, nonzero when failure happens.
  */
 TVM_DLL int TVMDeviceCopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream);
 
@@ -595,7 +600,7 @@ TVM_DLL int TVMDeviceCopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandl
  * \param child_type_index The type index of the derived type.
  * \param parent_type_index The type index of the parent type.
  * \param is_derived A boolean representing whether this predicate holds.
- * \return 0 when success, -1 when failure happens.
+ * \return 0 when success, nonzero when failure happens.
  */
 TVM_DLL int TVMObjectDerivedFrom(uint32_t child_type_index, uint32_t parent_type_index,
                                  int* is_derived);
diff --git a/include/tvm/runtime/logging.h b/include/tvm/runtime/logging.h
index c452a33..704c2d9 100644
--- a/include/tvm/runtime/logging.h
+++ b/include/tvm/runtime/logging.h
@@ -199,6 +199,24 @@ class Error : public ::dmlc::Error {  // for backwards compatibility
 };
 
 /*!
+ * \brief Error message already set in frontend env.
+ *
+ *  This error can be thrown by EnvCheckSignals to indicate
+ *  that there is an error set in the frontend environment(e.g.
+ *  python interpreter). The TVM FFI should catch this error
+ *  and return a proper code to tell the frontend caller about
+ *  this fact.
+ */
+class EnvErrorAlreadySet : public ::dmlc::Error {
+ public:
+  /*!
+   * \brief Construct an error.
+   * \param s The message to be displayed with the error.
+   */
+  explicit EnvErrorAlreadySet(const std::string& s) : ::dmlc::Error(s) {}
+};
+
+/*!
  * \brief Error type for errors from CHECK, ICHECK, and LOG(FATAL). This error
  * contains a backtrace of where it occurred.
  */
diff --git a/include/tvm/runtime/registry.h b/include/tvm/runtime/registry.h
index 859a8ac..9e363d7 100644
--- a/include/tvm/runtime/registry.h
+++ b/include/tvm/runtime/registry.h
@@ -52,6 +52,50 @@
 namespace tvm {
 namespace runtime {
 
+/*!
+ * \brief Check if signals have been sent to the process and if so
+ *  invoke the registered signal handler in the frontend environment.
+ *
+ *  When running TVM in another language (e.g. python), the signal handler
+ *  may not be immediately executed, but instead the signal is marked
+ *  in the interpreter state(to ensure non-blocking of the signal handler).
+ *
+ *  This function can be explicitly invoked to check the cached signal
+ *  and run the related processing if a signal is marked.
+ *
+ *  On Linux, when siginterrupt() is set, invoke this function whenever a syscall returns EINTR.
+ *  When it is not set, invoke it between long-running syscalls when you will not immediately
+ *  return to the frontend. On Windows, the same rules apply, but due to differences in signal
+ *  processing, these are likely to only make a difference when used with Ctrl+C and socket calls.
+ *
+ *  Not inserting this function will not cause any correctness
+ *  issue, but will delay invoking the Python-side signal handler until the function returns to
+ *  the Python side. This means that the effect of e.g. pressing Ctrl+C or sending signals to
+ *  the process will be delayed until function return. When a C function is blocked on a syscall
+ *  such as accept(), it needs to be called when EINTR is received.
+ *  So this function is not needed in most API functions, which can finish quickly in a
+ *  reasonable, deterministic amount of time.
+ *
+ * \code
+ *
+ * int check_signal_every_k_iter = 10;
+ *
+ * for (int iter = 0; iter < very_large_number; ++iter) {
+ *   if (iter % check_signal_every_k_iter == 0) {
+ *     tvm::runtime::EnvCheckSignals();
+ *   }
+ *   // do work here
+ * }
+ *
+ * \endcode
+ *
+ * \note This function is a nop when no PyErr_CheckSignals is registered.
+ *
+ * \throws This function throws an exception when the frontend signal handler
+ *         indicates that an error happened, otherwise it returns normally.
+ */
+TVM_DLL void EnvCheckSignals();
+
 /*! \brief Registry for global function */
 class Registry {
  public:
diff --git a/python/tvm/_ffi/_cython/base.pxi b/python/tvm/_ffi/_cython/base.pxi
index bf4d6b9..7bbe216 100644
--- a/python/tvm/_ffi/_cython/base.pxi
+++ b/python/tvm/_ffi/_cython/base.pxi
@@ -155,9 +155,13 @@ cdef inline c_str(pystr):
     return pystr.encode("utf-8")
 
 
-cdef inline CALL(int ret):
+cdef inline int CALL(int ret) except -2:
+    # -2 means the exception is already set in the frontend; propagate it as-is
+    if ret == -2:
+        return -2
     if ret != 0:
         raise get_last_ffi_error()
+    return 0
 
 
 cdef inline object ctypes_handle(void* chandle):
@@ -170,3 +174,25 @@ cdef inline void* c_handle(object handle):
     cdef unsigned long long v_ptr
     v_ptr = handle.value
     return <void*>(v_ptr)
+
+
+# python env API
+cdef extern from "Python.h":
+    int PyErr_CheckSignals()
+
+cdef extern from "tvm/runtime/c_backend_api.h":
+    int TVMBackendRegisterEnvCAPI(const char* name, void* ptr)
+
+cdef _init_env_api():
+    # Initialize env api for signal handling
+    # so backend can call tvm::runtime::EnvCheckSignals to check
+    # signal when executing a long running function.
+    #
+    # This feature is only enabled in cython for now due to problems of calling
+    # these functions in ctypes.
+    #
+    # When the functions are not registered, the signals will be handled
+    # only when the FFI function returns.
+    CALL(TVMBackendRegisterEnvCAPI(c_str("PyErr_CheckSignals"), <void*>PyErr_CheckSignals))
+
+_init_env_api()
diff --git a/python/tvm/_ffi/_cython/packed_func.pxi b/python/tvm/_ffi/_cython/packed_func.pxi
index 30b879d..ad896a3 100644
--- a/python/tvm/_ffi/_cython/packed_func.pxi
+++ b/python/tvm/_ffi/_cython/packed_func.pxi
@@ -319,6 +319,7 @@ cdef class PackedFuncBase:
     def __call__(self, *args):
         cdef TVMValue ret_val
         cdef int ret_tcode
+        ret_tcode = kTVMNullptr
         FuncCall(self.chandle, args, &ret_val, &ret_tcode)
         return make_ret(ret_val, ret_tcode)
 
diff --git a/python/tvm/_ffi/base.py b/python/tvm/_ffi/base.py
index 0496195..164ea53 100644
--- a/python/tvm/_ffi/base.py
+++ b/python/tvm/_ffi/base.py
@@ -60,6 +60,7 @@ def _load_lib():
 
 
 try:
+    # The following import is needed for TVM to work with pdb
     import readline  # pylint: disable=unused-import
 except ImportError:
     pass
@@ -75,6 +76,7 @@ _RUNTIME_ONLY = "runtime" in _LIB_NAME
 # The FFI mode of TVM
 _FFI_MODE = os.environ.get("TVM_FFI", "auto")
 
+
 # ----------------------------
 # helper function in ctypes.
 # ----------------------------
diff --git a/src/runtime/registry.cc b/src/runtime/registry.cc
index 92b39f0..1a963be 100644
--- a/src/runtime/registry.cc
+++ b/src/runtime/registry.cc
@@ -22,6 +22,7 @@
  * \brief The global registry of packed function.
  */
 #include <dmlc/thread_local.h>
+#include <tvm/runtime/c_backend_api.h>
 #include <tvm/runtime/logging.h>
 #include <tvm/runtime/registry.h>
 
@@ -102,6 +103,76 @@ std::vector<std::string> Registry::ListNames() {
   return keys;
 }
 
+/*!
+ * \brief Execution environment specific API registry.
+ *
+ *  This registry stores C API function pointers about
+ *  execution environment(e.g. python) specific API function that
+ *  we need for specific low-level handling(e.g. signal checking).
+ *
+ *  We only store the C API function when absolutely necessary (e.g. when signal handler
+ *  cannot trap back into python). Always consider using the PackedFunc FFI when possible
+ *  in other cases.
+ */
+class EnvCAPIRegistry {
+ public:
+  /*!
+   * \brief Callback to check if signals have been sent to the process and
+   *        if so invoke the registered signal handler in the frontend environment.
+   *
+   *  When running TVM in another language (e.g. python), the signal handler
+   *  may not be immediately executed, but instead the signal is marked
+   *  in the interpreter state(to ensure non-blocking of the signal handler).
+   *
+   * \return 0 if no error happens, -1 if error happens.
+   */
+  typedef int (*F_PyErr_CheckSignals)();
+
+  // NOTE: the following functions are only registered
+  // in a python environment.
+  /*!
+   * \brief PyErr_CheckSignals function
+   */
+  F_PyErr_CheckSignals pyerr_check_signals = nullptr;
+
+  static EnvCAPIRegistry* Global() {
+    static EnvCAPIRegistry* inst = new EnvCAPIRegistry();
+    return inst;
+  }
+
+  // register environment(e.g. python) specific api functions
+  void Register(const std::string& symbol_name, void* fptr) {
+    if (symbol_name == "PyErr_CheckSignals") {
+      Update(symbol_name, &pyerr_check_signals, fptr);
+    } else {
+      LOG(FATAL) << "Unknown env API " << symbol_name;
+    }
+  }
+
+  // implementation of tvm::runtime::EnvCheckSignals
+  void CheckSignals() {
+    // check python signal to see if there is an exception raised
+    if (pyerr_check_signals != nullptr && (*pyerr_check_signals)() != 0) {
+      // The error will let FFI know that the frontend environment
+      // already set an error.
+      throw EnvErrorAlreadySet("");
+    }
+  }
+
+ private:
+  // update the internal API table
+  template <typename FType>
+  void Update(const std::string& symbol_name, FType* target, void* ptr) {
+    FType ptr_casted = reinterpret_cast<FType>(ptr);
+    if (target[0] != nullptr && target[0] != ptr_casted) {
+      LOG(WARNING) << "tvm.runtime.RegisterEnvCAPI overrides an existing function " << symbol_name;
+    }
+    target[0] = ptr_casted;
+  }
+};
+
+void EnvCheckSignals() { EnvCAPIRegistry::Global()->CheckSignals(); }
+
 }  // namespace runtime
 }  // namespace tvm
 
@@ -152,3 +223,9 @@ int TVMFuncRemoveGlobal(const char* name) {
   tvm::runtime::Registry::Remove(name);
   API_END();
 }
+
+int TVMBackendRegisterEnvCAPI(const char* name, void* ptr) {
+  API_BEGIN();
+  tvm::runtime::EnvCAPIRegistry::Global()->Register(name, ptr);
+  API_END();
+}
diff --git a/src/runtime/runtime_base.h b/src/runtime/runtime_base.h
index 7abb329..3037c8d 100644
--- a/src/runtime/runtime_base.h
+++ b/src/runtime/runtime_base.h
@@ -32,23 +32,29 @@
 #define API_BEGIN() try {
 /*! \brief every function starts with API_BEGIN();
      and finishes with API_END() or API_END_HANDLE_ERROR */
-#define API_END()                           \
-  }                                         \
-  catch (std::exception & _except_) {       \
-    return TVMAPIHandleException(_except_); \
-  }                                         \
+#define API_END()                                         \
+  }                                                       \
+  catch (::tvm::runtime::EnvErrorAlreadySet & _except_) { \
+    return -2;                                            \
+  }                                                       \
+  catch (std::exception & _except_) {                     \
+    return TVMAPIHandleException(_except_);               \
+  }                                                       \
   return 0;  // NOLINT(*)
 /*!
  * \brief every function starts with API_BEGIN();
  *   and finishes with API_END() or API_END_HANDLE_ERROR
  *   The finally clause contains procedure to cleanup states when an error happens.
  */
-#define API_END_HANDLE_ERROR(Finalize)      \
-  }                                         \
-  catch (std::exception & _except_) {       \
-    Finalize;                               \
-    return TVMAPIHandleException(_except_); \
-  }                                         \
+#define API_END_HANDLE_ERROR(Finalize)                    \
+  }                                                       \
+  catch (::tvm::runtime::EnvErrorAlreadySet & _except_) { \
+    return -2;                                            \
+  }                                                       \
+  catch (std::exception & _except_) {                     \
+    Finalize;                                             \
+    return TVMAPIHandleException(_except_);               \
+  }                                                       \
   return 0;  // NOLINT(*)
 
 /*!
diff --git a/src/support/ffi_testing.cc b/src/support/ffi_testing.cc
index bac888a..a7a91c1 100644
--- a/src/support/ffi_testing.cc
+++ b/src/support/ffi_testing.cc
@@ -28,6 +28,8 @@
 #include <tvm/te/tensor.h>
 #include <tvm/tir/expr.h>
 
+#include <thread>
+
 namespace tvm {
 // Attrs used to python API
 struct TestAttrs : public AttrsNode<TestAttrs> {
@@ -81,6 +83,14 @@ TVM_REGISTER_GLOBAL("testing.device_test").set_body([](TVMArgs args, TVMRetValue
   *ret = dev;
 });
 
+TVM_REGISTER_GLOBAL("testing.run_check_signal").set_body_typed([](int nsec) {
+  for (int i = 0; i < nsec; ++i) {
+    tvm::runtime::EnvCheckSignals();
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+  }
+  LOG(INFO) << "Function finished without catching signal";
+});
+
 // in src/api_test.cc
 void ErrorTest(int x, int y) {
   // raise ValueError
diff --git a/src/support/socket.h b/src/support/socket.h
index 11060ae..a83a67c 100644
--- a/src/support/socket.h
+++ b/src/support/socket.h
@@ -50,6 +50,7 @@ using ssize_t = int;
 #include <unistd.h>
 #endif
 #include <tvm/runtime/logging.h>
+#include <tvm/runtime/registry.h>
 
 #include <cstring>
 #include <string>
@@ -68,6 +69,7 @@ static inline int poll(struct pollfd* pfd, int nfds, int timeout) {
 
 namespace tvm {
 namespace support {
+
 /*!
  * \brief Get current host name.
  * \return The hostname.
@@ -306,7 +308,7 @@ class Socket {
     }
   }
   /*!
-   * \return last error of socket 2operation
+   * \return last error of socket operation
    */
   static int GetLastError() {
 #ifdef _WIN32
@@ -361,6 +363,42 @@ class Socket {
 #endif
   }
 
+  /*!
+   * \brief Call a function and retry if an EINTR error is encountered.
+   *
+   *  Socket operations can return EINTR when the interrupt handler
+   *  is registered by the execution environment(e.g. python).
+   *  We should retry if there is no KeyboardInterrupt recorded in
+   *  the environment.
+   *
+   * \note This function is needed to avoid rare interrupt event
+   *       in long running server code.
+   *
+   * \param func The function to retry.
+   * \return The return value of func; -1 when func fails with an error other than EINTR.
+   */
+  template <typename FuncType>
+  ssize_t RetryCallOnEINTR(FuncType func) {
+    ssize_t ret = func();
+    // common path
+    if (ret != -1) return ret;
+    // less common path
+    do {
+      if (GetLastError() == EINTR) {
+        // Call into env check signals to see if there are
+        // environment specific(e.g. python) signal exceptions.
+        // This function will throw an exception if the process
+        // received a signal that requires TVM to return immediately (e.g. SIGINT).
+        runtime::EnvCheckSignals();
+      } else {
+        // other errors
+        return ret;
+      }
+      ret = func();
+    } while (ret == -1);
+    return ret;
+  }
+
  protected:
   explicit Socket(SockType sockfd) : sockfd(sockfd) {}
 };
@@ -407,7 +445,7 @@ class TCPSocket : public Socket {
    * \return The accepted socket connection.
    */
   TCPSocket Accept() {
-    SockType newfd = accept(sockfd, nullptr, nullptr);
+    SockType newfd = RetryCallOnEINTR([&]() { return accept(sockfd, nullptr, nullptr); });
     if (newfd == INVALID_SOCKET) {
       Socket::Error("Accept");
     }
@@ -420,7 +458,8 @@ class TCPSocket : public Socket {
    */
   TCPSocket Accept(SockAddr* addr) {
     socklen_t addrlen = sizeof(addr->addr);
-    SockType newfd = accept(sockfd, reinterpret_cast<sockaddr*>(&addr->addr), &addrlen);
+    SockType newfd = RetryCallOnEINTR(
+        [&]() { return accept(sockfd, reinterpret_cast<sockaddr*>(&addr->addr), &addrlen); });
     if (newfd == INVALID_SOCKET) {
       Socket::Error("Accept");
     }
@@ -460,7 +499,8 @@ class TCPSocket : public Socket {
    */
   ssize_t Send(const void* buf_, size_t len, int flag = 0) {
     const char* buf = reinterpret_cast<const char*>(buf_);
-    return send(sockfd, buf, static_cast<sock_size_t>(len), flag);
+    return RetryCallOnEINTR(
+        [&]() { return send(sockfd, buf, static_cast<sock_size_t>(len), flag); });
   }
   /*!
    * \brief receive data using the socket
@@ -472,7 +512,8 @@ class TCPSocket : public Socket {
    */
   ssize_t Recv(void* buf_, size_t len, int flags = 0) {
     char* buf = reinterpret_cast<char*>(buf_);
-    return recv(sockfd, buf, static_cast<sock_size_t>(len), flags);
+    return RetryCallOnEINTR(
+        [&]() { return recv(sockfd, buf, static_cast<sock_size_t>(len), flags); });
   }
   /*!
    * \brief peform block write that will attempt to send all data out
@@ -485,7 +526,8 @@ class TCPSocket : public Socket {
     const char* buf = reinterpret_cast<const char*>(buf_);
     size_t ndone = 0;
     while (ndone < len) {
-      ssize_t ret = send(sockfd, buf, static_cast<ssize_t>(len - ndone), 0);
+      ssize_t ret = RetryCallOnEINTR(
+          [&]() { return send(sockfd, buf, static_cast<ssize_t>(len - ndone), 0); });
       if (ret == -1) {
         if (LastErrorWouldBlock()) return ndone;
         Socket::Error("SendAll");
@@ -506,7 +548,8 @@ class TCPSocket : public Socket {
     char* buf = reinterpret_cast<char*>(buf_);
     size_t ndone = 0;
     while (ndone < len) {
-      ssize_t ret = recv(sockfd, buf, static_cast<sock_size_t>(len - ndone), MSG_WAITALL);
+      ssize_t ret = RetryCallOnEINTR(
+          [&]() { return recv(sockfd, buf, static_cast<sock_size_t>(len - ndone), MSG_WAITALL); });
       if (ret == -1) {
         if (LastErrorWouldBlock()) {
           LOG(FATAL) << "would block";