You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@qpid.apache.org by ac...@apache.org on 2014/10/23 16:00:07 UTC

svn commit: r1633814 - in /qpid/dispatch/trunk: include/qpid/dispatch/python_embedded.h include/qpid/dispatch/threading.h src/dispatch.c src/error.c src/posix/threading.c src/python_embedded.c src/router_agent.c src/router_pynode.c

Author: aconway
Date: Thu Oct 23 14:00:06 2014
New Revision: 1633814

URL: http://svn.apache.org/r1633814
Log:
DISPATCH-72: Sporadic core dumps in dispatch tests.

Added missing lock scopes around calls to python code, this resoloved the
sporadic core dumps (over 1000 test runs overnight with no failures)

Initially I speculated that the problem was that we don't lock the GIL as recommended by
the python C API docs https://docs.python.org/2/c-api/init.html#non-python-created-threads.
Instead we use a separate mutex in the C code.

I attempted to use the GIL but that caused deadlocks. They appeared to be
associated with locking a python mutex in the management agent - this causes
python to unlock and relock the GIL. That allows other threads into the python
code which somehow causes deadlock - I am not sure exactly why.

I believe it is OK to use the separate C mutex *provided* we never create
threads in python code.  If we do that then I think we will need to re-visit the
GIL and figure out how to lock it correctly.

Modified:
    qpid/dispatch/trunk/include/qpid/dispatch/python_embedded.h
    qpid/dispatch/trunk/include/qpid/dispatch/threading.h
    qpid/dispatch/trunk/src/dispatch.c
    qpid/dispatch/trunk/src/error.c
    qpid/dispatch/trunk/src/posix/threading.c
    qpid/dispatch/trunk/src/python_embedded.c
    qpid/dispatch/trunk/src/router_agent.c
    qpid/dispatch/trunk/src/router_pynode.c

Modified: qpid/dispatch/trunk/include/qpid/dispatch/python_embedded.h
URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/include/qpid/dispatch/python_embedded.h?rev=1633814&r1=1633813&r2=1633814&view=diff
==============================================================================
--- qpid/dispatch/trunk/include/qpid/dispatch/python_embedded.h (original)
+++ qpid/dispatch/trunk/include/qpid/dispatch/python_embedded.h Thu Oct 23 14:00:06 2014
@@ -76,7 +76,8 @@ PyObject *qd_field_to_py(qd_parsed_field
  * These are temporary and will eventually be replaced by having an internal python
  * work queue that feeds a dedicated embedded-python thread.
  */
-void qd_python_lock(void);
-void qd_python_unlock(void);
+typedef PyGILState_STATE qd_python_lock_state_t;
+qd_python_lock_state_t qd_python_lock(void);
+void qd_python_unlock(qd_python_lock_state_t state);
 
 #endif

Modified: qpid/dispatch/trunk/include/qpid/dispatch/threading.h
URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/include/qpid/dispatch/threading.h?rev=1633814&r1=1633813&r2=1633814&view=diff
==============================================================================
--- qpid/dispatch/trunk/include/qpid/dispatch/threading.h (original)
+++ qpid/dispatch/trunk/include/qpid/dispatch/threading.h Thu Oct 23 14:00:06 2014
@@ -51,4 +51,10 @@ sys_thread_t *sys_thread(void *(*run_fun
 void          sys_thread_free(sys_thread_t *thread);
 void          sys_thread_join(sys_thread_t *thread);
 
+/** Return the OS identifier for this thread */
+long sys_thread_id(sys_thread_t *thread);
+
+/** Return the OS identifier for the current thread */
+long sys_thread_self();
+
 #endif

Modified: qpid/dispatch/trunk/src/dispatch.c
URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/src/dispatch.c?rev=1633814&r1=1633813&r2=1633814&view=diff
==============================================================================
--- qpid/dispatch/trunk/src/dispatch.c (original)
+++ qpid/dispatch/trunk/src/dispatch.c Thu Oct 23 14:00:06 2014
@@ -78,15 +78,16 @@ STATIC_ASSERT(sizeof(long) >= sizeof(voi
 
 qd_error_t qd_dispatch_load_config(qd_dispatch_t *qd, const char *config_path)
 {
+    qd_python_lock_state_t lock_state = qd_python_lock();
     PyObject *module = PyImport_ImportModule("qpid_dispatch_internal.management.config");
-    if (!module) return qd_error_py();
-    PyObject *configure_dispatch = PyObject_GetAttrString(module, "configure_dispatch");
-    Py_DECREF(module);
-    if (!configure_dispatch) return qd_error_py();
-    PyObject *result = PyObject_CallFunction(configure_dispatch, "(ls)", (long)qd, config_path);
-    Py_DECREF(configure_dispatch);
-    if (!result) return qd_error_py();
-    return QD_ERROR_NONE;
+    PyObject *configure_dispatch = module ? PyObject_GetAttrString(module, "configure_dispatch") : NULL;
+    Py_XDECREF(module);
+    PyObject *result = configure_dispatch ? PyObject_CallFunction(configure_dispatch, "(ls)", (long)qd, config_path) : NULL;
+    Py_XDECREF(configure_dispatch);
+    if (!result) qd_error_py();
+    Py_XDECREF(result);
+    qd_python_unlock(lock_state);
+    return qd_error_code();
 }
 
 

Modified: qpid/dispatch/trunk/src/error.c
URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/src/error.c?rev=1633814&r1=1633813&r2=1633814&view=diff
==============================================================================
--- qpid/dispatch/trunk/src/error.c (original)
+++ qpid/dispatch/trunk/src/error.c Thu Oct 23 14:00:06 2014
@@ -144,16 +144,20 @@ qd_error_t qd_error_py_impl(const char *
 	PyObject *type, *value, *trace;
 	PyErr_Fetch(&type, &value, &trace); /* Note clears the python error indicator */
 
-	PyObject *type_name = type ? PyObject_GetAttrString(type, "__name__") : NULL;
-	PyErr_Print();
-	PyObject *value_str = value ? PyObject_Str(value) : NULL;
-	const char* str = value_str ? PyString_AsString(value_str) : "Unknown";
+	PyObject *py_type_name = type ? PyObject_GetAttrString(type, "__name__") : NULL;
+        const char *type_name = py_type_name ? PyString_AsString(py_type_name) : NULL;
+
+	PyObject *py_value_str = value ? PyObject_Str(value) : NULL;
+        const char *value_str = py_value_str ? PyString_AsString(py_value_str) : NULL;
+        if (!value_str) value_str = "Unknown";
+
+        PyErr_Clear(); /* Ignore errors while we're trying to build the values. */
 	if (type_name)
-	    qd_error_impl(QD_ERROR_PYTHON, file, line, "%s: %s", PyString_AsString(type_name), str);
+	    qd_error_impl(QD_ERROR_PYTHON, file, line, "%s: %s", type_name, value_str);
 	else
-	    qd_error_impl(QD_ERROR_PYTHON, file, line, "%s", str);
-	Py_XDECREF(value_str);
-	Py_XDECREF(type_name);
+	    qd_error_impl(QD_ERROR_PYTHON, file, line, "%s", value_str);
+	Py_XDECREF(py_value_str);
+	Py_XDECREF(py_type_name);
 
 	log_trace_py(type, value, trace, QD_LOG_ERROR, file, line);
 

Modified: qpid/dispatch/trunk/src/posix/threading.c
URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/src/posix/threading.c?rev=1633814&r1=1633813&r2=1633814&view=diff
==============================================================================
--- qpid/dispatch/trunk/src/posix/threading.c (original)
+++ qpid/dispatch/trunk/src/posix/threading.c Thu Oct 23 14:00:06 2014
@@ -24,14 +24,27 @@
 
 struct sys_mutex_t {
     pthread_mutex_t mutex;
+#ifndef NDEBUG
+    // In a debug build, used to assert correct use of mutex.
     int             acquired;
+#endif
 };
 
+// NOTE: normally it is incorrect for an assert expression to have side effects,
+// since it could change the behavior between a debug and a release build.  In
+// this case however the mutex->acquired field only exists in a debug build, so
+// we want operations on mutex->acquired to be compiled out of a release build.
+#define ACQUIRE(mutex) assert(!mutex->acquired++)
+#define RELEASE(mutex) assert(!--mutex->acquired)
+
+
 sys_mutex_t *sys_mutex(void)
 {
     sys_mutex_t *mutex = NEW(sys_mutex_t);
     pthread_mutex_init(&(mutex->mutex), 0);
+#ifndef NDEBUG
     mutex->acquired = 0;
+#endif
     return mutex;
 }
 
@@ -47,15 +60,13 @@ void sys_mutex_free(sys_mutex_t *mutex)
 void sys_mutex_lock(sys_mutex_t *mutex)
 {
     pthread_mutex_lock(&(mutex->mutex));
-    assert(!mutex->acquired);
-    mutex->acquired++;
+    ACQUIRE(mutex);
 }
 
 
 void sys_mutex_unlock(sys_mutex_t *mutex)
 {
-    mutex->acquired--;
-    assert(!mutex->acquired);
+    RELEASE(mutex);
     pthread_mutex_unlock(&(mutex->mutex));
 }
 
@@ -82,10 +93,9 @@ void sys_cond_free(sys_cond_t *cond)
 
 void sys_cond_wait(sys_cond_t *cond, sys_mutex_t *held_mutex)
 {
-    assert(held_mutex->acquired);
-    held_mutex->acquired--;
+    RELEASE(held_mutex);
     pthread_cond_wait(&(cond->cond), &(held_mutex->mutex));
-    held_mutex->acquired++;
+    ACQUIRE(held_mutex);
 }
 
 
@@ -150,6 +160,13 @@ sys_thread_t *sys_thread(void *(*run_fun
     return thread;
 }
 
+long sys_thread_id(sys_thread_t *thread) {
+    return (long) thread->thread;
+}
+
+long sys_thread_self() {
+    return pthread_self();
+}
 
 void sys_thread_free(sys_thread_t *thread)
 {

Modified: qpid/dispatch/trunk/src/python_embedded.c
URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/src/python_embedded.c?rev=1633814&r1=1633813&r2=1633814&view=diff
==============================================================================
--- qpid/dispatch/trunk/src/python_embedded.c (original)
+++ qpid/dispatch/trunk/src/python_embedded.c Thu Oct 23 14:00:06 2014
@@ -441,7 +441,7 @@ static PyObject *py_iter_parse(qd_field_
         }
         PyObject *value = qd_field_to_py(parsed);
         qd_parse_free(parsed);
-        qd_error_py();
+        if (!value) qd_error_py();
         return value;
     }
     qd_error(QD_ERROR_MESSAGE, "Failed to parse message field");
@@ -489,12 +489,14 @@ static void qd_io_rx_handler(void *conte
     if (!qd_message_check(msg, QD_DEPTH_BODY))
         return;
 
+    // This is called from non-python threads so we need to acquire the GIL to use python APIS.
+    qd_python_lock_state_t lock_state = qd_python_lock();
     PyObject *py_msg = PyObject_CallFunction(message_type, NULL);
     if (!py_msg) {
         qd_error_py();
+        qd_python_unlock(lock_state);
         return;
     }
-
     iter_to_py_attr(qd_message_field_iterator(msg, QD_FIELD_TO), py_iter_copy, py_msg, "address");
     iter_to_py_attr(qd_message_field_iterator(msg, QD_FIELD_REPLY_TO), py_iter_copy, py_msg, "reply_to");
     // Note: correlation ID requires _typed()
@@ -502,13 +504,12 @@ static void qd_io_rx_handler(void *conte
     iter_to_py_attr(qd_message_field_iterator(msg, QD_FIELD_APPLICATION_PROPERTIES), py_iter_parse, py_msg, "properties");
     iter_to_py_attr(qd_message_field_iterator(msg, QD_FIELD_BODY), py_iter_parse, py_msg, "body");
 
-    sys_mutex_lock(ilock);
     PyObject *value = PyObject_CallFunction(self->handler, "Ol", py_msg, link_id);
-    sys_mutex_unlock(ilock);
 
     Py_DECREF(py_msg);
     Py_XDECREF(value);
     qd_error_py();
+    qd_python_unlock(lock_state);
 }
 
 
@@ -730,12 +731,13 @@ static void qd_python_setup(void)
     }
 }
 
-void qd_python_lock(void)
+qd_python_lock_state_t qd_python_lock(void)
 {
     sys_mutex_lock(ilock);
+    return 0;
 }
 
-void qd_python_unlock(void)
+void qd_python_unlock(qd_python_lock_state_t lock_state)
 {
     sys_mutex_unlock(ilock);
 }

Modified: qpid/dispatch/trunk/src/router_agent.c
URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/src/router_agent.c?rev=1633814&r1=1633813&r2=1633814&view=diff
==============================================================================
--- qpid/dispatch/trunk/src/router_agent.c (original)
+++ qpid/dispatch/trunk/src/router_agent.c Thu Oct 23 14:00:06 2014
@@ -256,7 +256,7 @@ qd_error_t qd_c_entity_update_router_lin
     qd_router_link_t *link = (qd_router_link_t*) impl;
     /* FIXME aconway 2014-10-17: old management used link->bit_mask as name/identity,
      * but even when prefixed with router.link this is not unique. Let python agent
-     * generate a name for now, revisit with a better name later.
+     * generate a name for now. A better ID would be router.link:<remote container>.<link name>.
      */
     if (!qd_entity_set_string(entity, "linkType", qd_link_type_name(link->link_type)) &&
         !qd_entity_set_string(entity, "linkDir", (link->link_direction == QD_INCOMING) ? "in": "out") &&

Modified: qpid/dispatch/trunk/src/router_pynode.c
URL: http://svn.apache.org/viewvc/qpid/dispatch/trunk/src/router_pynode.c?rev=1633814&r1=1633813&r2=1633814&view=diff
==============================================================================
--- qpid/dispatch/trunk/src/router_pynode.c (original)
+++ qpid/dispatch/trunk/src/router_pynode.c Thu Oct 23 14:00:06 2014
@@ -632,12 +632,12 @@ qd_error_t qd_pyrouter_tick(qd_router_t 
     PyObject *pValue;
 
     if (pyTick && router->router_mode == QD_ROUTER_MODE_INTERIOR) {
-        qd_python_lock();
+        qd_python_lock_state_t lock_state = qd_python_lock();
         pArgs  = PyTuple_New(0);
         pValue = PyObject_CallObject(pyTick, pArgs);
         Py_DECREF(pArgs);
 	Py_XDECREF(pValue);
-        qd_python_unlock();
+        qd_python_unlock(lock_state);
     }
     return qd_error_py();
 }
@@ -652,14 +652,14 @@ void qd_router_mobile_added(qd_router_t 
         qd_field_iterator_reset_view(iter, ITER_VIEW_ADDRESS_HASH);
         char *address = (char*) qd_field_iterator_copy(iter);
 
-        qd_python_lock();
+        qd_python_lock_state_t lock_state = qd_python_lock();
         pArgs = PyTuple_New(1);
         PyTuple_SetItem(pArgs, 0, PyString_FromString(address));
         pValue = PyObject_CallObject(pyAdded, pArgs);
 	qd_error_py();
         Py_DECREF(pArgs);
 	Py_XDECREF(pValue);
-        qd_python_unlock();
+        qd_python_unlock(lock_state);
 
         free(address);
     }
@@ -672,14 +672,14 @@ void qd_router_mobile_removed(qd_router_
     PyObject *pValue;
 
     if (pyRemoved && router->router_mode == QD_ROUTER_MODE_INTERIOR) {
-        qd_python_lock();
+        qd_python_lock_state_t lock_state = qd_python_lock();
         pArgs = PyTuple_New(1);
         PyTuple_SetItem(pArgs, 0, PyString_FromString(address));
         pValue = PyObject_CallObject(pyRemoved, pArgs);
 	qd_error_py();
         Py_DECREF(pArgs);
 	Py_XDECREF(pValue);
-        qd_python_unlock();
+        qd_python_unlock(lock_state);
     }
 }
 
@@ -690,14 +690,14 @@ void qd_router_link_lost(qd_router_t *ro
     PyObject *pValue;
 
     if (pyRemoved && router->router_mode == QD_ROUTER_MODE_INTERIOR) {
-        qd_python_lock();
+        qd_python_lock_state_t lock_state = qd_python_lock();
         pArgs = PyTuple_New(1);
         PyTuple_SetItem(pArgs, 0, PyInt_FromLong((long) link_mask_bit));
         pValue = PyObject_CallObject(pyLinkLost, pArgs);
 	qd_error_py();
         Py_DECREF(pArgs);
 	Py_XDECREF(pValue);
-        qd_python_unlock();
+        qd_python_unlock(lock_state);
     }
 }
 



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@qpid.apache.org
For additional commands, e-mail: commits-help@qpid.apache.org