You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@qpid.apache.org by ac...@apache.org on 2011/03/30 21:30:16 UTC

svn commit: r1087052 - in /qpid/trunk/qpid/cpp/src: qpid/sys/posix/Socket.cpp tests/brokertest.py

Author: aconway
Date: Wed Mar 30 19:30:16 2011
New Revision: 1087052

URL: http://svn.apache.org/viewvc?rev=1087052&view=rev
Log:
QPID-3129: cluster_tests.LongTests.test_failover hangs

Fix is a race condition in posix/Socket.cpp Socket::connect.

When connecting to a port on the same host which no longer has a
process associated with it the OS occasionally chooses the remote
port (which is unoccupied) as the port to bind the local end of the
socket, resulting in a "circular" connection.

This seems like something the OS should prevent but I have confirmed
that the sporadic hangs in cluster_tests.LongTests.test_failover on
RHEL5 are caused by such a circular connection.

The fix is to detect circular connections and raise an error.

Modified:
    qpid/trunk/qpid/cpp/src/qpid/sys/posix/Socket.cpp
    qpid/trunk/qpid/cpp/src/tests/brokertest.py

Modified: qpid/trunk/qpid/cpp/src/qpid/sys/posix/Socket.cpp
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/cpp/src/qpid/sys/posix/Socket.cpp?rev=1087052&r1=1087051&r2=1087052&view=diff
==============================================================================
--- qpid/trunk/qpid/cpp/src/qpid/sys/posix/Socket.cpp (original)
+++ qpid/trunk/qpid/cpp/src/qpid/sys/posix/Socket.cpp Wed Mar 30 19:30:16 2011
@@ -7,9 +7,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -45,9 +45,9 @@ namespace sys {
 namespace {
 std::string getName(int fd, bool local, bool includeService = false)
 {
-    ::sockaddr_storage name; // big enough for any socket address    
+    ::sockaddr_storage name; // big enough for any socket address
     ::socklen_t namelen = sizeof(name);
-    
+
     int result = -1;
     if (local) {
         result = ::getsockname(fd, (::sockaddr*)&name, &namelen);
@@ -60,8 +60,8 @@ std::string getName(int fd, bool local, 
     char servName[NI_MAXSERV];
     char dispName[NI_MAXHOST];
     if (includeService) {
-        if (int rc=::getnameinfo((::sockaddr*)&name, namelen, dispName, sizeof(dispName), 
-                                 servName, sizeof(servName), 
+        if (int rc=::getnameinfo((::sockaddr*)&name, namelen, dispName, sizeof(dispName),
+                                 servName, sizeof(servName),
                                  NI_NUMERICHOST | NI_NUMERICSERV) != 0)
             throw QPID_POSIX_ERROR(rc);
         return std::string(dispName) + ":" + std::string(servName);
@@ -75,9 +75,9 @@ std::string getName(int fd, bool local, 
 
 std::string getService(int fd, bool local)
 {
-    ::sockaddr_storage name; // big enough for any socket address    
+    ::sockaddr_storage name; // big enough for any socket address
     ::socklen_t namelen = sizeof(name);
-    
+
     int result = -1;
     if (local) {
         result = ::getsockname(fd, (::sockaddr*)&name, &namelen);
@@ -88,8 +88,8 @@ std::string getService(int fd, bool loca
     QPID_POSIX_CHECK(result);
 
     char servName[NI_MAXSERV];
-    if (int rc=::getnameinfo((::sockaddr*)&name, namelen, 0, 0, 
-                                 servName, sizeof(servName), 
+    if (int rc=::getnameinfo((::sockaddr*)&name, namelen, 0, 0,
+                                 servName, sizeof(servName),
                                  NI_NUMERICHOST | NI_NUMERICSERV) != 0)
         throw QPID_POSIX_ERROR(rc);
     return servName;
@@ -172,6 +172,23 @@ void Socket::connect(const SocketAddress
         (errno != EINPROGRESS)) {
         throw Exception(QPID_MSG(strError(errno) << ": " << connectname));
     }
+    // When connecting to a port on the same host which no longer has
+    // a process associated with it, the OS occasionally chooses the
+    // remote port (which is unoccupied) as the port to bind the local
+    // end of the socket, resulting in a "circular" connection.
+    //
+    // This seems like something the OS should prevent but I have
+    // confirmed that sporadic hangs in
+    // cluster_tests.LongTests.test_failover on RHEL5 are caused by
+    // such a circular connection.
+    //
+    // Raise an error if we see such a connection, since we know there is
+    // no listener on the peer address.
+    //
+    if (getLocalAddress() == getPeerAddress()) {
+        close();
+        throw Exception(QPID_MSG("Connection refused: " << connectname));
+    }
 }
 
 void

Modified: qpid/trunk/qpid/cpp/src/tests/brokertest.py
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/cpp/src/tests/brokertest.py?rev=1087052&r1=1087051&r2=1087052&view=diff
==============================================================================
--- qpid/trunk/qpid/cpp/src/tests/brokertest.py (original)
+++ qpid/trunk/qpid/cpp/src/tests/brokertest.py Wed Mar 30 19:30:16 2011
@@ -498,7 +498,7 @@ class BrokerTest(TestCase):
         r.close()
         self.assertEqual(expect_contents, actual_contents)
 
-def join(thread, timeout=1):
+def join(thread, timeout=10):
     thread.join(timeout)
     if thread.isAlive(): raise Exception("Timed out joining thread %s"%thread)
 



---------------------------------------------------------------------
Apache Qpid - AMQP Messaging Implementation
Project:      http://qpid.apache.org
Use/Interact: mailto:commits-subscribe@qpid.apache.org