You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@qpid.apache.org by ac...@apache.org on 2011/03/30 21:30:16 UTC
svn commit: r1087052 - in /qpid/trunk/qpid/cpp/src:
qpid/sys/posix/Socket.cpp tests/brokertest.py
Author: aconway
Date: Wed Mar 30 19:30:16 2011
New Revision: 1087052
URL: http://svn.apache.org/viewvc?rev=1087052&view=rev
Log:
QPID-3129: cluster_tests.LongTests.test_failover hangs
Fix is a race condition in posix/Socket.cpp Socket::connect.
When connecting to a port on the same host which no longer has a
process associated with it the OS occasionally chooses the remote
port (which is unoccupied) as the port to bind the local end of the
socket, resulting in a "circular" connection.
This seems like something the OS should prevent but I have confirmed
that the sporadic hangs in cluster_tests.LongTests.test_failover on
RHEL5 are caused by such a circular connection.
The fix is to detect circular connections and raise an error.
Modified:
qpid/trunk/qpid/cpp/src/qpid/sys/posix/Socket.cpp
qpid/trunk/qpid/cpp/src/tests/brokertest.py
Modified: qpid/trunk/qpid/cpp/src/qpid/sys/posix/Socket.cpp
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/cpp/src/qpid/sys/posix/Socket.cpp?rev=1087052&r1=1087051&r2=1087052&view=diff
==============================================================================
--- qpid/trunk/qpid/cpp/src/qpid/sys/posix/Socket.cpp (original)
+++ qpid/trunk/qpid/cpp/src/qpid/sys/posix/Socket.cpp Wed Mar 30 19:30:16 2011
@@ -7,9 +7,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -45,9 +45,9 @@ namespace sys {
namespace {
std::string getName(int fd, bool local, bool includeService = false)
{
- ::sockaddr_storage name; // big enough for any socket address
+ ::sockaddr_storage name; // big enough for any socket address
::socklen_t namelen = sizeof(name);
-
+
int result = -1;
if (local) {
result = ::getsockname(fd, (::sockaddr*)&name, &namelen);
@@ -60,8 +60,8 @@ std::string getName(int fd, bool local,
char servName[NI_MAXSERV];
char dispName[NI_MAXHOST];
if (includeService) {
- if (int rc=::getnameinfo((::sockaddr*)&name, namelen, dispName, sizeof(dispName),
- servName, sizeof(servName),
+ if (int rc=::getnameinfo((::sockaddr*)&name, namelen, dispName, sizeof(dispName),
+ servName, sizeof(servName),
NI_NUMERICHOST | NI_NUMERICSERV) != 0)
throw QPID_POSIX_ERROR(rc);
return std::string(dispName) + ":" + std::string(servName);
@@ -75,9 +75,9 @@ std::string getName(int fd, bool local,
std::string getService(int fd, bool local)
{
- ::sockaddr_storage name; // big enough for any socket address
+ ::sockaddr_storage name; // big enough for any socket address
::socklen_t namelen = sizeof(name);
-
+
int result = -1;
if (local) {
result = ::getsockname(fd, (::sockaddr*)&name, &namelen);
@@ -88,8 +88,8 @@ std::string getService(int fd, bool loca
QPID_POSIX_CHECK(result);
char servName[NI_MAXSERV];
- if (int rc=::getnameinfo((::sockaddr*)&name, namelen, 0, 0,
- servName, sizeof(servName),
+ if (int rc=::getnameinfo((::sockaddr*)&name, namelen, 0, 0,
+ servName, sizeof(servName),
NI_NUMERICHOST | NI_NUMERICSERV) != 0)
throw QPID_POSIX_ERROR(rc);
return servName;
@@ -172,6 +172,23 @@ void Socket::connect(const SocketAddress
(errno != EINPROGRESS)) {
throw Exception(QPID_MSG(strError(errno) << ": " << connectname));
}
+ // When connecting to a port on the same host which no longer has
+ // a process associated with it, the OS occasionally chooses the
+ // remote port (which is unoccupied) as the port to bind the local
+ // end of the socket, resulting in a "circular" connection.
+ //
+ // This seems like something the OS should prevent but I have
+ // confirmed that sporadic hangs in
+ // cluster_tests.LongTests.test_failover on RHEL5 are caused by
+ // such a circular connection.
+ //
+ // Raise an error if we see such a connection, since we know there is
+ // no listener on the peer address.
+ //
+ if (getLocalAddress() == getPeerAddress()) {
+ close();
+ throw Exception(QPID_MSG("Connection refused: " << connectname));
+ }
}
void
Modified: qpid/trunk/qpid/cpp/src/tests/brokertest.py
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/cpp/src/tests/brokertest.py?rev=1087052&r1=1087051&r2=1087052&view=diff
==============================================================================
--- qpid/trunk/qpid/cpp/src/tests/brokertest.py (original)
+++ qpid/trunk/qpid/cpp/src/tests/brokertest.py Wed Mar 30 19:30:16 2011
@@ -498,7 +498,7 @@ class BrokerTest(TestCase):
r.close()
self.assertEqual(expect_contents, actual_contents)
-def join(thread, timeout=1):
+def join(thread, timeout=10):
thread.join(timeout)
if thread.isAlive(): raise Exception("Timed out joining thread %s"%thread)
---------------------------------------------------------------------
Apache Qpid - AMQP Messaging Implementation
Project: http://qpid.apache.org
Use/Interact: mailto:commits-subscribe@qpid.apache.org