You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@qpid.apache.org by ac...@apache.org on 2010/03/08 18:34:10 UTC

svn commit: r920414 - in /qpid/trunk/qpid: cpp/src/qpid/cluster/ cpp/src/qpid/management/ cpp/src/tests/ cpp/src/tests/testagent/ python/qpid/

Author: aconway
Date: Mon Mar  8 17:34:09 2010
New Revision: 920414

URL: http://svn.apache.org/viewvc?rev=920414&view=rev
Log:
QPID-2436: Fix cluster update of remote agents.

The v2key of cluster agents was not being passed as part of a cluster update.
This meant they were not being associated with the correct shadow connections on
the updatee. This caused inconsistencies that shut down the new broker.

Modified:
    qpid/trunk/qpid/cpp/src/qpid/cluster/Cluster.cpp
    qpid/trunk/qpid/cpp/src/qpid/cluster/Connection.cpp
    qpid/trunk/qpid/cpp/src/qpid/cluster/InitialStatusMap.h
    qpid/trunk/qpid/cpp/src/qpid/management/ManagementAgent.cpp
    qpid/trunk/qpid/cpp/src/tests/cluster_tests.fail
    qpid/trunk/qpid/cpp/src/tests/cluster_tests.py
    qpid/trunk/qpid/cpp/src/tests/test_env.sh.in
    qpid/trunk/qpid/cpp/src/tests/testagent/Makefile.am
    qpid/trunk/qpid/python/qpid/brokertest.py

Modified: qpid/trunk/qpid/cpp/src/qpid/cluster/Cluster.cpp
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/cpp/src/qpid/cluster/Cluster.cpp?rev=920414&r1=920413&r2=920414&view=diff
==============================================================================
--- qpid/trunk/qpid/cpp/src/qpid/cluster/Cluster.cpp (original)
+++ qpid/trunk/qpid/cpp/src/qpid/cluster/Cluster.cpp Mon Mar  8 17:34:09 2010
@@ -198,8 +198,12 @@
             framing::cluster::StoreState(storeState), shutdownId, 
             firstConfig, l);
     }
-    void ready(const std::string& url) { cluster.ready(member, url, l); }
-    void configChange(const std::string& current) { cluster.configChange(member, current, l); }
+    void ready(const std::string& url) {
+        cluster.ready(member, url, l);
+    }
+    void configChange(const std::string& current) {
+        cluster.configChange(member, current, l);
+    }
     void updateOffer(uint64_t updatee) {
         cluster.updateOffer(member, updatee, l);
     }

Modified: qpid/trunk/qpid/cpp/src/qpid/cluster/Connection.cpp
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/cpp/src/qpid/cluster/Connection.cpp?rev=920414&r1=920413&r2=920414&view=diff
==============================================================================
--- qpid/trunk/qpid/cpp/src/qpid/cluster/Connection.cpp (original)
+++ qpid/trunk/qpid/cpp/src/qpid/cluster/Connection.cpp Mon Mar  8 17:34:09 2010
@@ -542,7 +542,7 @@
 void Connection::managementSchema(const std::string& data) {
     management::ManagementAgent* agent = cluster.getBroker().getManagementAgent();
     if (!agent)
-        throw Exception(QPID_MSG("Management schema update but no management agent."));
+        throw Exception(QPID_MSG("Management schema update but management not enabled."));
     framing::Buffer buf(const_cast<char*>(data.data()), data.size());
     agent->importSchemas(buf);
     QPID_LOG(debug, cluster << " updated management schemas");
@@ -557,7 +557,7 @@
 	     << objectNum << " seq " << bootSequence);
     management::ManagementAgent* agent = cluster.getBroker().getManagementAgent();
     if (!agent)
-        throw Exception(QPID_MSG("Management schema update but no management agent."));
+        throw Exception(QPID_MSG("Management schema update but management not enabled."));
     agent->setNextObjectId(objectNum);
     agent->setBootSequence(bootSequence);
 }
@@ -565,7 +565,7 @@
 void Connection::managementAgents(const std::string& data) {
     management::ManagementAgent* agent = cluster.getBroker().getManagementAgent();
     if (!agent)
-        throw Exception(QPID_MSG("Management agents update but no management agent."));
+        throw Exception(QPID_MSG("Management agent update but management not enabled."));
     framing::Buffer buf(const_cast<char*>(data.data()), data.size());
     agent->importAgents(buf);
     QPID_LOG(debug, cluster << " updated management agents");

Modified: qpid/trunk/qpid/cpp/src/qpid/cluster/InitialStatusMap.h
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/cpp/src/qpid/cluster/InitialStatusMap.h?rev=920414&r1=920413&r2=920414&view=diff
==============================================================================
--- qpid/trunk/qpid/cpp/src/qpid/cluster/InitialStatusMap.h (original)
+++ qpid/trunk/qpid/cpp/src/qpid/cluster/InitialStatusMap.h Mon Mar  8 17:34:09 2010
@@ -31,6 +31,11 @@
 
 /**
  * Track status of cluster members during initialization.
+ *
+ * When a new member joins the CPG cluster, all members send an initial-status
+ * control. This map tracks those controls and provides data to make decisions
+ * about joining the cluster.
+ *
  */
 class InitialStatusMap
 {
@@ -38,7 +43,7 @@
     typedef framing::ClusterInitialStatusBody Status;
 
     InitialStatusMap(const MemberId& self, size_t size);
-    /** Process a config change. @return true if we need to re-send our status */
+    /** Process a config change. May make isResendNeeded() true. */
     void configChange(const MemberSet& newConfig);
     /** @return true if we need to re-send status */
     bool isResendNeeded();
@@ -52,7 +57,7 @@
     bool transitionToComplete();
     /**@pre isComplete(). @return this node's elders */
     MemberSet getElders() const;
-    /**@pre isComplete(). @return True if we need an update. */
+    /**@pre isComplete(). @return True if we need to request an update. */
     bool isUpdateNeeded();
     /**@pre isComplete(). @return Cluster-wide cluster ID. */
     framing::Uuid getClusterId();

Modified: qpid/trunk/qpid/cpp/src/qpid/management/ManagementAgent.cpp
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/cpp/src/qpid/management/ManagementAgent.cpp?rev=920414&r1=920413&r2=920414&view=diff
==============================================================================
--- qpid/trunk/qpid/cpp/src/qpid/management/ManagementAgent.cpp (original)
+++ qpid/trunk/qpid/cpp/src/qpid/management/ManagementAgent.cpp Mon Mar  8 17:34:09 2010
@@ -285,7 +285,7 @@
 }
 
 void ManagementAgent::clusterUpdate() {
-    // Called on all cluster memebesr when a new member joins a cluster.
+    // Called on all cluster members when a new member joins a cluster.
     // Set clientWasAdded so that on the next periodicProcessing we will do 
     // a full update on all cluster members.
     clientWasAdded = true;
@@ -1450,7 +1450,11 @@
     outBuf.putLong(brokerBank);
     outBuf.putLong(agentBank);
     outBuf.putShortString(routingKey);
-    connectionRef.encode(outBuf);
+    // TODO aconway 2010-03-04: we send the v2Key instead of the
+    // ObjectId because that has the same meaning on different
+    // brokers. ObjectId::encode doesn't currently encode the v2Key,
+    // this can be cleaned up when it does.
+    outBuf.putMediumString(connectionRef.getV2Key());
     mgmtObject->writeProperties(outBuf);
 }
 
@@ -1458,16 +1462,24 @@
     brokerBank = inBuf.getLong();
     agentBank = inBuf.getLong();
     inBuf.getShortString(routingKey);
-    connectionRef.decode(inBuf);
+
+    // TODO aconway 2010-03-04: see comment in encode()
+    string connectionKey;
+    inBuf.getMediumString(connectionKey);
+    connectionRef = ObjectId(); // Clear out any existing value.
+    connectionRef.setV2Key(connectionKey);
+
     mgmtObject = new _qmf::Agent(&agent, this);
     mgmtObject->readProperties(inBuf);
-    agent.addObject(mgmtObject, 0);
+    // TODO aconway 2010-03-04: see comment in encode(), readProperties doesn't set v2key.
+    mgmtObject->set_connectionRef(connectionRef);
 }
 
 uint32_t ManagementAgent::RemoteAgent::encodedSize() const {
+    // TODO aconway 2010-03-04: see comment in encode()
     return sizeof(uint32_t) + sizeof(uint32_t) // 2 x Long
         + routingKey.size() + sizeof(uint8_t) // ShortString
-        + connectionRef.encodedSize()
+        + connectionRef.getV2Key().size() + sizeof(uint16_t) // medium string
         + mgmtObject->writePropertiesSize();
 }
 
@@ -1477,25 +1489,21 @@
          i != remoteAgents.end();
          ++i)
     {
-        ObjectId id = i->first;
+        // TODO aconway 2010-03-04: see comment in ManagementAgent::RemoteAgent::encode
         RemoteAgent* agent = i->second;
-        size_t encodedSize = id.encodedSize() + agent->encodedSize();
+        size_t encodedSize = agent->encodedSize();
         size_t end = out.size();
         out.resize(end + encodedSize);
         framing::Buffer outBuf(&out[end], encodedSize);
-        id.encode(outBuf);
         agent->encode(outBuf);
     }
 }
 
 void ManagementAgent::importAgents(qpid::framing::Buffer& inBuf) {
     while (inBuf.available()) {
-        ObjectId id;
-        inBuf.checkAvailable(id.encodedSize());
-        id.decode(inBuf);
         std::auto_ptr<RemoteAgent> agent(new RemoteAgent(*this));
         agent->decode(inBuf);
-        addObject (agent->mgmtObject, 0);
+        addObject(agent->mgmtObject, 0);
         remoteAgents[agent->connectionRef] = agent.release();
     }
 }

Modified: qpid/trunk/qpid/cpp/src/tests/cluster_tests.fail
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/cpp/src/tests/cluster_tests.fail?rev=920414&r1=920413&r2=920414&view=diff
==============================================================================
--- qpid/trunk/qpid/cpp/src/tests/cluster_tests.fail (original)
+++ qpid/trunk/qpid/cpp/src/tests/cluster_tests.fail Mon Mar  8 17:34:09 2010
@@ -1,3 +1,3 @@
-cluster_tests.LongTests.test_management
+
 
 

Modified: qpid/trunk/qpid/cpp/src/tests/cluster_tests.py
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/cpp/src/tests/cluster_tests.py?rev=920414&r1=920413&r2=920414&view=diff
==============================================================================
--- qpid/trunk/qpid/cpp/src/tests/cluster_tests.py (original)
+++ qpid/trunk/qpid/cpp/src/tests/cluster_tests.py Mon Mar  8 17:34:09 2010
@@ -144,7 +144,7 @@
             i += 1
             b = cluster.start(expect=EXPECT_EXIT_FAIL)
             ErrorGenerator(b)
-            time.sleep(1)
+            time.sleep(min(5,self.duration()/2))
         sender.stop()
         receiver.stop(sender.sent)
         for i in range(i, len(cluster)): cluster[i].kill()
@@ -152,7 +152,7 @@
     def test_management(self):
         """Run management clients and other clients concurrently."""
 
-        # FIXME aconway 2010-03-03: move to framework
+        # TODO aconway 2010-03-03: move to brokertest framework
         class ClientLoop(StoppableThread):
             """Run a client executable in a loop."""
             def __init__(self, broker, cmd):
@@ -173,14 +173,21 @@
                                 self.cmd, expect=EXPECT_UNKNOWN)
                         finally: self.lock.release()
                         try: exit = self.process.wait()
-                        except: exit = 1
+                        except OSError, e:
+                            # Seems to be a race in wait(), it throws
+                            # "no such process" during test shutdown.
+                            # Doesn't indicate a test error, ignore.
+                            return
+                        except Exception, e:
+                            self.process.unexpected(
+                                "client of %s: %s"%(self.broker.name, e))
                         self.lock.acquire()
                         try:
                             # Quit and ignore errors if stopped or expecting failure.
                             if self.stopped: break
                             if exit != 0:
-                                self.process.unexpected("client of %s exit status %s" %
-                                                        (self.broker.name, exit))
+                                self.process.unexpected(
+                                    "client of %s exit code %s"%(self.broker.name, exit))
                         finally: self.lock.release()
                 except Exception, e:
                     self.error = RethrownException("Error in ClientLoop.run")
@@ -218,9 +225,7 @@
                 ["perftest", "--count", 1000,
                  "--base-name", str(qpid.datatypes.uuid4()), "--port", broker.port()],
                 ["qpid-queue-stats", "-a", "localhost:%s" %(broker.port())],
-                [os.path.join(self.rootdir, "testagent/testagent"), "localhost",
-                 str(broker.port())]
-                ]:
+                ["testagent", "localhost", str(broker.port())] ]:
                 batch.append(ClientLoop(broker, cmd))
             clients.append(batch)
 
@@ -238,7 +243,7 @@
             start_mclients(b)
 
         while time.time() < endtime:
-            time.sleep(min(5,self.duration()))
+            time.sleep(min(5,self.duration()/2))
             for b in cluster[alive:]: b.ready() # Check if a broker crashed.
             # Kill the first broker. Ignore errors on its clients and all the mclients
             for c in clients[alive] + mclients: c.expect_fail()
@@ -252,7 +257,6 @@
             b = cluster.start()
             start_clients(b)
             for b in cluster[alive:]: start_mclients(b)
-
         for c in chain(mclients, *clients):
             c.stop()
 

Modified: qpid/trunk/qpid/cpp/src/tests/test_env.sh.in
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/cpp/src/tests/test_env.sh.in?rev=920414&r1=920413&r2=920414&view=diff
==============================================================================
--- qpid/trunk/qpid/cpp/src/tests/test_env.sh.in (original)
+++ qpid/trunk/qpid/cpp/src/tests/test_env.sh.in Mon Mar  8 17:34:09 2010
@@ -54,7 +54,7 @@
 export SENDER_EXEC=$QPID_TEST_EXEC_DIR/sender
 
 # Path
-export PATH=$top_builddir/src:$builddir:$srcdir:$PYTHON_COMMANDS:$QPID_TEST_EXEC_DIR:$PATH
+export PATH=$top_builddir/src:$builddir:$builddir/testagent:$srcdir:$PYTHON_COMMANDS:$QPID_TEST_EXEC_DIR:$PATH
 
 # Modules
 export TEST_STORE_LIB=$testmoduledir/test_store.so

Modified: qpid/trunk/qpid/cpp/src/tests/testagent/Makefile.am
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/cpp/src/tests/testagent/Makefile.am?rev=920414&r1=920413&r2=920414&view=diff
==============================================================================
--- qpid/trunk/qpid/cpp/src/tests/testagent/Makefile.am (original)
+++ qpid/trunk/qpid/cpp/src/tests/testagent/Makefile.am Mon Mar  8 17:34:09 2010
@@ -43,7 +43,10 @@
 
 INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include -I$(top_srcdir)/src -I$(top_builddir)/src -Igen
 
-noinst_PROGRAMS=testagent
+qpidexecdir = $(libexecdir)/qpid
+qpidtestdir = $(qpidexecdir)/tests
+
+qpidtest_PROGRAMS=testagent
 testagent_SOURCES=testagent.cpp $(GEN_SRC)
 testagent_LDADD=$(top_builddir)/src/libqmf.la
 

Modified: qpid/trunk/qpid/python/qpid/brokertest.py
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/python/qpid/brokertest.py?rev=920414&r1=920413&r2=920414&view=diff
==============================================================================
--- qpid/trunk/qpid/python/qpid/brokertest.py (original)
+++ qpid/trunk/qpid/python/qpid/brokertest.py Mon Mar  8 17:34:09 2010
@@ -38,11 +38,10 @@
 EXPECT_RUNNING=3           # Expect to still be running at end of test
 EXPECT_UNKNOWN=4            # No expectation, don't check exit status.
 
-def is_exe(fpath):
-    return os.path.exists(fpath) and os.access(fpath, os.X_OK)
-
 def find_exe(program):
     """Find an executable in the system PATH"""
+    def is_exe(fpath):
+        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
     dir, name = os.path.split(program)
     if dir:
         if is_exe(program): return program
@@ -144,13 +143,13 @@
         expect - if set verify expectation at end of test.
         drain  - if true (default) drain stdout/stderr to files.
         """
-        assert find_exe(cmd[0])
+        assert find_exe(cmd[0]), "executable not found: "+cmd[0]
         if type(cmd) is type(""): cmd = [cmd] # Make it a list.
         self.cmd  = [ str(x) for x in cmd ]
         popen2.Popen3.__init__(self, self.cmd, True)
         self.expect = expect
         self.was_shutdown = False # Set if we deliberately kill/terminate the process
-        self.pname = "%s-%d" % (os.path.split(self.cmd[0])[-1], self.pid)
+        self.pname = "%s-%d" % (os.path.split(self.cmd[0])[1], self.pid)
         msg = "Process %s" % self.pname
         self.stdin = ExceptionWrapper(self.tochild, msg)
         self.stdout = Popen.OutStream(self.fromchild, self.outfile("out"), msg)
@@ -353,7 +352,6 @@
             return False
         finally: f.close()
 
-    # FIXME aconway 2010-03-02: rename to wait_ready
     def ready(self):
         """Wait till broker is ready to serve clients"""
         # First make sure the broker is listening by checking the log.
@@ -361,7 +359,7 @@
             raise Exception("Timed out waiting for broker %s" % self.name)
         # Make a connection, this will wait for extended cluster init to finish.
         try: self.connect().close()
-        except: raise RethrownException("Broker %s failed ready test %s"%self.name)
+        except: raise RethrownException("Broker %s failed ready test"%self.name)
 
 class Cluster:
     """A cluster of brokers in a test."""
@@ -427,6 +425,7 @@
         for p in self.stopem:
             try: p.stop()
             except Exception, e: err.append(str(e))
+
         if err: raise Exception("Unexpected process status:\n    "+"\n    ".join(err))
 
     def cleanup_stop(self, stopable):



---------------------------------------------------------------------
Apache Qpid - AMQP Messaging Implementation
Project:      http://qpid.apache.org
Use/Interact: mailto:commits-subscribe@qpid.apache.org