Posted to dev@httpd.apache.org by Aaron Bannert <aa...@clove.org> on 2001/08/28 02:09:01 UTC

[PATCH] worker MPM: reuse transaction pools

This patch implements a resource pool of context pools -- a queue of
available pools that the listener thread can pull from when accepting
a request. The worker thread that picks up that request then uses
that pool for the lifetime of that transaction, clear()ing the pool
and releasing it back to what I'm calling the "pool_queue" (har har).
This replaces the prior implementation that would create and destroy
a transaction pool for each and every request.

I'm seeing a small performance improvement with this patch, but I suspect
the fd_queue code could be improved for better parallelism. I also
suspect that with better testing this algorithm may prove more scalable.

-aaron


Index: worker.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/server/mpm/worker/worker.c,v
retrieving revision 1.17
diff -u -r1.17 worker.c
--- worker.c	2001/08/26 01:24:41	1.17
+++ worker.c	2001/08/28 00:00:03
@@ -125,6 +125,7 @@
 static int num_listensocks = 0;
 static apr_socket_t **listensocks;
 static fd_queue_t *worker_queue;
+static fd_queue_t *pool_queue; /* a resource pool of context pools */
 
 /* The structure used to pass unique initialization info to each thread */
 typedef struct {
@@ -203,6 +204,7 @@
     /* XXX: This will happen naturally on a graceful, and we don't care otherwise.
     ap_queue_signal_all_wakeup(worker_queue); */
     ap_queue_interrupt_all(worker_queue);
+    ap_queue_interrupt_all(pool_queue);
 }
 
 AP_DECLARE(apr_status_t) ap_mpm_query(int query_code, int *result)
@@ -556,6 +558,7 @@
     int thread_slot = ti->tid;
     apr_pool_t *tpool = apr_thread_pool_get(thd);
     apr_socket_t *csd = NULL;
+    apr_socket_t *dummycsd = NULL;
     apr_pool_t *ptrans;		/* Pool for per-transaction stuff */
     apr_socket_t *sd = NULL;
     int n;
@@ -641,9 +644,20 @@
         }
     got_fd:
         if (!workers_may_exit) {
-            /* create a new transaction pool for each accepted socket */
-            apr_pool_create(&ptrans, tpool);
 
+            /* pull the next available transaction pool from the queue */
+            if ((rv = ap_queue_pop(pool_queue, &dummycsd, &ptrans))
+                != FD_QUEUE_SUCCESS) {
+                if (rv == FD_QUEUE_EINTR) {
+                    goto got_fd;
+                }
+                else { /* got some error in the queue */
+                    csd = NULL;
+                    ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, 
+                        "ap_queue_pop");
+                }
+            }
+
             if ((rv = apr_accept(&csd, sd, ptrans)) != APR_SUCCESS) {
                 csd = NULL;
                 ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, 
@@ -678,9 +692,7 @@
     ap_scoreboard_image->parent[process_slot].quiescing = 1;
     kill(ap_my_pid, SIGTERM);
 
-/* this is uncommented when we make a pool-pool
     apr_thread_exit(thd, APR_SUCCESS);
-*/
     return NULL;
 }
 
@@ -695,8 +707,6 @@
 
     free(ti);
 
-    /* apr_pool_create(&ptrans, tpool); */
-
     while (!workers_may_exit) {
         rv = ap_queue_pop(worker_queue, &csd, &ptrans);
         /* We get FD_QUEUE_EINTR whenever ap_queue_pop() has been interrupted
@@ -708,11 +718,16 @@
         }
         process_socket(ptrans, csd, process_slot, thread_slot);
         requests_this_child--; /* FIXME: should be synchronized - aaron */
-        apr_pool_destroy(ptrans);
+
+        /* get this transaction pool ready for the next request */
+        apr_pool_clear(ptrans);
+        /* don't bother checking if we were interrupted in ap_queue_push,
+         * because we're going to check workers_may_exit right now anyway. */
+        ap_queue_push(pool_queue, NULL, ptrans);
     }
 
-    ap_update_child_status(process_slot, thread_slot, (dying) ? SERVER_DEAD : SERVER_GRACEFUL,
-        (request_rec *) NULL);
+    ap_update_child_status(process_slot, thread_slot,
+        (dying) ? SERVER_DEAD : SERVER_GRACEFUL, (request_rec *) NULL);
     apr_lock_acquire(worker_thread_count_mutex);
     worker_thread_count--;
     apr_lock_release(worker_thread_count_mutex);
@@ -731,7 +746,7 @@
     return 0;
 }
 
-static void *start_threads(apr_thread_t *thd, void * dummy)
+static void *start_threads(apr_thread_t *thd, void *dummy)
 {
     thread_starter *ts = dummy;
     apr_thread_t **threads = ts->threads;
@@ -743,12 +758,24 @@
     int i = 0;
     int threads_created = 0;
     apr_thread_t *listener;
+    apr_pool_t *ptrans;
+    apr_socket_t *dummycsd = NULL;
 
-    /* We must create the fd queue before we start up the listener
+    /* We must create the fd queues before we start up the listener
      * and worker threads. */
-    worker_queue = apr_pcalloc(pchild, sizeof(*worker_queue));
+    worker_queue = apr_pcalloc(pchild, sizeof(fd_queue_t));
     ap_queue_init(worker_queue, ap_threads_per_child, pchild);
 
+    /* create the resource pool of available transaction pools */
+    pool_queue = apr_pcalloc(pchild, sizeof(fd_queue_t));
+    ap_queue_init(pool_queue, ap_threads_per_child, pchild);
+    /* fill the pool_queue with real pools */
+    for (i = 0; i < ap_threads_per_child; i++) {
+        ptrans = NULL;
+        apr_pool_create(&ptrans, pchild);
+        ap_queue_push(pool_queue, dummycsd, ptrans);
+    }
+
     my_info = (proc_info *)malloc(sizeof(proc_info));
     my_info->pid = my_child_num;
     my_info->tid = i;
@@ -838,7 +865,7 @@
 
     ap_run_child_init(pchild, ap_server_conf);
 
-    /*done with init critical section */
+    /* done with init critical section */
 
     rv = apr_setup_signal_thread();
     if (rv != APR_SUCCESS) {

Re: [PATCH] worker MPM: reuse transaction pools

Posted by dean gaudet <de...@arctic.org>.
On Tue, 28 Aug 2001, Ryan Bloom wrote:

> Creating a pool requires locking a mutex.  The more we scale, the worse we
> perform.

a mutex is not a requirement... as i explained several months ago.  (and
as greg mentioned, this patch just moves the mutex to the queue from the
pool code, which makes it even more questionable.)



On Tue, 28 Aug 2001, Aaron Bannert wrote:

> Honestly, I can't give you any quantitative results right now, as I don't
> have a very good load-testing environment set up. By "small" I mean that
> using 'ab' with various levels of concurrency showed a possible improvement
> (on my single CPU machine), and definitely no loss of efficiency. If anyone
> out there could give me some results from before and after on some MP
> machine (4-way or more, preferably), that would be very useful.

generally loopback performance measurements should be considered suspect,
and should not be considered justification for commits, especially when
the commit increases code complexity.

-dean


Re: [PATCH] worker MPM: reuse transaction pools

Posted by Aaron Bannert <aa...@clove.org>.
On Tue, Aug 28, 2001 at 12:17:03PM -0700, Greg Stein wrote:
> On Mon, Aug 27, 2001 at 05:09:01PM -0700, Aaron Bannert wrote:
> > This patch implements a resource pool of context pools -- a queue of
> > available pools that the listener thread can pull from when accepting
> > a request. The worker thread that picks up that request then uses
> > that pool for the lifetime of that transaction, clear()ing the pool
> > and releasing it back to what I'm calling the "pool_queue" (har har).
> > This replaces the prior implementation that would create and destroy
> > a transaction pool for each and every request.
> > 
> > I'm seeing a small performance improvement with this patch, but I suspect
> > the fd_queue code could be improved for better parallelism. I also
> > suspect that with better testing this algorithm may prove more scalable.
> 
> What does "small" mean?
> 
> I can't believe it is all that large. Pool construction/destruction is
> actually quite fast. The bulk of the time is clearing the pool, which you
> must do anyways. I don't see how a pool queue can provide any benefit.
> 
> IOW, why should this complexity be added? Just how much does it improve
> things, and are you testing on a single or multi processor machine?
> 
> Cheers,
> -g
> 
> p.s. and yes, I know Ryan just applied it, but that doesn't mean it should
> stay there :-)

Honestly, I can't give you any quantitative results right now, as I don't
have a very good load-testing environment set up. By "small" I mean that
using 'ab' with various levels of concurrency showed a possible improvement
(on my single CPU machine), and definitely no loss of efficiency. If anyone
out there could give me some results from before and after on some MP
machine (4-way or more, preferably), that would be very useful.

I have an alternative that I've been working on. It's basically a thread pool
where N threads are created and stuffed into some queue. Each element in
the queue contains: a mutex and condition variable, some state variable
(an int), a pointer to an apr_socket_t, and a pool. As the listener
prepares to accept a waiting request, it pops an element off the queue,
uses that pool to do the accept, sets the socket, and signals the condition.
That thread then takes off, handles the request, clear()s the pool, and
returns itself to the queue. (If the queue is empty, the listener
blocks until another element becomes available.)
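
In rough terms, each queue element might look something like this (just a
sketch to make the shape concrete -- the field names are invented, and I'm
using the newer apr_thread_mutex/apr_thread_cond names rather than the
apr_lock API that worker uses today):

    #include "apr_pools.h"
    #include "apr_network_io.h"
    #include "apr_thread_mutex.h"
    #include "apr_thread_cond.h"

    typedef struct worker_elem_t {
        apr_thread_mutex_t *mutex;  /* guards state and sd */
        apr_thread_cond_t  *cond;   /* the worker sleeps here until signaled */
        int                 state;  /* e.g. IDLE, ACCEPTED, DYING */
        apr_socket_t       *sd;     /* filled in by the listener after accept() */
        apr_pool_t         *pool;   /* this worker's reusable transaction pool */
    } worker_elem_t;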

The benefits of this scheme over the current one are:
1) less of the code is within critical sections, so more parallelism
2) less contention on the mutex that guards the conditions, so more scalable
3) we get to keep the benefits of reusable transaction pools (which might
   be optimized further with an SMS algorithm tuned specifically to the
   typical blocks needed for a single HTTP request transaction)


Time permitting, I will try to post a patch illustrating this later today.

-aaron

Re: [PATCH] worker MPM: reuse transaction pools

Posted by Greg Stein <gs...@lyra.org>.
On Tue, Aug 28, 2001 at 01:01:11PM -0700, Ryan Bloom wrote:
> On Tuesday 28 August 2001 12:17, Greg Stein wrote:
> > On Mon, Aug 27, 2001 at 05:09:01PM -0700, Aaron Bannert wrote:
> > > This patch implements a resource pool of context pools -- a queue of
>...
> > IOW, why should this complexity be added? Just how much does it improve
> > things, and are you testing on a single or multi processor machine?
> 
> Creating a pool requires locking a mutex.  The more we scale, the worse we
> perform.

And manipulating the pool queue also requires a mutex, right? You can't just
let people arbitrarily hit that queue. Locking is needed.

Thus... why have the complexity of a queue? I don't see where the saving
happens. We still have locks, we still have pool clearing, etc.

Cheers,
-g

-- 
Greg Stein, http://www.lyra.org/

Re: [PATCH] worker MPM: reuse transaction pools

Posted by Ryan Bloom <rb...@covalent.net>.
On Tuesday 28 August 2001 12:17, Greg Stein wrote:
> On Mon, Aug 27, 2001 at 05:09:01PM -0700, Aaron Bannert wrote:
> > This patch implements a resource pool of context pools -- a queue of
> > available pools that the listener thread can pull from when accepting
> > a request. The worker thread that picks up that request then uses
> > that pool for the lifetime of that transaction, clear()ing the pool
> > and releasing it back to what I'm calling the "pool_queue" (har har).
> > This replaces the prior implementation that would create and destroy
> > a transaction pool for each and every request.
> >
> > I'm seeing a small performance improvement with this patch, but I suspect
> > the fd_queue code could be improved for better parallelism. I also
> > suspect that with better testing this algorithm may prove more scalable.
>
> What does "small" mean?
>
> I can't believe it is all that large. Pool construction/destruction is
> actually quite fast. The bulk of the time is clearing the pool, which you
> must do anyways. I don't see how a pool queue can provide any benefit.
>
> IOW, why should this complexity be added? Just how much does it improve
> things, and are you testing on a single or multi processor machine?

Creating a pool requires locking a mutex.  The more we scale, the worse we
perform.

Ryan

______________________________________________________________
Ryan Bloom				rbb@apache.org
Covalent Technologies			rbb@covalent.net
--------------------------------------------------------------

Re: [PATCH] worker MPM: reuse transaction pools

Posted by Greg Stein <gs...@lyra.org>.
On Mon, Aug 27, 2001 at 05:09:01PM -0700, Aaron Bannert wrote:
> This patch implements a resource pool of context pools -- a queue of
> available pools that the listener thread can pull from when accepting
> a request. The worker thread that picks up that request then uses
> that pool for the lifetime of that transaction, clear()ing the pool
> and releasing it back to what I'm calling the "pool_queue" (har har).
> This replaces the prior implementation that would create and destroy
> a transaction pool for each and every request.
> 
> I'm seeing a small performance improvement with this patch, but I suspect
> the fd_queue code could be improved for better parallelism. I also
> suspect that with better testing this algorithm may prove more scalable.

What does "small" mean?

I can't believe it is all that large. Pool construction/destruction is
actually quite fast. The bulk of the time is clearing the pool, which you
must do anyways. I don't see how a pool queue can provide any benefit.

IOW, why should this complexity be added? Just how much does it improve
things, and are you testing on a single or multi processor machine?

Cheers,
-g

p.s. and yes, I know Ryan just applied it, but that doesn't mean it should
stay there :-)

-- 
Greg Stein, http://www.lyra.org/

Re: [PATCH] worker MPM: reuse transaction pools

Posted by Ryan Bloom <rb...@covalent.net>.
On Tuesday 28 August 2001 07:04, Jeff Trawick wrote:
> Ryan Bloom <rb...@covalent.net> writes:
> > On Tuesday 28 August 2001 04:03, Jeff Trawick wrote:
> > > Aaron Bannert <aa...@clove.org> writes:
> > > > This patch implements a resource pool of context pools -- a queue of
> > > > available pools that the listener thread can pull from when accepting
> > > > a request. The worker thread that picks up that request then uses
> > > > that pool for the lifetime of that transaction, clear()ing the pool
> > > > and releasing it back to what I'm calling the "pool_queue" (har har).
> > > > This replaces the prior implementation that would create and destroy
> > > > a transaction pool for each and every request.
> > >
> > > (surely I'm missing something here... need to brew some coffee...)
> > >
> > > It seems that you allocate a ptrans for every thread but rather than
> > > store it in some thread-specific data you treat it like a shared
> > > resource, storing it in a queue, which introduces serialization and
> > > other overhead.
> > >
> > > If every thread has a ptrans, which it should, this overhead is
> > > unnecessary; the ptrans shouldn't be held in a shared data structure.
> >
> > How do you determine which ptrans to use unless it is a shared
> > resource?  The ptrans needs to be accessible by the listener to call
> > accept with.  If it is local to the worker thread, then you can't use it
> > in the listener.
>
> Yes, that is true...  I didn't even remember the code in my own MPM
> which has an accept thread and does essentially the same thing.

I'm likely to apply this when I get to the office in about an hour.

Ryan

______________________________________________________________
Ryan Bloom				rbb@apache.org
Covalent Technologies			rbb@covalent.net
--------------------------------------------------------------

Re: [PATCH] worker MPM: reuse transaction pools

Posted by Jeff Trawick <tr...@attglobal.net>.
Ryan Bloom <rb...@covalent.net> writes:

> On Tuesday 28 August 2001 04:03, Jeff Trawick wrote:
> > Aaron Bannert <aa...@clove.org> writes:
> > > This patch implements a resource pool of context pools -- a queue of
> > > available pools that the listener thread can pull from when accepting
> > > a request. The worker thread that picks up that request then uses
> > > that pool for the lifetime of that transaction, clear()ing the pool
> > > and releasing it back to what I'm calling the "pool_queue" (har har).
> > > This replaces the prior implementation that would create and destroy
> > > a transaction pool for each and every request.
> >
> > (surely I'm missing something here... need to brew some coffee...)
> >
> > It seems that you allocate a ptrans for every thread but rather than
> > store it in some thread-specific data you treat it like a shared
> > resource, storing it in a queue, which introduces serialization and
> > other overhead.
> >
> > If every thread has a ptrans, which it should, this overhead is
> > unnecessary; the ptrans shouldn't be held in a shared data structure.
> 
> How do you determine which ptrans to use unless it is a shared
> resource?  The ptrans needs to be accessible by the listener to call
> accept with.  If it is local to the worker thread, then you can't use it
> in the listener.

Yes, that is true...  I didn't even remember the code in my own MPM
which has an accept thread and does essentially the same thing.

-- 
Jeff Trawick | trawick@attglobal.net | PGP public key at web site:
       http://www.geocities.com/SiliconValley/Park/9289/
             Born in Roswell... married an alien...

Re: [PATCH] worker MPM: reuse transaction pools

Posted by Ryan Bloom <rb...@covalent.net>.
On Tuesday 28 August 2001 04:03, Jeff Trawick wrote:
> Aaron Bannert <aa...@clove.org> writes:
> > This patch implements a resource pool of context pools -- a queue of
> > available pools that the listener thread can pull from when accepting
> > a request. The worker thread that picks up that request then uses
> > that pool for the lifetime of that transaction, clear()ing the pool
> > and releasing it back to what I'm calling the "pool_queue" (har har).
> > This replaces the prior implementation that would create and destroy
> > a transaction pool for each and every request.
>
> (surely I'm missing something here... need to brew some coffee...)
>
> It seems that you allocate a ptrans for every thread but rather than
> store it in some thread-specific data you treat it like a shared
> resource, storing it in a queue, which introduces serialization and
> other overhead.
>
> If every thread has a ptrans, which it should, this overhead is
> unnecessary; the ptrans shouldn't be held in a shared data structure.

How do you determine which ptrans to use unless it is a shared
resource?  The ptrans needs to be accessible by the listener to call
accept with.  If it is local to the worker thread, then you can't use it
in the listener.

BTW, +1 for the patch.

Ryan

______________________________________________________________
Ryan Bloom				rbb@apache.org
Covalent Technologies			rbb@covalent.net
--------------------------------------------------------------

Re: [PATCH] worker MPM: reuse transaction pools

Posted by Jeff Trawick <tr...@attglobal.net>.
Aaron Bannert <aa...@clove.org> writes:

> This patch implements a resource pool of context pools -- a queue of
> available pools that the listener thread can pull from when accepting
> a request. The worker thread that picks up that request then uses
> that pool for the lifetime of that transaction, clear()ing the pool
> and releasing it back to what I'm calling the "pool_queue" (har har).
> This replaces the prior implementation that would create and destroy
> a transaction pool for each and every request.

(surely I'm missing something here... need to brew some coffee...)

It seems that you allocate a ptrans for every thread but rather than
store it in some thread-specific data you treat it like a shared
resource, storing it in a queue, which introduces serialization and
other overhead.

If every thread has a ptrans, which it should, this overhead is
unnecessary; the ptrans shouldn't be held in a shared data structure.
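
For reference, the thread-specific-data arrangement I have in mind would
look roughly like this (a hypothetical sketch against APR's threadkey API,
not code from any MPM; pchild and tpool are the pools worker.c already has):

    #include "apr_pools.h"
    #include "apr_thread_proc.h"

    static apr_threadkey_t *ptrans_key;

    /* once, during child init: */
    static void child_init_tsd(apr_pool_t *pchild)
    {
        apr_threadkey_private_create(&ptrans_key, NULL, pchild);
    }

    /* in each worker thread, at startup: */
    static void worker_init_tsd(apr_pool_t *tpool)
    {
        apr_pool_t *ptrans;
        apr_pool_create(&ptrans, tpool);
        apr_threadkey_private_set(ptrans, ptrans_key);
    }

    /* later, wherever that thread needs its pool: */
    static apr_pool_t *get_my_ptrans(void)
    {
        void *v;
        apr_threadkey_private_get(&v, ptrans_key);
        return v;
    }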

-- 
Jeff Trawick | trawick@attglobal.net | PGP public key at web site:
       http://www.geocities.com/SiliconValley/Park/9289/
             Born in Roswell... married an alien...

Re: [PATCH] worker MPM: reuse transaction pools

Posted by Aaron Bannert <aa...@clove.org>.
On Mon, Sep 10, 2001 at 03:13:48PM -0700, Brian Pane wrote:
> I suppose there are a total of four candidate designs, right?  It looks
> like you're varying two independent dimensions:
>   * condition variable strategy: one CV, or one per thread
>   * pool strategy: worker-managed, or listener-managed.
> so there are four possible permutations.  (The other two
> permutations would look like hybrids of "short and sweet" and
> "time-space tradeoff."  I don't know whether they're interesting
> algorithms or not, but it's worth noting that these hybrids are
> a possibility if you find in testing that neither of the first
> two designs stands out as the clear winner.)
> In the "pool strategy" dimension, I think a worker-managed pool design (as
> used in "time-space tradeoff") is a better choice than a 
> listener-managed one.
> With worker-managed, you can clear and re-use the pool, which is cheaper 
> than
> destroying and recreating it.  And it's very easy to eliminate locking for
> subpool creation by adding a thread-private free list (you could dig up my
> old patch for this, or Sander's patch which is a more general-purpose 
> solution).
> 
> In the "CV strategy" dimension, I'll wait for benchmark results before I try
> to guess which approach is fastest. :-)


Actually, the two dimensions you describe here are not entirely independent.
In both of my models above there is a global CV for managing the worker
queue. You are correct about the pool strategy, but that is pretty much
the only thing that varies.

In order to reuse pools, we need a way for the listener to get hold of
the transaction pool before it calls accept(). The product of accept()
is the socket, which must then make it back to the appropriate worker
thread, and that worker must somehow be awakened to begin processing
the accepted socket.

The "short and sweet" model is much more a producer-consumer: the listener
thread produces (pool, socket) pairs that are then consumed by the worker
threads.

The "time-space tradeoff" model implements a resource-pool of
waiting worker threads; the listener plucks out the next available
worker thread, calls accept() on it's behalf, and sends it on it's merry
way.


I'm beginning to realize that I will end up comparing the overhead of
creating/destroying a transaction pool once per request ("short and sweet")
against the overhead of managing that transaction pool in the reusable case,
plus the added overhead of an extra CV/mutex per thread
("time-space tradeoff").

(Hope that makes more sense to you; it definitely helped me :)
-aaron

Re: [PATCH] worker MPM: reuse transaction pools

Posted by Brian Pane <bp...@pacbell.net>.
Aaron Bannert wrote:

[...]

>1) "short and sweet"
>
> - single listener
>   - creates a new transaction pool
>   - uses that pool for the next accept()
>   - push()es the newly accepted socket and pool on the fd_queue
>
> - multiple workers
>   - waiting in pop() on the fd_queue
>   - performs process_socket() with the socket and pool received from pop()
>   - destroys the pool
>
> Notes: this is almost identical to what was in CVS before the patch
>        discussed below. The only change I would make would be to
>        remove one of the two condition variables.
>
>2) "time-space tradeoff"
>
> - single listener
>   - pop()s the next available worker-thread
>   - uses the pool from this worker-thread to make the call to accept()
>   - signals a condition in that worker-thread after the accept()
>
> - multiple workers
>   - creates a transaction pool
>     - push()es thread info (pool, socket-pointer, etc)
>     - waits on signal from listener
>     - (check that listener woke us up)
>     - perform process_socket() with the socket set from the listener
>     - clear the pool
>
I suppose there are a total of four candidate designs, right?  It looks
like you're varying two independent dimensions:
  * condition variable strategy: one CV, or one per thread
  * pool strategy: worker-managed, or listener-managed.
so there are four possible permutations.  (The other two
permutations would look like hybrids of "short and sweet" and
"time-space tradeoff."  I don't know whether they're interesting
algorithms or not, but it's worth noting that these hybrids are
a possibility if you find in testing that neither of the first
two designs stands out as the clear winner.)
In the "pool strategy" dimension, I think a worker-managed pool design (as
used in "time-space tradeoff") is a better choice than a 
listener-managed one.
With worker-managed, you can clear and re-use the pool, which is cheaper 
than
destroying and recreating it.  And it's very easy to eliminate locking for
subpool creation by adding a thread-private free list (you could dig up my
old patch for this, or Sander's patch which is a more general-purpose 
solution).
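
To picture the thread-private free list idea (purely illustrative -- both my
old patch and Sander's differ in the details):

    #include "apr_pools.h"

    #define FREELIST_MAX 8

    /* One of these lives in each thread, so no locking is ever needed. */
    typedef struct pool_freelist {
        apr_pool_t *pools[FREELIST_MAX];
        int n;
    } pool_freelist;

    static apr_pool_t *freelist_get(pool_freelist *fl, apr_pool_t *parent)
    {
        apr_pool_t *p;
        if (fl->n > 0) {
            return fl->pools[--fl->n];  /* fast path: no lock, hottest pool */
        }
        apr_pool_create(&p, parent);    /* slow path: hits the allocator mutex */
        return p;
    }

    static void freelist_put(pool_freelist *fl, apr_pool_t *p)
    {
        apr_pool_clear(p);              /* cheaper than destroy + recreate */
        if (fl->n < FREELIST_MAX) {
            fl->pools[fl->n++] = p;
        }
        else {
            apr_pool_destroy(p);
        }
    }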

In the "CV strategy" dimension, I'll wait for benchmark results before I try
to guess which approach is fastest. :-)

--Brian



Re: [PATCH] worker MPM: reuse transaction pools

Posted by Aaron Bannert <aa...@clove.org>.
The patch in question was nothing more than an attempt to further improve
worker, and was perhaps a little premature. I have, however, spent some
time developing two possible alternative implementations of the worker
MPM in addition to what is currently in CVS (which means that I don't
expect what's in there to stay for much longer). I am hoping to gain
access to some SMP boxes to test out my theories, but until then I will
describe the two scenarios here (and perhaps also prepare patches for
everyone to run on their favorite SMP box at their leisure):

1) "short and sweet"

 - single listener
   - creates a new transaction pool
   - uses that pool for the next accept()
   - push()es the newly accepted socket and pool on the fd_queue

 - multiple workers
   - waiting in pop() on the fd_queue
   - performs process_socket() with the socket and pool received from pop()
   - destroys the pool

 Notes: this is almost identical to what was in CVS before the patch
        discussed below. The only change I would make would be to
        remove one of the two condition variables.

2) "time-space tradeoff"

 - single listener
   - pop()s the next available worker-thread
   - uses the pool from this worker-thread to make the call to accept()
   - signals a condition in that worker-thread after the accept()

 - multiple workers
   - creates a transaction pool
     - push()es thread info (pool, socket-pointer, etc)
     - waits on signal from listener
     - (check that listener woke us up)
     - perform process_socket() with the socket set from the listener
     - clear the pool

 Notes: This adds some complication to the fd_queue code, but it removes
        most of the complexity that was a problem in the patch below.
        My implementation was able to remove much of the arithmetic from
        the critical sections of the fd_queue, possibly decreasing contention
        in this part of the code (especially good for scalability). Compared
        with what we have in CVS, it brings us from 4 shared condition
        variables down to 1, and adds a new CV to each thread (sort of like a
        time-space tradeoff). Your point about LIFO vs. FIFO is noted, and
        I'll implement a minor modification to test out this theory.
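
To make the design 2 handoff concrete, here is a rough sketch (illustrative
only -- push_idle_worker()/pop_idle_worker() are stand-ins for the real
fd_queue calls, and I'm using the newer apr_thread_mutex/apr_thread_cond
names; process_socket() and workers_may_exit are the ones already in
worker.c):

    typedef struct worker_elem_t {      /* one per worker thread */
        apr_thread_mutex_t *mutex;
        apr_thread_cond_t  *cond;
        apr_socket_t       *sd;         /* set by the listener */
        apr_pool_t         *pool;       /* this worker's transaction pool */
    } worker_elem_t;

    /* worker side (one of these iterations per request) */
    static void worker_wait_and_process(worker_elem_t *me, fd_queue_t *idle_queue,
                                        int process_slot, int thread_slot)
    {
        apr_thread_mutex_lock(me->mutex);
        push_idle_worker(idle_queue, me);   /* stand-in: advertise availability */
        while (me->sd == NULL && !workers_may_exit) {
            apr_thread_cond_wait(me->cond, me->mutex);
        }
        apr_thread_mutex_unlock(me->mutex);
        if (me->sd != NULL) {
            process_socket(me->pool, me->sd, process_slot, thread_slot);
            apr_pool_clear(me->pool);       /* ready for the next request */
            me->sd = NULL;
        }
    }

    /* listener side */
    static void listener_dispatch(fd_queue_t *idle_queue, apr_socket_t *sd)
    {
        apr_socket_t *csd;
        worker_elem_t *w = pop_idle_worker(idle_queue); /* stand-in: blocks
                                                         * until a worker is idle */
        apr_accept(&csd, sd, w->pool);      /* accept into the worker's own pool */
        apr_thread_mutex_lock(w->mutex);
        w->sd = csd;
        apr_thread_cond_signal(w->cond);    /* wake exactly that one worker */
        apr_thread_mutex_unlock(w->mutex);
    }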


I had hoped that I would be able to test out these two implementations on
some big hardware before posting such lengthy design descriptions to the
list, but if it would interest others I would be willing to prepare a patch
illustrating the two designs above. I welcome any critical dissection of
them.

-aaron


On Mon, Sep 10, 2001 at 01:30:03PM -0700, dean gaudet wrote:
> this is the wrong way to fix this problem.
> 
> i can't imagine any reason why creating a pool should be slow --
> rather than band-aid around it, i think it'd be better to find out that
> problem first.  it should be as simple as a couple pointer operations.
> 
> freelists are a feature of modern memory allocators -- including per-cpu
> optimisations.  the right fix is probably to start relying on libc more.
> (see apr-dev threads a few months ago where i was advocating getting rid
> of apr freelists entirely.)
> 
> fwiw -- freelist implementations are almost always better off LIFO rather
> than FIFO.  basically the most recently freed object is more likely to be
> mapped in the TLB and have valid cache lines.  older objects will incur
> TLB and cache misses.
> 
> you can take these comments as a veto, but i know the patch has been
> committed already.
> 
> -dean
> 
> On Mon, 27 Aug 2001, Aaron Bannert wrote:
> 
> > This patch implements a resource pool of context pools -- a queue of
> > available pools that the listener thread can pull from when accepting
> > a request. The worker thread that picks up that request then uses
> > that pool for the lifetime of that transaction, clear()ing the pool
> > and releasing it back to what I'm calling the "pool_queue" (har har).
> > This replaces the prior implementation that would create and destroy
> > a transaction pool for each and every request.
> >
> > I'm seeing a small performance improvement with this patch, but I suspect
> > the fd_queue code could be improved for better parallelism. I also
> > suspect that with better testing this algorithm may prove more scalable.
> >
> > -aaron

Re: [PATCH] worker MPM: reuse transaction pools

Posted by dean gaudet <de...@arctic.org>.
this is the wrong way to fix this problem.

i can't imagine any reason why creating a pool should be slow --
rather than band-aid around it, i think it'd be better to find out that
problem first.  it should be as simple as a couple pointer operations.

freelists are a feature of modern memory allocators -- including per-cpu
optimisations.  the right fix is probably to start relying on libc more.
(see apr-dev threads a few months ago where i was advocating getting rid
of apr freelists entirely.)

fwiw -- freelist implementations are almost always better off LIFO rather
than FIFO.  basically the most recently freed object is more likely to be
mapped in the TLB and have valid cache lines.  older objects will incur
TLB and cache misses.
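
concretely, the difference is just which end of the freelist you reuse
from -- e.g. (a sketch, not code from any real allocator):

    typedef struct block { struct block *next; } block;

    static block *head;

    static void freelist_push(block *b)
    {
        b->next = head;     /* LIFO: a freed block goes on the front... */
        head = b;
    }

    static block *freelist_pop(void)
    {
        block *b = head;    /* ...and is the first one handed back out, */
        if (b) {            /* while its cache lines are likely still hot */
            head = b->next;
        }
        return b;
    }

a fifo would instead append at a tail pointer and pop from the head,
always handing back the coldest block.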

you can take these comments as a veto, but i know the patch has been
committed already.

-dean

On Mon, 27 Aug 2001, Aaron Bannert wrote:

> This patch implements a resource pool of context pools -- a queue of
> available pools that the listener thread can pull from when accepting
> a request. The worker thread that picks up that request then uses
> that pool for the lifetime of that transaction, clear()ing the pool
> and releasing it back to what I'm calling the "pool_queue" (har har).
> This replaces the prior implementation that would create and destroy
> a transaction pool for each and every request.
>
> I'm seeing a small performance improvement with this patch, but I suspect
> the fd_queue code could be improved for better parallelism. I also
> suspect that with better testing this algorithm may prove more scalable.
>
> -aaron