You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@subversion.apache.org by Nix <ni...@esperi.org.uk> on 2005/07/24 16:35:00 UTC

Crash of svn-1.2.x in svn_ra_open()

I've seen this as long as I've been using 1.2.x: as you might imagine, a
crash in this routine is pretty disruptive :( notably, it stops me
doing remote updates of DAV sources, like, well, subversion's. :)

Here's a backtrace and locals dump:

amaterasu 477 /usr/packages/subversion/subversion% gdb --args /usr/bin/.svn ls http://svn.collab.net/repos/svn/tags/1.2.1
GNU gdb 6.3
Copyright 2004 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB.  Type "show warranty" for details.
This GDB was configured as "sparc-unknown-linux-gnu"...Using host libthread_db library "/lib/libthread_db.so.1".

(gdb) run
Starting program: /usr/bin/.svn ls http://svn.collab.net/repos/svn/tags/1.2.1
[Thread debugging using libthread_db enabled]
[New Thread 16384 (LWP 28846)]

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 16384 (LWP 28846)]
0x7024495c in memcmp () from /lib/libc.so.6
(gdb) bt
#0  0x7024495c in memcmp () from /lib/libc.so.6
#1  0x70125968 in find_entry (ht=0x41488, key=0x70615a78, klen=15, val=0x40fa0) at tables/apr_hash.c:260
#2  0x70125b38 in apr_hash_set (ht=0x41488, key=0x70615a78, klen=-1, val=0x40fa0) at tables/apr_hash.c:338
#3  0x706102d4 in svn_ra_dav__open (session=0x72ca8, repos_URL=0x72bd8 "http://svn.collab.net/repos/svn/tags/1.2.1", callbacks=0x72c18, callback_baton=0x72c30, config=0x400, pool=0x714b8)
    at subversion/libsvn_ra_dav/session.c:710
#4  0x7009d620 in svn_ra_open (session_p=0xeff1cdd8, repos_URL=0x72bd8 "http://svn.collab.net/repos/svn/tags/1.2.1", callbacks=0x72c18, callback_baton=0x72c30, config=0x40eb0,
    pool=0x714b8) at subversion/libsvn_ra/ra_loader.c:277
#5  0x7004a03c in svn_client__open_ra_session (ra_session=0xeff1cdd8, base_url=0x72bd8 "http://svn.collab.net/repos/svn/tags/1.2.1", base_dir=0x0, base_access=0x0, commit_items=0x0,
    use_admin=0, read_only_wc=0, ctx=0x40e88, pool=0x714b8) at subversion/libsvn_client/ra.c:288
#6  0x7004af10 in svn_client__ra_session_from_path (ra_session_p=0xeff1ceb4, rev_p=0xeff1ceb0, url_p=0xeff1ceac, path_or_url=0x714f0 "http://svn.collab.net/repos/svn/tags/1.2.1",
    peg_revision_p=0xeff1cfa8, revision=0xeff1d1b0, ctx=0x72c08, pool=0x714b8) at subversion/libsvn_client/ra.c:887
#7  0x70047f98 in svn_client_ls2 (dirents=0xeff1cf50, path_or_url=0x714f0 "http://svn.collab.net/repos/svn/tags/1.2.1", peg_revision=0xeff1cfa8, revision=0xeff1d1b0, recurse=0,
    ctx=0x40e88, pool=0x714b8) at subversion/libsvn_client/ls.c:88
#8  0x000163fc in svn_cl__ls (os=0x40ab8, baton=0xeff1d0e0, pool=0x40a80) at subversion/clients/cmdline/ls-cmd.c:325
#9  0x00017d2c in main (argc=93184, argv=0x25000) at subversion/clients/cmdline/main.c:1449
(gdb) frame 1
#1  0x70125968 in find_entry (ht=0x41488, key=0x70615a78, klen=15, val=0x40fa0) at tables/apr_hash.c:260
260             if (he->hash == hash
(gdb) info locals
hep = (apr_hash_entry_t **) 0x414bc
he = (apr_hash_entry_t *) 0x41828
hash = 1584920659
(gdb) list
255         hash = ht->hash_func(key, &klen);
256
257         /* scan linked list */
258         for (hep = &ht->array[hash & ht->max], he = *hep;
259              he; hep = &he->next, he = *hep) {
260             if (he->hash == hash
261                 && he->klen == klen
262                 && memcmp(he->key, key, klen) == 0)
263                 break;
264         }
(gdb) print (char *)key
$2 = 0x70615a78 "svn:auth:config"
(gdb) print (char *)he->key
$3 = 0x705fda78 <Address 0x705fda78 out of bounds>
(gdb)
(gdb) frame 2
#2  0x70125b38 in apr_hash_set (ht=0x41488, key=0x70615a78, klen=-1, val=0x40fa0) at tables/apr_hash.c:338
338         hep = find_entry(ht, key, klen, val);
(gdb) list
333                                    const void *key,
334                                    apr_ssize_t klen,
335                                    const void *val)
336     {
337         apr_hash_entry_t **hep;
338         hep = find_entry(ht, key, klen, val);
339         if (*hep) {
340             if (!val) {
341                 /* delete entry */
342                 apr_hash_entry_t *old = *hep;
(gdb) frame 3
#3  0x706102d4 in svn_ra_dav__open (session=0x72ca8, repos_URL=0x72bd8 "http://svn.collab.net/repos/svn/tags/1.2.1", callbacks=0x72c18, callback_baton=0x72c30, config=0x400, pool=0x714b8)
    at subversion/libsvn_ra_dav/session.c:710
710       svn_auth_set_parameter(ras->callbacks->auth_baton,
(gdb) list
705       ras->sess2 = sess2;
706       ras->callbacks = callbacks;
707       ras->callback_baton = callback_baton;
708       ras->compression = compression;
709       /* save config and server group in the auth parameter hash */
710       svn_auth_set_parameter(ras->callbacks->auth_baton,
711                              SVN_AUTH_PARAM_CONFIG, cfg);
712       svn_auth_set_parameter(ras->callbacks->auth_baton,
713                              SVN_AUTH_PARAM_SERVER_GROUP, server_group);
714
(gdb)

(GCC-3.4.4 has inlined the svn_auth_set_parameter() call into its caller, so
it doesn't appear in the backtrace.)


I've seen this both with apr-1.0.1/apr-util 1.0.1 and with
apr-1.1.1/apr-util-1.1.2.

I'm trying to hunt down where this hash element is added, but it'd be
useful if I knew if anyone had seen this already.

-- 
`But of course, GR is the very best relativity for the masses.'
 --- Wayne Throop

---------------------------------------------------------------------
To unsubscribe, e-mail: users-unsubscribe@subversion.tigris.org
For additional commands, e-mail: users-help@subversion.tigris.org

Re: [PATCH] RA sessions should always be loaded from the same pool

Posted by "C. Michael Pilato" <cm...@collab.net>.
Philip Martin <ph...@codematters.co.uk> writes:

> The auth design generated *huge* threads on the dev list, I certainly
> gave up attempting to follow the discussion.  I never really
> understood why we chose to use a hash to pass parameters.

The idea was that new auth implementations could use that hash as some
kind of type-indifferent global storehouse.  In retrospect, the idea
is garbage, and has cause no small amount of annoyance for the Python
bindings which have to try to translate "potentially-unknown-C-type"
into a Python object and back again.

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: [PATCH] RA sessions should always be loaded from the same pool

Posted by Philip Martin <ph...@codematters.co.uk>.
Nix <ni...@esperi.org.uk> writes:

> (I still wonder how this ever works on anyone's machine; perhaps a lot
> of OSes tend to put repeatedly dlopen()ed libraries at the same place
> every time?)

I suspect very few people use --enable-dso.

> I'm not sure if the underlying bug here is that the auth baton is
> persisting for too long, or that the DAV library's getting unloaded when
> it shouldn't be (why should a `svn ls' unload it only to load it again?)
> or that the bug is that we should never use string literals in
> dlopen()ed shared libraries...
>
> ... but the latter fix feels wrong somehow.

The auth design generated *huge* threads on the dev list, I certainly
gave up attempting to follow the discussion.  I never really
understood why we chose to use a hash to pass parameters.

It's not just the string literals used as hash keys, the hash values
are pointers to data allocated from the pool used to load the dso, so
they will also become invalid once the pool is cleared and the dso
unloaded.  I think we should have a pool cleanup function to remove
the key/value pairs from the hash.

-- 
Philip Martin

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: [PATCH] RA sessions should always be loaded from the same pool

Posted by Nix <ni...@esperi.org.uk>.
On Wed, 27 Jul 2005, Greg Hudson murmured woefully:
> On Wed, 2005-07-27 at 17:34 +0100, Nix wrote:
>> The cause of this is that a key in the auth_baton hash has been
>> invalidated and points at a no-longer-valid address. How, you might ask,
>> since all the keys are manifest constants?
> 
> Perhaps we should stop using a manifest constant, then.  We've certainly
> chosen to allocate memory in a passed-in pool rather than use a manifest
> constant in some past situations, for just this reason.
> 
> Can you say more about where these constants are coming from?

They are #defines which expand to string literals, such as
SVN_AUTH_PARAM_CONFIG, SVN_AUTH_PARAM_SERVER_GROUP, &c;
libsvn_ra_dav/session.c:svn_ra_dav__open() adds several of these via
svn_auth_set_parameter(), so there'll always be some in the baton as
long as an RA connection's been opened successfully.  And the pool that
the baton is allocated in may have a longer lifespan than the pool that
the shared object was allocated in.

(I still wonder how this ever works on anyone's machine; perhaps a lot
of OSes tend to put repeatedly dlopen()ed libraries at the same place
every time?)

>> So as a possibly more maintainable alternative, the hack below fixes
>> apr_dso_load() callers to always use tiny dedicated pools which are
>> never deallocated.
> 
> Your patch isn't very thread-safe.

It is thread-safe iff static allocation is thread-safe. Of course, the C
standard says nothing on this point; I can't see anything in POSIX,
either.  Curses, I thought there was something.

We'd have to wrap a mutex around the pool initialization.

>                                     We could use svn_ra_initialize() to
> set up the dedicated pool and a mutex protecting it, but because we
> introduced svn_ra_initialize() in 1.2, we can't require every client to
> call it.

Well, no. If old clients don't, they might crash. New ones should.

(Os isn't that good enough?)

> More importantly, it should be possible to dynamically load the
> Subversion client library and then unload it without leaving big
> footprints behind.  So, we should get our memory management story right,
> rather than applying a big hammer to the DSO system.

I'm not sure if the underlying bug here is that the auth baton is
persisting for too long, or that the DAV library's getting unloaded when
it shouldn't be (why should a `svn ls' unload it only to load it again?)
or that the bug is that we should never use string literals in
dlopen()ed shared libraries...

... but the latter fix feels wrong somehow.

-- 
`Tor employs several thousand editors who they keep in dank
 subterranean editing facilities not unlike Moria' -- James Nicoll 

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: [PATCH] RA sessions should always be loaded from the same pool

Posted by Greg Hudson <gh...@MIT.EDU>.
On Wed, 2005-07-27 at 23:27 +0100, Nix wrote:
> > I gave up trying to follow the auth stuff.  However, at around line
> > 720 svn_ra_dav__open calls svn_auth_set_parameter and passes things
> > allocated from its pool and these get stored in the hash passed via
> > the callbacks argument, so it looks like the hash could well have a
> > longer lifetime than the dav session.

Yuck.  Data stored within auth_baton should be allocated in the same
pool as auth_baton, according to our general philosophy, and there's no
elegant way to make that happen when the data being stored is a void
pointer.  We could add a call to retrieve the auth baton's pool,
perhaps.

Failing that, cleaning up the parameters in a pool cleanup function
seems prudent.  Like Philip, though, I'm a little lost as to how the
auth system works.  Why is ra_dav responsible for auth parameters with
names that aren't clearly within an ra_dav namespace?

> > Interesting, what do you see as the advantage of --enable-dso?

Some of our package distributors use it to manage dependencies.  By
moving mod_dav_svn into its own package, for instance, the core
Subversion package doesn't have to depend on httpd.  Similarly for
libsvn_fs_base and Berkeley DB.

> Simply that I use svnserve a lot more than ra_dav, and my svn server
> is so small that keeping the number of shared libs loaded down counts
> (or at least it counted in the days before prelink).

I'm surprised that this would matter.  Loading a shared library doesn't
necessitate actually reading in the whole thing from disk into memory,
although I suppose it does mean resolving from symbols.


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: [PATCH] RA sessions should always be loaded from the same pool

Posted by Nix <ni...@esperi.org.uk>.
On Wed, 27 Jul 2005, Philip Martin uttered the following:
> Nix <ni...@esperi.org.uk> writes:
> 
>> On Wed, 27 Jul 2005, Philip Martin announced authoritatively:
>>> The DSO will get unloaded when the pool used to load it is
>>> cleared/destroyed.  Does the parameters hash persist longer than the
>>> DSO?  That doesn't sound right to me, but then the auth stuff has so
>>> many layers I gave up trying to follow it some time ago :-(
>>
>> It feels wrong to me, too, but it appears to outlast the pool used by
>> the RA *loader*, which (from experiment ;) ) may not be the same pool.
> 
> I gave up trying to follow the auth stuff.  However, at around line
> 720 svn_ra_dav__open calls svn_auth_set_parameter and passes things
> allocated from its pool and these get stored in the hash passed via
> the callbacks argument, so it looks like the hash could well have a
> longer lifetime than the dav session.

That's my understanding, too. I seem to remember encountering a comment
saying that it was meant to last as long as the neon connection or
something like that...

Reproducing it is easy, anyway: `svn ls' under --enable-dso :)

>                                        Perhaps a pool cleanup function
> should clear the auth parameters?  I wonder if there are other cases
> where svn_auth_set_parameter does something similar?

None that I could find in the two cases where DSOs are unloaded (in
other cases, it doesn't seem to matter as much).

>>> I don't think --enable-dso gets much testing during development, the
>>> build broke in the run up to 1.2 and it took longer than usual to get
>>> noticed.
>>
>> Well, I never build without it, so if there's blatant breakage I'll
>> let you know :)
> 
> Interesting, what do you see as the advantage of --enable-dso?

Simply that I use svnserve a lot more than ra_dav, and my svn server
is so small that keeping the number of shared libs loaded down counts
(or at least it counted in the days before prelink).

I really should just upgrade that ancient crappy tiny server. But for
now I'll keep building with --enable-dso just so that I can tell when
it breaks like this :)

-- 
`Tor employs several thousand editors who they keep in dank
 subterranean editing facilities not unlike Moria' -- James Nicoll 

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: [PATCH] RA sessions should always be loaded from the same pool

Posted by Philip Martin <ph...@codematters.co.uk>.
Nix <ni...@esperi.org.uk> writes:

> On Wed, 27 Jul 2005, Philip Martin announced authoritatively:
>> The DSO will get unloaded when the pool used to load it is
>> cleared/destroyed.  Does the parameters hash persist longer than the
>> DSO?  That doesn't sound right to me, but then the auth stuff has so
>> many layers I gave up trying to follow it some time ago :-(
>
> It feels wrong to me, too, but it appears to outlast the pool used by
> the RA *loader*, which (from experiment ;) ) may not be the same pool.

I gave up trying to follow the auth stuff.  However, at around line
720 svn_ra_dav__open calls svn_auth_set_parameter and passes things
allocated from its pool and these get stored in the hash passed via
the callbacks argument, so it looks like the hash could well have a
longer lifetime than the dav session.  Perhaps a pool cleanup function
should clear the auth parameters?  I wonder if there are other cases
where svn_auth_set_parameter does something similar?

>> I don't think --enable-dso gets much testing during development, the
>> build broke in the run up to 1.2 and it took longer than usual to get
>> noticed.
>
> Well, I never build without it, so if there's blatant breakage I'll
> let you know :)

Interesting, what do you see as the advantage of --enable-dso?

-- 
Philip Martin

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: [PATCH] RA sessions should always be loaded from the same pool

Posted by Nix <ni...@esperi.org.uk>.
On Wed, 27 Jul 2005, Philip Martin announced authoritatively:
> Greg Hudson <gh...@MIT.EDU> writes:
> 
>> On Wed, 2005-07-27 at 17:34 +0100, Nix wrote:
>>> The cause of this is that a key in the auth_baton hash has been
>>> invalidated and points at a no-longer-valid address. How, you might ask,
>>> since all the keys are manifest constants?
[...]
>> Can you say more about where these constants are coming from?
> 
> I think Nix is referring to constants like SVN_AUTH_PARAM_CONFIG, each
> DSO most likely has it's own copy.

Indeed; since they're string constants in shared libraries, saving a
precognitive linker or an astonishingly powerful dynamic loader, each
DSO certainly will get its own copy. Which will vanish when that DSO's
unloaded.

> The stack trace is a bit odd, it shows svn_ra_dav__open calling
> apr_hash_set directly, but in the source code svn_ra_dav__open calls
> svn_auth_set_parameter which in turn calls apr_hash_set.

That's not unusual: GCC has inlined the one-line
svn_auth_set_parameter() into its caller.

>> More importantly, it should be possible to dynamically load the
>> Subversion client library and then unload it without leaving big
>> footprints behind.  So, we should get our memory management story right,
>> rather than applying a big hammer to the DSO system.
> 
> The DSO will get unloaded when the pool used to load it is
> cleared/destroyed.  Does the parameters hash persist longer than the
> DSO?  That doesn't sound right to me, but then the auth stuff has so
> many layers I gave up trying to follow it some time ago :-(

It feels wrong to me, too, but it appears to outlast the pool used by
the RA *loader*, which (from experiment ;) ) may not be the same pool.

> I don't think --enable-dso gets much testing during development, the
> build broke in the run up to 1.2 and it took longer than usual to get
> noticed.

Well, I never build without it, so if there's blatant breakage I'll
let you know :)

-- 
`Tor employs several thousand editors who they keep in dank
 subterranean editing facilities not unlike Moria' -- James Nicoll 

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: [PATCH] RA sessions should always be loaded from the same pool

Posted by Philip Martin <ph...@codematters.co.uk>.
Greg Hudson <gh...@MIT.EDU> writes:

> On Wed, 2005-07-27 at 17:34 +0100, Nix wrote:
>> The cause of this is that a key in the auth_baton hash has been
>> invalidated and points at a no-longer-valid address. How, you might ask,
>> since all the keys are manifest constants?
>
> Perhaps we should stop using a manifest constant, then.  We've certainly
> chosen to allocate memory in a passed-in pool rather than use a manifest
> constant in some past situations, for just this reason.
>
> Can you say more about where these constants are coming from?

I think Nix is referring to constants like SVN_AUTH_PARAM_CONFIG, each
DSO most likely has it's own copy.

The stack trace is a bit odd, it shows svn_ra_dav__open calling
apr_hash_set directly, but in the source code svn_ra_dav__open calls
svn_auth_set_parameter which in turn calls apr_hash_set.

>> So as a possibly more maintainable alternative, the hack below fixes
>> apr_dso_load() callers to always use tiny dedicated pools which are
>> never deallocated.
>
> Your patch isn't very thread-safe.  We could use svn_ra_initialize() to
> set up the dedicated pool and a mutex protecting it, but because we
> introduced svn_ra_initialize() in 1.2, we can't require every client to
> call it.
>
> More importantly, it should be possible to dynamically load the
> Subversion client library and then unload it without leaving big
> footprints behind.  So, we should get our memory management story right,
> rather than applying a big hammer to the DSO system.

The DSO will get unloaded when the pool used to load it is
cleared/destroyed.  Does the parameters hash persist longer than the
DSO?  That doesn't sound right to me, but then the auth stuff has so
many layers I gave up trying to follow it some time ago :-(

I don't think --enable-dso gets much testing during development, the
build broke in the run up to 1.2 and it took longer than usual to get
noticed.

-- 
Philip Martin

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: [PATCH] RA sessions should always be loaded from the same pool (was Re: Crash of svn-1.2.x in svn_ra_open())

Posted by Greg Hudson <gh...@MIT.EDU>.
On Wed, 2005-07-27 at 17:34 +0100, Nix wrote:
> The cause of this is that a key in the auth_baton hash has been
> invalidated and points at a no-longer-valid address. How, you might ask,
> since all the keys are manifest constants?

Perhaps we should stop using a manifest constant, then.  We've certainly
chosen to allocate memory in a passed-in pool rather than use a manifest
constant in some past situations, for just this reason.

Can you say more about where these constants are coming from?

> So as a possibly more maintainable alternative, the hack below fixes
> apr_dso_load() callers to always use tiny dedicated pools which are
> never deallocated.

Your patch isn't very thread-safe.  We could use svn_ra_initialize() to
set up the dedicated pool and a mutex protecting it, but because we
introduced svn_ra_initialize() in 1.2, we can't require every client to
call it.

More importantly, it should be possible to dynamically load the
Subversion client library and then unload it without leaving big
footprints behind.  So, we should get our memory management story right,
rather than applying a big hammer to the DSO system.


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

[PATCH] RA sessions should always be loaded from the same pool (was Re: Crash of svn-1.2.x in svn_ra_open())

Posted by Nix <ni...@esperi.org.uk>.
Back on Sun, 24 Jul 2005 I mentioned this apparent bug on the users
list:
> I've seen this as long as I've been using 1.2.x: as you might imagine, a
> crash in this routine is pretty disruptive :( notably, it stops me
> doing remote updates of DAV sources, like, well, subversion's. :)

I've tracked this down, but you might not have read that post (it
garnered no comment whatsoever), so here's some of the same info again,
a little more diagnostics, and a putative fix (I know it fixes the bug,
but not if the bug is fixed the right way).

The immediate symptoms are a crash like this, with multiple versions of
APR and on diverse glibc/linux-2.6 architectures:

> amaterasu 477 /usr/packages/subversion/subversion% gdb --args /usr/bin/.svn ls http://svn.collab.net/repos/svn/tags/1.2.1
[...]
> 0x7024495c in memcmp () from /lib/libc.so.6
> (gdb) bt
> #0  0x7024495c in memcmp () from /lib/libc.so.6
> #1  0x70125968 in find_entry (ht=0x41488, key=0x70615a78, klen=15, val=0x40fa0) at tables/apr_hash.c:260
> #2  0x70125b38 in apr_hash_set (ht=0x41488, key=0x70615a78, klen=-1, val=0x40fa0) at tables/apr_hash.c:338
> #3  0x706102d4 in svn_ra_dav__open (session=0x72ca8, repos_URL=0x72bd8 "http://svn.collab.net/repos/svn/tags/1.2.1", callbacks=0x72c18, callback_baton=0x72c30, config=0x400, pool=0x714b8)
>     at subversion/libsvn_ra_dav/session.c:710
> #4  0x7009d620 in svn_ra_open (session_p=0xeff1cdd8, repos_URL=0x72bd8 "http://svn.collab.net/repos/svn/tags/1.2.1", callbacks=0x72c18, callback_baton=0x72c30, config=0x40eb0,
>     pool=0x714b8) at subversion/libsvn_ra/ra_loader.c:277
> #5  0x7004a03c in svn_client__open_ra_session (ra_session=0xeff1cdd8, base_url=0x72bd8 "http://svn.collab.net/repos/svn/tags/1.2.1", base_dir=0x0, base_access=0x0, commit_items=0x0,
>     use_admin=0, read_only_wc=0, ctx=0x40e88, pool=0x714b8) at subversion/libsvn_client/ra.c:288
> #6  0x7004af10 in svn_client__ra_session_from_path (ra_session_p=0xeff1ceb4, rev_p=0xeff1ceb0, url_p=0xeff1ceac, path_or_url=0x714f0 "http://svn.collab.net/repos/svn/tags/1.2.1",
>     peg_revision_p=0xeff1cfa8, revision=0xeff1d1b0, ctx=0x72c08, pool=0x714b8) at subversion/libsvn_client/ra.c:887
> #7  0x70047f98 in svn_client_ls2 (dirents=0xeff1cf50, path_or_url=0x714f0 "http://svn.collab.net/repos/svn/tags/1.2.1", peg_revision=0xeff1cfa8, revision=0xeff1d1b0, recurse=0,
>     ctx=0x40e88, pool=0x714b8) at subversion/libsvn_client/ls.c:88
> #8  0x000163fc in svn_cl__ls (os=0x40ab8, baton=0xeff1d0e0, pool=0x40a80) at subversion/clients/cmdline/ls-cmd.c:325
> #9  0x00017d2c in main (argc=93184, argv=0x25000) at subversion/clients/cmdline/main.c:1449

The cause of this is that a key in the auth_baton hash has been
invalidated and points at a no-longer-valid address. How, you might ask,
since all the keys are manifest constants? Alas, that's pretty clear if
you stick a breakpoint on apr_hash_sets of this hash table and have a
look at the memory map each time up until the crash:

705e8000-705fe000 r-xp 00000000 fe:03 409821                             /usr/packages.bin/subversion/1.2.1/lib/libsvn_ra_dav-1.so.0.0.0
705fe000-7060e000 ---p 00016000 fe:03 409821                             /usr/packages.bin/subversion/1.2.1/lib/libsvn_ra_dav-1.so.0.0.0
7060e000-70610000 rwxp 00016000 fe:03 409821                             /usr/packages.bin/subversion/1.2.1/lib/libsvn_ra_dav-1.so.0.0.0

705e8000-705fe000 r-xp 00000000 fe:03 409821                             /usr/packages.bin/subversion/1.2.1/lib/libsvn_ra_dav-1.so.0.0.0
705fe000-7060e000 ---p 00016000 fe:03 409821                             /usr/packages.bin/subversion/1.2.1/lib/libsvn_ra_dav-1.so.0.0.0
7060e000-70610000 rwxp 00016000 fe:03 409821                             /usr/packages.bin/subversion/1.2.1/lib/libsvn_ra_dav-1.so.0.0.0

70600000-70616000 r-xp 00000000 fe:03 409821                             /usr/packages.bin/subversion/1.2.1/lib/libsvn_ra_dav-1.so.0.0.0
70616000-70626000 ---p 00016000 fe:03 409821                             /usr/packages.bin/subversion/1.2.1/lib/libsvn_ra_dav-1.so.0.0.0
70626000-70628000 rwxp 00016000 fe:03 409821                             /usr/packages.bin/subversion/1.2.1/lib/libsvn_ra_dav-1.so.0.0.0

Whoops. That's invalidated all the manifest constants that came from the
old shared library quite efficiently, and the next access to that
auth_baton will crash SVN (and does).

A smoking gun from a run with LD_DEBUG=files set:

      2256:     calling init: /usr/lib/libsvn_ra_dav-1.so.0
      2256:
      2256:     opening file=/usr/lib/libsvn_ra_dav-1.so.0 [0]; direct_opencount=1
[...]
      2256:     calling fini: /usr/lib/libsvn_ra_dav-1.so.0 [0]
[... ra_dav's auth_baton is now filled with dangling pointers, but nothing accesses
 them until ...]
      2256:     calling init: /usr/lib/libsvn_ra_dav-1.so.0
      2256:
      2256:     opening file=/usr/lib/libsvn_ra_dav-1.so.0 [0]; direct_opencount=1
[crash almost immediately afterwards]


This is happening because svn_client__open_ra_session() initiates an
apr_dso_load() of libsvn_ra_dav.so, and it is called repeatedly with
*different pools*. The net effect is to load and unload it repeatedly,
and POSIX certainly doesn't guarantee that it gets loaded at the same
address each time. (The address-space randomization work in OpenBSD,
Fedora, and recently the mainline kernel tries to *ensure* that it never
gets loaded at the same address more than once, but it was never
certain to work even under other POSIX-conforming OSes.)


It looks like subversion must be especially careful not to call
svn_ra_open() or svn_fs_open() (or anything else which eventually loads
DSOs) more than once if they use different pools.  (I know that under
POSIX-like OSes, calling apr_dso_load() more than once is harmless: I'm
not sure about Win32, and the APR docs are useless on this point as on
so many others.)

It would be good if APR supported disabling unloading of DSOs entirely:
SVN doesn't need it, and its effects are only damaging. (Personally, I
don't think dlclose() is *ever* a good idea, because of things like
this.)

So as a possibly more maintainable alternative, the hack below fixes
apr_dso_load() callers to always use tiny dedicated pools which are
never deallocated. It works (`svn ls
http://svn.collab.net/repos/svn/tags/1.2.1' no longer crashes, and `make
check' still passes) but it is possibly not the right thing to do. (I'm
a subversion and APR coding neophyte and so don't have the right
instincts in this area yet.)

2005-07-27  Nix  <ni...@esperi.org.uk>

	* libsvn_ra/ra_loader.c (load_ra_module): Allocate a never-freed
        pool for DSO loading.
	* libsvn_fs/fs-loader.c (load_module): Likewise.

diff -dpurN subversion-orig/subversion/libsvn_fs/fs-loader.c subversion/subversion/libsvn_fs/fs-loader.c
--- subversion-orig/subversion/libsvn_fs/fs-loader.c	2005-07-02 14:29:01.000000000 +0100
+++ subversion/subversion/libsvn_fs/fs-loader.c	2005-07-27 15:55:11.000000000 +0100
@@ -87,6 +87,12 @@ load_module (fs_init_func_t *initfunc, c
     const char *libname;
     const char *funcname;
     apr_status_t status;
+    static apr_pool_t *dedicated_dso_pool = NULL;
+
+    if (!dedicated_dso_pool)
+      {
+        dedicated_dso_pool = svn_pool_create (NULL);
+      }
 
     libname = apr_psprintf (pool, "libsvn_fs_%s-%d.so.0",
                             name, SVN_VER_MAJOR);
@@ -94,8 +100,8 @@ load_module (fs_init_func_t *initfunc, c
 
     /* Find/load the specified library.  If we get an error, assume
        the library doesn't exist.  The library will be unloaded when
-       pool is destroyed. */
-    status = apr_dso_load (&dso, libname, pool);
+       SVN is shut down. */
+    status = apr_dso_load (&dso, libname, dedicated_dso_pool);
     if (status)
       return SVN_NO_ERROR;
 
diff -dpurN subversion-orig/subversion/libsvn_ra/ra_loader.c subversion/subversion/libsvn_ra/ra_loader.c
--- subversion-orig/subversion/libsvn_ra/ra_loader.c	2005-07-02 14:28:56.000000000 +0100
+++ subversion/subversion/libsvn_ra/ra_loader.c	2005-07-27 15:55:23.000000000 +0100
@@ -125,6 +125,12 @@ load_ra_module (svn_ra__init_func_t *fun
     const char *funcname;
     const char *compat_funcname;
     apr_status_t status;
+    static apr_pool_t *dedicated_dso_pool = NULL;
+
+    if (!dedicated_dso_pool)
+      {
+        dedicated_dso_pool = svn_pool_create (NULL);
+      }
 
     libname = apr_psprintf (pool, "libsvn_ra_%s-%d.so.0",
                             ra_name, SVN_VER_MAJOR);
@@ -132,7 +138,7 @@ load_ra_module (svn_ra__init_func_t *fun
     compat_funcname = apr_psprintf (pool, "svn_ra_%s_init", ra_name);
 
     /* find/load the specified library */
-    status = apr_dso_load (&dso, libname, pool);
+    status = apr_dso_load (&dso, libname, dedicated_dso_pool);
     if (status)
       {
 #if 0
@@ -145,7 +151,7 @@ load_ra_module (svn_ra__init_func_t *fun
         /* Just ignore the error. Assume the library isn't present */
         return SVN_NO_ERROR;
       }
-    /* note: the library will be unloaded at pool cleanup */
+    /* note: the library will be unloaded when SVN is shut down */
 
     /* find the initialization routines */
     if (func)

-- 
`Tor employs several thousand editors who they keep in dank
 subterranean editing facilities not unlike Moria' -- James Nicoll 

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org