You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@subversion.apache.org by Eric Gillespie <ep...@google.com> on 2008/03/25 21:57:43 UTC

svn_cache review

> Index: subversion/include/svn_cache.h
> ===================================================================
> +/**
> + * Fetches a value indexed by @a key from @a cache into @a *value,
> + * setting @a found to TRUE iff it is in the cache.  The value is
> + * copied into @a pool using the copy function provided to the cache's
> + * constructor.
> + */

Say that you set found to FALSE if it is not found; as currently
written, the doc implies that you just leave found alone in that case.

> Index: subversion/libsvn_subr/cache.c
> ===================================================================
> +struct svn_cache_t {
> +  /* Maps from a key (of size CACHE->KLEN) to a struct cache_entry. */
> +  apr_hash_t *hash;
> +  apr_ssize_t klen;

Maybe it doesn't matter, but why can keys be only one size?
apr_hash_get takes the key size, rather than storing it in the
apr_hash_t itself.  This way you can use a string/length in
addition to a null-terminated string.

> +struct cache_entry {
> +  const void *key;
> +  void *value;

These should both be const.  It's the object pointed to that you're
protecting, not the pointer itself, which you can and do write to.

> +/* Uses CACHE->dup_func to copy VALUE into *VALUE_P inside POOL, or
> +   just sets *VALUE_P to NULL if VALUE is NULL. */
> +static svn_error_t *
> +duplicate_value(void **value_p,
> +                svn_cache_t *cache,
> +                void *value,
> +                apr_pool_t *pool)
> +{
> +  if (value)
> +    SVN_ERR((cache->dup_func)(value_p, value, pool));
> +  else
> +    *value_p = NULL;
> +  return SVN_NO_ERROR;
> +}

You take care to handle NULL values here, but apr_hash can't
handle them anyway, right?  It uses a NULL value to remove a key.

> +/* Copies KEY into *KEY_P inside POOL, using CACHE->KLEN to figure out
> + * how. */
> +static const void *
> +duplicate_key(svn_cache_t *cache,
> +              const void *key,
> +              apr_pool_t *pool)

The comment says you copy into *KEY_P, but the function actually
returns the copy; please update the comment to match.

> +svn_error_t *
> +svn_cache_get(void **value_p,
> +              svn_boolean_t *found,
> +              svn_cache_t *cache,
> +              const void *key,
> +              apr_pool_t *pool)
> +{
> +  void *entry_void;

What's the point of this?  entry = apr_hash_get(...) should work.

> +static void
> +erase_page(svn_cache_t *cache,
> +           struct cache_page *page)
> +{
> +  remove_page_from_list(page);
> +  struct cache_entry *e;

Statement before declaration.

> +svn_error_t *
> +svn_cache_set(svn_cache_t *cache,
> +              const void *key,
> +              void *value,
> +              apr_pool_t *pool)
> +{
> +  void *existing_entry;
> +  svn_error_t *err = SVN_NO_ERROR;
> +
> +  SVN_ERR(lock_cache(cache));
> +
> +  existing_entry = apr_hash_get(cache->hash, key, cache->klen);
> +
> +  /* Is it already here, but we can do the one-item-per-page
> +   * optimization? */
> +  if (existing_entry && cache->items_per_page == 1)
> +    {
> +      /* Special case!  ENTRY is the *only* entry on this page, so
> +       * why not wipe it (so as not to leak the previous value).
> +       */
> +      struct cache_entry *entry = existing_entry;
> +      struct cache_page *page = entry->page;
> +
> +      /* This can't be the partial page, items_per_page == NULL

"items_per_page == 1"

Reviewing the changes that use the cache next...

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: svn_cache review

Posted by David Glasser <gl...@davidglasser.net>.
r30148, r30149, and r30150 address your concerns from this thread,
except for the const void * one, which I'm looking into now.

--dave

On Tue, Mar 25, 2008 at 5:08 PM, Eric Gillespie <ep...@google.com> wrote:
> > Index: subversion/libsvn_fs_fs/tree.c
>  > ===================================================================
>
>  >  /* Invalidate cache entries for PATH and any of its children. */
>  > -static void
>  > +static svn_error_t *
>  >  dag_node_cache_invalidate(svn_fs_root_t *root,
>  > -                          const char *path)
>  > +                          const char *path,
>  > +                          apr_pool_t *pool)
>  >  {
>  > -  fs_txn_root_data_t *frd;
>  > -  apr_size_t len = strlen(path);
>  > -  const char *key;
>  > -  dag_node_cache_t *item;
>  > +  struct fdic_baton b;
>  > +  svn_cache_t *cache;
>  > +  int i;
>  >
>  > +  b.path = path;
>  > +  b.pool = svn_pool_create(pool);
>  > +  b.list = apr_array_make(b.pool, 1, sizeof(const char *));
>  > +
>  >    assert(root->is_txn_root);
>  > +  locate_cache(&cache, NULL, root, NULL, b.pool);
>  >
>  > -  frd = root->fsap_data;
>  >
>  > -  for (item = frd->txn_node_list.next;
>  > -       item != &frd->txn_node_list;
>  > -       item = item->next)
>  > +  SVN_ERR(svn_cache_iter(NULL, cache, find_descendents_in_cache,
>  > +                         &b, b.pool));
>  > +
>  > +  for (i = 0; i < b.list->nelts; i++)
>  >      {
>  > -      key = item->key;
>  > -      if (strncmp(key, path, len) == 0 && (key[len] == '/' || !key[len]))
>  > -        item->node = NULL;
>  > +      const char *descendent = APR_ARRAY_IDX(b.list, i, const char *);
>  > +      SVN_ERR(svn_cache_set(cache, descendent, NULL, b.pool));
>
>  Use an iterpool for this loop?
>
>  > +make_txn_root(svn_fs_root_t **root_p,
>  > +              svn_fs_t *fs,
>  >                const char *txn,
>  >                svn_revnum_t base_rev,
>  >                apr_uint32_t flags,
>  > @@ -3801,11 +3775,14 @@
>  >    root->txn_flags = flags;
>  >    root->rev = base_rev;
>  >
>  > -  frd->txn_node_cache = apr_hash_make(root->pool);
>  > -  frd->txn_node_list.prev = &frd->txn_node_list;
>  > -  frd->txn_node_list.next = &frd->txn_node_list;
>  > +  /* Because this cache actually tries to invalidate elements, keep
>  > +     the number of elements per page down. */
>  > +  SVN_ERR(svn_cache_create(&(frd->txn_node_cache),
>  > +                           svn_fs_fs__dag_dup_for_cache, APR_HASH_KEY_STRING,
>  > +                           32, 20, FALSE, root->pool));
>
>  We don't have to go there right now, but it seems unlikely to me
>  that one cache size fits all; should we allow admins to tune them?
>
>  > Index: subversion/libsvn_fs_fs/fs.c
>  > ===================================================================
>
>  > @@ -114,6 +122,50 @@
>  >
>  >    ffd->shared = ffsd;
>  >
>  > +  /* Now do it all again for the caches; this key contains the path. */
>  > +  key = apr_pstrcat(pool, SVN_FSFS_SHARED_CACHES_USERDATA_PREFIX, ffd->uuid,
>  > +                    "/", fs->path, (char *) NULL);
>  > +  status = apr_pool_userdata_get(&val, key, common_pool);
>  > +  if (status)
>  > +    return svn_error_wrap_apr(status, _("Can't fetch FSFS shared caches data"));
>  > +  ffsc = val;
>  > +
>  > +  if (!ffsc)
>  > +    {
>  > +      ffsc = apr_pcalloc(common_pool, sizeof(*ffsc));
>  > +
>  > +      /* Make the cache for revision roots.  For the vast majority of
>  > +       * commands, this is only going to contain a few entries (svnadmin
>  > +       * dump/verify is an exception here), so to reduce overhead let's
>  > +       * try to keep it to just one page.  I estimate each entry has about
>  > +       * 72 bytes of overhead (svn_revnum_t key, svn_fs_id_t +
>  > +       * id_private_t + 3 strings for value, and the cache_entry); the
>  > +       * default pool size is 8192, so about a hundred should fit
>  > +       * comfortably. */
>  > +      SVN_ERR(svn_cache_create(&(ffsc->rev_root_id_cache),
>  > +                               dup_ids, sizeof(svn_revnum_t),
>  > +                               1, 100, TRUE, common_pool));
>  > +
>  > +      /* Rough estimate: revision DAG nodes have size around 320 bytes, so
>  > +       * let's put 16 on a page. */
>  > +      SVN_ERR(svn_cache_create(&(ffsc->rev_node_cache),
>  > +                               svn_fs_fs__dag_dup_for_cache,
>  > +                               APR_HASH_KEY_STRING,
>  > +                               1024, 16, TRUE, common_pool));
>  > +
>  > +      /* Very rough estimate: 1K per directory. */
>  > +      SVN_ERR(svn_cache_create(&(ffsc->dir_cache),
>  > +                               dup_dir_listing, APR_HASH_KEY_STRING,
>  > +                               1024, 8, TRUE, common_pool));
>
>  As we discussed in person, allocating these things in a
>  process-lifetime pool (common_pool in fs-loader.c) is probably
>  not a problem, but worth documenting.
>
>  > +/* We have two different "shared between all svn_fs_t objects for the
>  > +   same filesystem" structures, allocated in the common pool.  Why two?
>  [...]
>
>  Thanks for all the great comments; they really helped.
>
>  I had a lot less to say on this round, so I may have missed
>  potential problems if I didn't sufficiently understand some
>  surrounding context of existing code.  It all looked right, but
>  it would be nice to have another person as familiar with fsfs as
>  you to review this, too.
>



-- 
David Glasser | glasser@davidglasser.net | http://www.davidglasser.net/

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: svn_cache review

Posted by Eric Gillespie <ep...@google.com>.
> Index: subversion/libsvn_fs_fs/tree.c
> ===================================================================

>  /* Invalidate cache entries for PATH and any of its children. */
> -static void
> +static svn_error_t *
>  dag_node_cache_invalidate(svn_fs_root_t *root,
> -                          const char *path)
> +                          const char *path,
> +                          apr_pool_t *pool)
>  {
> -  fs_txn_root_data_t *frd;
> -  apr_size_t len = strlen(path);
> -  const char *key;
> -  dag_node_cache_t *item;
> +  struct fdic_baton b;
> +  svn_cache_t *cache;
> +  int i;
>  
> +  b.path = path;
> +  b.pool = svn_pool_create(pool);
> +  b.list = apr_array_make(b.pool, 1, sizeof(const char *));
> +
>    assert(root->is_txn_root);
> +  locate_cache(&cache, NULL, root, NULL, b.pool);
>  
> -  frd = root->fsap_data;
>  
> -  for (item = frd->txn_node_list.next;
> -       item != &frd->txn_node_list;
> -       item = item->next)
> +  SVN_ERR(svn_cache_iter(NULL, cache, find_descendents_in_cache,
> +                         &b, b.pool));
> +
> +  for (i = 0; i < b.list->nelts; i++)
>      {
> -      key = item->key;
> -      if (strncmp(key, path, len) == 0 && (key[len] == '/' || !key[len]))
> -        item->node = NULL;
> +      const char *descendent = APR_ARRAY_IDX(b.list, i, const char *);
> +      SVN_ERR(svn_cache_set(cache, descendent, NULL, b.pool));

Use an iterpool for this loop?

> +make_txn_root(svn_fs_root_t **root_p,
> +              svn_fs_t *fs,
>                const char *txn,
>                svn_revnum_t base_rev,
>                apr_uint32_t flags,
> @@ -3801,11 +3775,14 @@
>    root->txn_flags = flags;
>    root->rev = base_rev;
>  
> -  frd->txn_node_cache = apr_hash_make(root->pool);
> -  frd->txn_node_list.prev = &frd->txn_node_list;
> -  frd->txn_node_list.next = &frd->txn_node_list;
> +  /* Because this cache actually tries to invalidate elements, keep
> +     the number of elements per page down. */
> +  SVN_ERR(svn_cache_create(&(frd->txn_node_cache),
> +                           svn_fs_fs__dag_dup_for_cache, APR_HASH_KEY_STRING,
> +                           32, 20, FALSE, root->pool));

We don't have to go there right now, but it seems unlikely to me
that one cache size fits all; should we allow admins to tune them?

> Index: subversion/libsvn_fs_fs/fs.c
> ===================================================================

> @@ -114,6 +122,50 @@
>  
>    ffd->shared = ffsd;
>  
> +  /* Now do it all again for the caches; this key contains the path. */
> +  key = apr_pstrcat(pool, SVN_FSFS_SHARED_CACHES_USERDATA_PREFIX, ffd->uuid,
> +                    "/", fs->path, (char *) NULL);
> +  status = apr_pool_userdata_get(&val, key, common_pool);
> +  if (status)
> +    return svn_error_wrap_apr(status, _("Can't fetch FSFS shared caches data"));
> +  ffsc = val;
> +
> +  if (!ffsc)
> +    {
> +      ffsc = apr_pcalloc(common_pool, sizeof(*ffsc));
> +
> +      /* Make the cache for revision roots.  For the vast majority of
> +       * commands, this is only going to contain a few entries (svnadmin
> +       * dump/verify is an exception here), so to reduce overhead let's
> +       * try to keep it to just one page.  I estimate each entry has about
> +       * 72 bytes of overhead (svn_revnum_t key, svn_fs_id_t +
> +       * id_private_t + 3 strings for value, and the cache_entry); the
> +       * default pool size is 8192, so about a hundred should fit
> +       * comfortably. */
> +      SVN_ERR(svn_cache_create(&(ffsc->rev_root_id_cache),
> +                               dup_ids, sizeof(svn_revnum_t),
> +                               1, 100, TRUE, common_pool));
> +
> +      /* Rough estimate: revision DAG nodes have size around 320 bytes, so
> +       * let's put 16 on a page. */
> +      SVN_ERR(svn_cache_create(&(ffsc->rev_node_cache),
> +                               svn_fs_fs__dag_dup_for_cache,
> +                               APR_HASH_KEY_STRING,
> +                               1024, 16, TRUE, common_pool));
> +
> +      /* Very rough estimate: 1K per directory. */
> +      SVN_ERR(svn_cache_create(&(ffsc->dir_cache),
> +                               dup_dir_listing, APR_HASH_KEY_STRING,
> +                               1024, 8, TRUE, common_pool));

As we discussed in person, allocating these things in a
process-lifetime pool (common_pool in fs-loader.c) is probably
not a problem, but worth documenting.

> +/* We have two different "shared between all svn_fs_t objects for the
> +   same filesystem" structures, allocated in the common pool.  Why two?
[...]

Thanks for all the great comments; they really helped.

I had a lot less to say on this round, so I may have missed
potential problems if I didn't sufficiently understand some
surrounding context of existing code.  It all looked right, but
it would be nice to have another person as familiar with fsfs as
you to review this, too.

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: svn_cache review

Posted by Eric Gillespie <ep...@google.com>.
"David Glasser" <gl...@davidglasser.net> writes:

> >  > Index: subversion/libsvn_subr/cache.c
> >  > ===================================================================
> >  > +struct svn_cache_t {
> >  > +  /* Maps from a key (of size CACHE->KLEN) to a struct cache_entry. */
> >  > +  apr_hash_t *hash;
> >  > +  apr_ssize_t klen;
> >
> >  Maybe it doesn't matter, but why can keys be only one size?
> >  apr_hash_get takes the key size, rather than storing it in the
> >  apr_hash_t itself.  This way you can use a string/length in
> >  addition to a null-terminated string.
> 
> Can you give me an example of any hash in the Subversion codebase that
> doesn't always use the same klen argument for every call to
> apr_hash_get and apr_hash_set?

Nope, I have no examples.  Just wanted to see what you thought.

> >  > +/* Uses CACHE->dup_func to copy VALUE into *VALUE_P inside POOL, or
> >  > +   just sets *VALUE_P to NULL if VALUE is NULL. */
> >  > +static svn_error_t *
> >  > +duplicate_value(void **value_p,
> >  > +                svn_cache_t *cache,
> >  > +                void *value,
> >  > +                apr_pool_t *pool)
> >  > +{
> >  > +  if (value)
> >  > +    SVN_ERR((cache->dup_func)(value_p, value, pool));
> >  > +  else
> >  > +    *value_p = NULL;
> >  > +  return SVN_NO_ERROR;
> >  > +}
> >
> >  You take care to handle NULL values here, but apr_hash can't
> >  handle them anyway, right?  It uses a NULL value to remove a key.
> 
> Right, but the values in the hash are cache_entry structs, whose
> 'value' member can be NULL.  I've often found myself frustrated that
> APR hashes can't store NULL values; I didn't see any reason to
> continue that in this API.

Oh, of course; I see now.

Looks very nice!

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: svn_cache review

Posted by David Glasser <gl...@davidglasser.net>.
On Tue, Mar 25, 2008 at 2:57 PM, Eric Gillespie <ep...@google.com> wrote:
> > Index: subversion/include/svn_cache.h
>  > ===================================================================
>  > +/**
>  > + * Fetches a value indexed by @a key from @a cache into @a *value,
>  > + * setting @a found to TRUE iff it is in the cache.  The value is
>  > + * copied into @a pool using the copy function provided to the cache's
>  > + * constructor.
>  > + */
>
>  Say that you set found to FALSE if not found; this implies that
>  you just leave found alone.

Right.

>  > Index: subversion/libsvn_subr/cache.c
>  > ===================================================================
>  > +struct svn_cache_t {
>  > +  /* Maps from a key (of size CACHE->KLEN) to a struct cache_entry. */
>  > +  apr_hash_t *hash;
>  > +  apr_ssize_t klen;
>
>  Maybe it doesn't matter, but why can keys be only one size?
>  apr_hash_get takes the key size, rather than storing it in the
>  apr_hash_t itself.  This way you can use a string/length in
>  addition to a null-terminated string.

Can you give me an example of any hash in the Subversion codebase that
doesn't always use the same klen argument for every call to
apr_hash_get and apr_hash_set?

>  > +struct cache_entry {
>  > +  const void *key;
>  > +  void *value;
>
>  These should both be const.  It's the object pointed to you're
>  protecting, not the pointer, which you can and do write to.

That makes sense.  I think I did that because of how apr_hash_this has
"const void **key" and "void **val" (ie, in my head somehow I was
thinking that non-const values made sense in APR), but this isn't even
relevant at all.

>  > +/* Uses CACHE->dup_func to copy VALUE into *VALUE_P inside POOL, or
>  > +   just sets *VALUE_P to NULL if VALUE is NULL. */
>  > +static svn_error_t *
>  > +duplicate_value(void **value_p,
>  > +                svn_cache_t *cache,
>  > +                void *value,
>  > +                apr_pool_t *pool)
>  > +{
>  > +  if (value)
>  > +    SVN_ERR((cache->dup_func)(value_p, value, pool));
>  > +  else
>  > +    *value_p = NULL;
>  > +  return SVN_NO_ERROR;
>  > +}
>
>  You take care to handle NULL values here, but apr_hash can't
>  handle them anyway, right?  It uses a NULL value to remove a key.

Right, but the values in the hash are cache_entry structs, whose
'value' member can be NULL.  I've often found myself frustrated that
APR hashes can't store NULL values; I didn't see any reason to
continue that in this API.

>  > +/* Copies KEY into *KEY_P inside POOL, using CACHE->KLEN to figure out
>  > + * how. */
>  > +static const void *
>  > +duplicate_key(svn_cache_t *cache,
>  > +              const void *key,
>  > +              apr_pool_t *pool)
>
>  You don't copy to key_p, but return the copy.

Oops.

>  > +svn_error_t *
>  > +svn_cache_get(void **value_p,
>  > +              svn_boolean_t *found,
>  > +              svn_cache_t *cache,
>  > +              const void *key,
>  > +              apr_pool_t *pool)
>  > +{
>  > +  void *entry_void;
>
>  What's the point of this?  entry = apr_hash_get(...) should work.

Er.  No point.

>  > +static void
>  > +erase_page(svn_cache_t *cache,
>  > +           struct cache_page *page)
>  > +{
>  > +  remove_page_from_list(page);
>  > +  struct cache_entry *e;
>
>  Statement before declaration.

Gah!  Thanks.

>  > +svn_error_t *
>  > +svn_cache_set(svn_cache_t *cache,
>  > +              const void *key,
>  > +              void *value,
>  > +              apr_pool_t *pool)
>  > +{
>  > +  void *existing_entry;
>  > +  svn_error_t *err = SVN_NO_ERROR;
>  > +
>  > +  SVN_ERR(lock_cache(cache));
>  > +
>  > +  existing_entry = apr_hash_get(cache->hash, key, cache->klen);
>  > +
>  > +  /* Is it already here, but we can do the one-item-per-page
>  > +   * optimization? */
>  > +  if (existing_entry && cache->items_per_page == 1)
>  > +    {
>  > +      /* Special case!  ENTRY is the *only* entry on this page, so
>  > +       * why not wipe it (so as not to leak the previous value).
>  > +       */
>  > +      struct cache_entry *entry = existing_entry;
>  > +      struct cache_page *page = entry->page;
>  > +
>  > +      /* This can't be the partial page, items_per_page == NULL
>
>  "items_per_page == 1"

Whoops.

--dave


-- 
David Glasser | glasser@davidglasser.net | http://www.davidglasser.net/

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org