You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@subversion.apache.org by Stefan Fuhrmann <st...@wandisco.com> on 2013/06/06 12:02:39 UTC

Re: svn commit: r1477540 - in /subversion/trunk: ./ subversion/libsvn_subr/cache-membuffer.c

Hi Joe,

Thanks for the patch. Committed as r1490221.

-- Stefan^2.


On Tue, May 14, 2013 at 12:51 AM, Joe Swatosh <jo...@gmail.com> wrote:

> Small (possible?) compatibility improvements: suggest APR_UINT64_C()
> instead of ull suffix in subversion\libsvn_subr\cache-membuffer.c.
>
> --
> Joe
>
> On Tue, Apr 30, 2013 at 3:34 AM,  <st...@apache.org> wrote:
> > Author: stefan2
> > Date: Tue Apr 30 10:34:26 2013
> > New Revision: 1477540
> >
> > URL: http://svn.apache.org/r1477540
> > Log:
> > Cherry-pick merge r458643 from branches/cache-server to /trunk
> > (this accidentally didn't get included into the r1476664 merge).
> >
> > Not intended for 1.8 back-port.
> >
> > Modified:
> >     subversion/trunk/   (props changed)
> >     subversion/trunk/subversion/libsvn_subr/cache-membuffer.c
> >
> > Propchange: subversion/trunk/
> >
> ------------------------------------------------------------------------------
> >   Merged /subversion/branches/cache-server:r1458643
> >
> > Modified: subversion/trunk/subversion/libsvn_subr/cache-membuffer.c
> > URL:
> http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/cache-membuffer.c?rev=1477540&r1=1477539&r2=1477540&view=diff
> >
> ==============================================================================
> > --- subversion/trunk/subversion/libsvn_subr/cache-membuffer.c (original)
> > +++ subversion/trunk/subversion/libsvn_subr/cache-membuffer.c Tue Apr 30
> 10:34:26 2013
> > @@ -98,7 +98,7 @@
> >   * comments in ensure_data_insertable_l2().
> >   *
> >   * To limit the entry size and management overhead, not the actual item
> keys
> > - * but only their MD5 checksums will not be stored. This is reasonably
> safe
> > + * but only their MD5-based hashes will be stored. This is reasonably
> safe
> >   * to do since users have only limited control over the full keys, even
> if
> >   * these contain folder paths. So, it is very hard to deliberately
> construct
> >   * colliding keys. Random checksum collisions can be shown to be
> extremely
> > @@ -884,9 +884,12 @@ get_group_index(svn_membuffer_t **cache,
> >  {
> >    svn_membuffer_t *segment0 = *cache;
> >
> > -  /* select the cache segment to use. they have all the same
> group_count */
> > -  *cache = &segment0[key[0] & (segment0->segment_count -1)];
> > -  return key[1] % segment0->group_count;
> > +  /* select the cache segment to use. they have all the same
> group_count.
> > +   * Since key may not be well-distributed, pre-fold it to a smaller but
> > +   * "denser" ranger.  The divisors are primes larger than the largest
> > +   * counts. */
> > +  *cache = &segment0[(key[1] % 2809637ull) & (segment0->segment_count -
> 1)];
> > +  return (key[0] % 5030895599ull) % segment0->group_count;
>
> Perhaps:
>
> +  *cache = &segment0[(key[1] % APR_UINT64_C(2809637)) &
> (segment0->segment_count - 1)];
> +  return (key[0] % APR_UINT64_C(5030895599)) % segment0->group_count;
>
> ?
>
> >  }
> >
> >  /* Reduce the hit count of ENTRY and update the accumulated hit info
> > @@ -1007,6 +1010,8 @@ find_entry(svn_membuffer_t *cache,
> >         */
> >        if (group->used == GROUP_SIZE)
> >          {
> > +          static int count = 0;
> > +
> >            /* every entry gets the same chance of being removed.
> >             * Otherwise, we free the first entry, fill it and
> >             * remove it again on the next occasion without considering
> > @@ -1026,6 +1031,7 @@ find_entry(svn_membuffer_t *cache,
> >                let_entry_age(cache, entry);
> >
> >            drop_entry(cache, entry);
> > +          printf("%d\n", ++count);
> >          }
> >
> >        /* initialize entry for the new key
> > @@ -2042,6 +2048,22 @@ membuffer_cache_set_partial(svn_membuffe
> >   * svn_cache__t instance.
> >   */
> >
> > +/* Stores the combined key value for the given key.  It will be used by
> > + * combine_key() to short-circuit expensive hash calculations.
> > + */
> > +typedef struct last_access_key_t
> > +{
> > +  /* result of key combining */
> > +  entry_key_t combined_key;
> > +
> > +  /* length of the key (or APR_HASH_KEY_STRING if not used) */
> > +  apr_size_t key_len;
> > +
> > +  /* the original key.  Only KEY_LEN bytes are valid.  We use uint32 for
> > +   * better compatibility with pseudo-md5 functions. */
> > +  apr_uint32_t key[64];
> > +} last_access_key_t;
> > +
> >  /* Internal cache structure (used in svn_cache__t.cache_internal)
> basically
> >   * holding the additional parameters needed to call the respective
> membuffer
> >   * functions.
> > @@ -2089,6 +2111,11 @@ typedef struct svn_membuffer_cache_t
> >     */
> >    int alloc_counter;
> >
> > +  /* cache for the last key used.
> > +   * Will be NULL for caches with short fix-sized keys.
> > +   */
> > +  last_access_key_t *last_access;
> > +
> >    /* if enabled, this will serialize the access to this instance.
> >     */
> >    svn_mutex__t *mutex;
> > @@ -2106,46 +2133,127 @@ typedef struct svn_membuffer_cache_t
> >   */
> >  #define ALLOCATIONS_PER_POOL_CLEAR 10
> >
> > -
> >  /* Basically calculate a hash value for KEY of length KEY_LEN, combine
> it
> >   * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
> > + * This could replace combine_key() entirely but we actually use it only
> > + * when the quick path failed.
> >   */
> >  static void
> > -combine_key(svn_membuffer_cache_t *cache,
> > -            const void *key,
> > -            apr_ssize_t key_len)
> > +combine_long_key(svn_membuffer_cache_t *cache,
> > +                 const void *key,
> > +                 apr_ssize_t key_len)
> >  {
> > +  assert(cache->last_access);
> > +
> > +  /* handle variable-length keys */
> >    if (key_len == APR_HASH_KEY_STRING)
> >      key_len = strlen((const char *) key);
> >
> > -  if (key_len < 16)
> > +  /* same key as the last time? -> short-circuit */
> > +  if (   key_len == cache->last_access->key_len
> > +      && memcmp(key, cache->last_access->key, key_len) == 0)
> >      {
> > -      apr_uint32_t data[4] = { 0 };
> > -      memcpy(data, key, key_len);
> > +      memcpy(cache->combined_key, cache->last_access->combined_key,
> > +             sizeof(cache->combined_key));
> > +    }
> > +  else if (key_len >= 64)
> > +    {
> > +      /* relatively long key.  Use the generic, slow hash code for it */
> > +      apr_md5((unsigned char*)cache->combined_key, key, key_len);
> > +      cache->combined_key[0] ^= cache->prefix[0];
> > +      cache->combined_key[1] ^= cache->prefix[1];
> >
> > -      svn__pseudo_md5_15((apr_uint32_t *)cache->combined_key, data);
> > +      /* is the key short enough to cache the result? */
> > +      if (key_len <= sizeof(cache->last_access->key))
> > +        {
> > +          memcpy(cache->last_access->combined_key, cache->combined_key,
> > +                 sizeof(cache->combined_key));
> > +          cache->last_access->key_len = key_len;
> > +          memcpy(cache->last_access->key, key, key_len);
> > +        }
> >      }
> > -  else if (key_len < 32)
> > +  else
> >      {
> > -      apr_uint32_t data[8] = { 0 };
> > -      memcpy(data, key, key_len);
> > +      /* shorter keys use efficient hash code and *do* cache the
> results */
> > +      cache->last_access->key_len = key_len;
> > +      if (key_len < 16)
> > +        {
> > +          memset(cache->last_access->key, 0, 16);
> > +          memcpy(cache->last_access->key, key, key_len);
> > +
> > +          svn__pseudo_md5_15((apr_uint32_t *)cache->combined_key,
> > +                             cache->last_access->key);
> > +        }
> > +      else if (key_len < 32)
> > +        {
> > +          memset(cache->last_access->key, 0, 32);
> > +          memcpy(cache->last_access->key, key, key_len);
> > +
> > +          svn__pseudo_md5_31((apr_uint32_t *)cache->combined_key,
> > +                             cache->last_access->key);
> > +        }
> > +      else
> > +        {
> > +          memset(cache->last_access->key, 0, 64);
> > +          memcpy(cache->last_access->key, key, key_len);
> > +
> > +          svn__pseudo_md5_63((apr_uint32_t *)cache->combined_key,
> > +                             cache->last_access->key);
> > +        }
> > +
> > +      cache->combined_key[0] ^= cache->prefix[0];
> > +      cache->combined_key[1] ^= cache->prefix[1];
> > +
> > +      memcpy(cache->last_access->combined_key, cache->combined_key,
> > +             sizeof(cache->combined_key));
> > +    }
> > +}
> > +
> > +/* Basically calculate a hash value for KEY of length KEY_LEN, combine
> it
> > + * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
> > + */
> > +static void
> > +combine_key(svn_membuffer_cache_t *cache,
> > +            const void *key,
> > +            apr_ssize_t key_len)
> > +{
> > +  /* copy of *key, padded with 0 */
> > +  apr_uint64_t data[2];
> >
> > -      svn__pseudo_md5_31((apr_uint32_t *)cache->combined_key, data);
> > +  /* short, fixed-size keys are the most common case */
> > +  if (key_len == 16)
> > +    {
> > +      data[0] = ((const apr_uint64_t *)key)[0];
> > +      data[1] = ((const apr_uint64_t *)key)[1];
> >      }
> > -  else if (key_len < 64)
> > +  else if (key_len == 8)
> >      {
> > -      apr_uint32_t data[16] = { 0 };
> > +      data[0] = ((const apr_uint64_t *)key)[0];
> > +      data[1] = 0;
> > +    }
> > +  else if (key_len != APR_HASH_KEY_STRING && key_len < 16)
> > +    {
> > +      data[0] = 0;
> > +      data[1] = 0;
> >        memcpy(data, key, key_len);
> > -
> > -      svn__pseudo_md5_63((apr_uint32_t *)cache->combined_key, data);
> >      }
> >    else
> >      {
> > -      apr_md5((unsigned char*)cache->combined_key, key, key_len);
> > +      /* longer or variably sized keys */
> > +      combine_long_key(cache, key, key_len);
> > +      return;
> >      }
> >
> > -  cache->combined_key[0] ^= cache->prefix[0];
> > -  cache->combined_key[1] ^= cache->prefix[1];
> > +  /* scramble key DATA.  All of this must be reversible to prevent key
> > +   * collisions.  So, we limit ourselves to xor and permutations. */
> > +  data[1] = (data[1] << 27) | (data[1] >> 37);
> > +  data[1] ^= data[0] & 0xffff;
> > +  data[0] ^= data[1] & 0xffffffffffff0000ull;
>
> Likewise:
>
> +  data[0] ^= data[1] & APR_UINT64_C(0xffffffffffff0000);
>
> ?
>
> > +  data[0] = (data[0] << 43) | (data[0] >> 21);
> > +
> > +  /* combine with this cache's namespace */
> > +  cache->combined_key[0] = data[0] ^ cache->prefix[0];
> > +  cache->combined_key[1] = data[1] ^ cache->prefix[1];
> >  }
> >
> >  /* Implement svn_cache__vtable_t.get (not thread-safe)
> > @@ -2560,6 +2668,18 @@ svn_cache__create_membuffer_cache(svn_ca
> >                         pool));
> >    memcpy(cache->prefix, checksum->digest, sizeof(cache->prefix));
> >
> > +  /* fix-length keys of 16 bytes or under don't need a buffer because we
> > +   * can use a very fast key combining algorithm. */
> > +  if ((klen == APR_HASH_KEY_STRING) ||  klen > sizeof(entry_key_t))
> > +    {
> > +      cache->last_access = apr_pcalloc(pool,
> sizeof(*cache->last_access));
> > +      cache->last_access->key_len = APR_HASH_KEY_STRING;
> > +    }
> > +  else
> > +    {
> > +      cache->last_access = NULL;
> > +    }
> > +
> >  #ifdef SVN_DEBUG_CACHE_MEMBUFFER
> >
> >    /* Initialize cache debugging support.
> >
> >
>



-- 
*Join one of our free daily demo sessions on* *Scaling Subversion for the
Enterprise <http://www.wandisco.com/training/webinars>*
*

*

Re: svn commit: r1477540 - in /subversion/trunk: ./ subversion/libsvn_subr/cache-membuffer.c

Posted by Joe Swatosh <jo...@gmail.com>.
On Thu, Jun 6, 2013 at 3:02 AM, Stefan Fuhrmann
<st...@wandisco.com> wrote:
> Hi Joe,
>
> Thanks for the patch. Committed as r1490221.
>
> -- Stefan^2.
>
>
> On Tue, May 14, 2013 at 12:51 AM, Joe Swatosh <jo...@gmail.com> wrote:
>>

Cool, Thanks!

Re: svn commit: r1477540 - in /subversion/trunk: ./ subversion/libsvn_subr/cache-membuffer.c

Posted by Joe Swatosh <jo...@gmail.com>.
On Thu, Jun 6, 2013 at 3:02 AM, Stefan Fuhrmann
<st...@wandisco.com> wrote:
> Hi Joe,
>
> Thanks for the patch. Committed as r1490221.
>
> -- Stefan^2.
>
>
> On Tue, May 14, 2013 at 12:51 AM, Joe Swatosh <jo...@gmail.com> wrote:
>>

Cool, Thanks!