You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2013/03/20 06:39:58 UTC
svn commit: r1458643 -
/subversion/branches/cache-server/subversion/libsvn_subr/cache-membuffer.c
Author: stefan2
Date: Wed Mar 20 05:39:58 2013
New Revision: 1458643
URL: http://svn.apache.org/r1458643
Log:
On the cache-server branch: Eliminate 70+% of the cache access overhead
caused by calculating expensive hash sums.
We use two approaches here. First, cache the combined_key (i.e. hash)
values from the last access and skip the calculation if the same key is
being used again. This is very effective because we often probe a cache
with a specific key and then set a value for that key.
Secondly, we leverage the fact that most keys in fsfs-format7 code will
be fixed-size 16 byte keys (80+% up from ~50%). Simply combining those
with the respective cache prefix will already produce a unique key. We
add some scrambling code to distribute their values somewhat more evenly
over the key space.
* subversion/libsvn_subr/cache-membuffer.c
(): update / correct global doc
(get_group_index): compensate for the worsened key value distribution
(last_access_key_t): new data structure
(svn_membuffer_cache_t): add cache for the last combine_key() result
(combine_long_key): split from combine_key(); use cached values
(combine_key): split; add fast code for short fixed-size keys
(svn_cache__create_membuffer_cache): update
Modified:
subversion/branches/cache-server/subversion/libsvn_subr/cache-membuffer.c
Modified: subversion/branches/cache-server/subversion/libsvn_subr/cache-membuffer.c
URL: http://svn.apache.org/viewvc/subversion/branches/cache-server/subversion/libsvn_subr/cache-membuffer.c?rev=1458643&r1=1458642&r2=1458643&view=diff
==============================================================================
--- subversion/branches/cache-server/subversion/libsvn_subr/cache-membuffer.c (original)
+++ subversion/branches/cache-server/subversion/libsvn_subr/cache-membuffer.c Wed Mar 20 05:39:58 2013
@@ -89,7 +89,7 @@
* comments in ensure_data_insertable().
*
* To limit the entry size and management overhead, not the actual item keys
- * but only their MD5 checksums will not be stored. This is reasonably safe
+ * but only their MD5-based hashes will be stored. This is reasonably safe
* to do since users have only limited control over the full keys, even if
* these contain folder paths. So, it is very hard to deliberately construct
* colliding keys. Random checksum collisions can be shown to be extremely
@@ -789,9 +789,12 @@ get_group_index(svn_membuffer_t **cache,
{
svn_membuffer_t *segment0 = *cache;
- /* select the cache segment to use. they have all the same group_count */
- *cache = &segment0[key[0] & (segment0->segment_count -1)];
- return key[1] % segment0->group_count;
+ /* select the cache segment to use. they have all the same group_count.
+ * Since key may not be well-distributed, pre-fold it to a smaller but
+ * "denser" ranger. The divisors are primes larger than the largest
+ * counts. */
+ *cache = &segment0[(key[1] % 2809637ull) & (segment0->segment_count - 1)];
+ return (key[0] % 5030895599ull) % segment0->group_count;
}
/* Reduce the hit count of ENTRY and update the accumulated hit info
@@ -912,6 +915,8 @@ find_entry(svn_membuffer_t *cache,
*/
if (group->used == GROUP_SIZE)
{
+ static int count = 0;
+
/* every entry gets the same chance of being removed.
* Otherwise, we free the first entry, fill it and
* remove it again on the next occasion without considering
@@ -931,6 +936,7 @@ find_entry(svn_membuffer_t *cache,
let_entry_age(cache, entry);
drop_entry(cache, entry);
+ printf("%d\n", ++count);
}
/* initialize entry for the new key
@@ -1826,6 +1832,22 @@ membuffer_cache_set_partial(svn_membuffe
* svn_cache__t instance.
*/
+/* Stores the combined key value for the given key. It will be used by
+ * combine_key() to short-circuit expensive hash calculations.
+ */
+typedef struct last_access_key_t
+{
+ /* result of key combining */
+ entry_key_t combined_key;
+
+ /* length of the key (or APR_HASH_KEY_STRING if not used) */
+ apr_size_t key_len;
+
+ /* the original key. Only KEY_LEN bytes are valid. We use uint32 for
+ * better compatibility with pseudo-md5 functions. */
+ apr_uint32_t key[64];
+} last_access_key_t;
+
/* Internal cache structure (used in svn_cache__t.cache_internal) basically
* holding the additional parameters needed to call the respective membuffer
* functions.
@@ -1873,6 +1895,11 @@ typedef struct svn_membuffer_cache_t
*/
int alloc_counter;
+ /* cache for the last key used.
+ * Will be NULL for caches with short fix-sized keys.
+ */
+ last_access_key_t *last_access;
+
/* if enabled, this will serialize the access to this instance.
*/
svn_mutex__t *mutex;
@@ -1890,46 +1917,127 @@ typedef struct svn_membuffer_cache_t
*/
#define ALLOCATIONS_PER_POOL_CLEAR 10
-
/* Basically calculate a hash value for KEY of length KEY_LEN, combine it
* with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
+ * This could replace combine_key() entirely but we actually use it only
+ * when the quick path failed.
*/
static void
-combine_key(svn_membuffer_cache_t *cache,
- const void *key,
- apr_ssize_t key_len)
+combine_long_key(svn_membuffer_cache_t *cache,
+ const void *key,
+ apr_ssize_t key_len)
{
+ assert(cache->last_access);
+
+ /* handle variable-length keys */
if (key_len == APR_HASH_KEY_STRING)
key_len = strlen((const char *) key);
- if (key_len < 16)
+ /* same key as the last time? -> short-circuit */
+ if ( key_len == cache->last_access->key_len
+ && memcmp(key, cache->last_access->key, key_len) == 0)
{
- apr_uint32_t data[4] = { 0 };
- memcpy(data, key, key_len);
+ memcpy(cache->combined_key, cache->last_access->combined_key,
+ sizeof(cache->combined_key));
+ }
+ else if (key_len >= 64)
+ {
+ /* relatively long key. Use the generic, slow hash code for it */
+ apr_md5((unsigned char*)cache->combined_key, key, key_len);
+ cache->combined_key[0] ^= cache->prefix[0];
+ cache->combined_key[1] ^= cache->prefix[1];
- svn__pseudo_md5_15((apr_uint32_t *)cache->combined_key, data);
+ /* is the key short enough to cache the result? */
+ if (key_len <= sizeof(cache->last_access->key))
+ {
+ memcpy(cache->last_access->combined_key, cache->combined_key,
+ sizeof(cache->combined_key));
+ cache->last_access->key_len = key_len;
+ memcpy(cache->last_access->key, key, key_len);
+ }
}
- else if (key_len < 32)
+ else
{
- apr_uint32_t data[8] = { 0 };
- memcpy(data, key, key_len);
+ /* shorter keys use efficient hash code and *do* cache the results */
+ cache->last_access->key_len = key_len;
+ if (key_len < 16)
+ {
+ memset(cache->last_access->key, 0, 16);
+ memcpy(cache->last_access->key, key, key_len);
+
+ svn__pseudo_md5_15((apr_uint32_t *)cache->combined_key,
+ cache->last_access->key);
+ }
+ else if (key_len < 32)
+ {
+ memset(cache->last_access->key, 0, 32);
+ memcpy(cache->last_access->key, key, key_len);
+
+ svn__pseudo_md5_31((apr_uint32_t *)cache->combined_key,
+ cache->last_access->key);
+ }
+ else
+ {
+ memset(cache->last_access->key, 0, 64);
+ memcpy(cache->last_access->key, key, key_len);
+
+ svn__pseudo_md5_63((apr_uint32_t *)cache->combined_key,
+ cache->last_access->key);
+ }
+
+ cache->combined_key[0] ^= cache->prefix[0];
+ cache->combined_key[1] ^= cache->prefix[1];
+
+ memcpy(cache->last_access->combined_key, cache->combined_key,
+ sizeof(cache->combined_key));
+ }
+}
+
+/* Basically calculate a hash value for KEY of length KEY_LEN, combine it
+ * with the CACHE->PREFIX and write the result in CACHE->COMBINED_KEY.
+ */
+static void
+combine_key(svn_membuffer_cache_t *cache,
+ const void *key,
+ apr_ssize_t key_len)
+{
+ /* copy of *key, padded with 0 */
+ apr_uint64_t data[2];
- svn__pseudo_md5_31((apr_uint32_t *)cache->combined_key, data);
+ /* short, fixed-size keys are the most common case */
+ if (key_len == 16)
+ {
+ data[0] = ((const apr_uint64_t *)key)[0];
+ data[1] = ((const apr_uint64_t *)key)[1];
}
- else if (key_len < 64)
+ else if (key_len == 8)
{
- apr_uint32_t data[16] = { 0 };
+ data[0] = ((const apr_uint64_t *)key)[0];
+ data[1] = 0;
+ }
+ else if (key_len != APR_HASH_KEY_STRING && key_len < 16)
+ {
+ data[0] = 0;
+ data[1] = 0;
memcpy(data, key, key_len);
-
- svn__pseudo_md5_63((apr_uint32_t *)cache->combined_key, data);
}
else
{
- apr_md5((unsigned char*)cache->combined_key, key, key_len);
+ /* longer or variably sized keys */
+ combine_long_key(cache, key, key_len);
+ return;
}
- cache->combined_key[0] ^= cache->prefix[0];
- cache->combined_key[1] ^= cache->prefix[1];
+ /* scramble key DATA. All of this must be reversible to prevent key
+ * collisions. So, we limit ourselves to xor and permutations. */
+ data[1] = (data[1] << 27) | (data[1] >> 37);
+ data[1] ^= data[0] & 0xffff;
+ data[0] ^= data[1] & 0xffffffffffff0000ull;
+ data[0] = (data[0] << 43) | (data[0] >> 21);
+
+ /* combine with this cache's namespace */
+ cache->combined_key[0] = data[0] ^ cache->prefix[0];
+ cache->combined_key[1] = data[1] ^ cache->prefix[1];
}
/* Implement svn_cache__vtable_t.get (not thread-safe)
@@ -2360,6 +2468,18 @@ svn_cache__create_membuffer_cache(svn_ca
pool));
memcpy(cache->prefix, checksum->digest, sizeof(cache->prefix));
+ /* fix-length keys of 16 bytes or under don't need a buffer because we
+ * can use a very fast key combining algorithm. */
+ if ((klen == APR_HASH_KEY_STRING) || klen > sizeof(entry_key_t))
+ {
+ cache->last_access = apr_pcalloc(pool, sizeof(*cache->last_access));
+ cache->last_access->key_len = APR_HASH_KEY_STRING;
+ }
+ else
+ {
+ cache->last_access = NULL;
+ }
+
#ifdef SVN_DEBUG_CACHE_MEMBUFFER
/* Initialize cache debugging support.