You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2012/09/21 21:56:42 UTC
svn commit: r1388654 - in /subversion/branches/10Gb/subversion/libsvn_fs_fs:
caching.c fs.h tree.c tree.h
Author: stefan2
Date: Fri Sep 21 19:56:41 2012
New Revision: 1388654
URL: http://svn.apache.org/viewvc?rev=1388654&view=rev
Log:
On the 10Gb branch: Implement a 1st level cache for DAG nodes as they
are the most frequently accessed data object in FSFS. Also, access
to them tends to have high locality.
* subversion/libsvn_fs_fs/fs.h
(fs_fs_dag_cache_t): declare new data structure
(fs_fs_data_t): add DAG 1st level cache as member
* subversion/libsvn_fs_fs/tree.h
(svn_fs_fs__create_dag_cache): declare new private API
* subversion/libsvn_fs_fs/caching.c
(svn_fs_fs__initialize_caches): initialize DAG 1st level cache
* subversion/libsvn_fs_fs/tree.c
(cache_entry_t,
BUCKET_COUNT,
fs_fs_dag_cache_t): define DAG node 1st level cache data structure
(svn_fs_fs__create_dag_cache): implement new private API
(unlock_cache,
lock_cache,
auto_clear_dag_cache,
cache_lookup): implement cache logic
(dag_node_cache_get): use both cache levels
(dag_node_cache_get_non_canonical): new variant of the former for
potentially non-canonical paths
(open_path): skip canonicalization for canonical paths
(get_dag): most paths are canonical, thus try optimistic lookup
Modified:
subversion/branches/10Gb/subversion/libsvn_fs_fs/caching.c
subversion/branches/10Gb/subversion/libsvn_fs_fs/fs.h
subversion/branches/10Gb/subversion/libsvn_fs_fs/tree.c
subversion/branches/10Gb/subversion/libsvn_fs_fs/tree.h
Modified: subversion/branches/10Gb/subversion/libsvn_fs_fs/caching.c
URL: http://svn.apache.org/viewvc/subversion/branches/10Gb/subversion/libsvn_fs_fs/caching.c?rev=1388654&r1=1388653&r2=1388654&view=diff
==============================================================================
--- subversion/branches/10Gb/subversion/libsvn_fs_fs/caching.c (original)
+++ subversion/branches/10Gb/subversion/libsvn_fs_fs/caching.c Fri Sep 21 19:56:41 2012
@@ -24,6 +24,7 @@
#include "fs_fs.h"
#include "id.h"
#include "dag.h"
+#include "tree.h"
#include "temp_serializer.h"
#include "../libsvn_fs/fs-loader.h"
@@ -32,6 +33,8 @@
#include "svn_private_config.h"
#include "svn_hash.h"
+#include "svn_pools.h"
+
#include "private/svn_debug.h"
#include "private/svn_subr_private.h"
@@ -308,6 +311,9 @@ svn_fs_fs__initialize_caches(svn_fs_t *f
SVN_ERR(init_callbacks(ffd->rev_node_cache, fs, no_handler, pool));
+ /* 1st level DAG node cache */
+ ffd->dag_node_cache = svn_fs_fs__create_dag_cache(svn_pool_create(pool));
+
/* Very rough estimate: 1K per directory. */
SVN_ERR(create_cache(&(ffd->dir_cache),
NULL,
Modified: subversion/branches/10Gb/subversion/libsvn_fs_fs/fs.h
URL: http://svn.apache.org/viewvc/subversion/branches/10Gb/subversion/libsvn_fs_fs/fs.h?rev=1388654&r1=1388653&r2=1388654&view=diff
==============================================================================
--- subversion/branches/10Gb/subversion/libsvn_fs_fs/fs.h (original)
+++ subversion/branches/10Gb/subversion/libsvn_fs_fs/fs.h Fri Sep 21 19:56:41 2012
@@ -217,6 +217,9 @@ typedef struct fs_fs_shared_data_t
apr_pool_t *common_pool;
} fs_fs_shared_data_t;
+/* Data structure for the 1st level DAG node cache. */
+typedef struct fs_fs_dag_cache_t fs_fs_dag_cache_t;
+
/* Private (non-shared) FSFS-specific data for each svn_fs_t object.
Any caches in here may be NULL. */
typedef struct fs_fs_data_t
@@ -241,8 +244,11 @@ typedef struct fs_fs_data_t
(svn_fs_id_t *). (Not threadsafe.) */
svn_cache__t *rev_root_id_cache;
+ /* Caches native dag_node_t* instances and acts as a 1st level cache */
+ fs_fs_dag_cache_t *dag_node_cache;
+
/* DAG node cache for immutable nodes. Maps (revision, fspath)
- to (dag_node_t *). */
+ to (dag_node_t *). This is the 2nd level cache for DAG nodes. */
svn_cache__t *rev_node_cache;
/* A cache of the contents of immutable directories; maps from
Modified: subversion/branches/10Gb/subversion/libsvn_fs_fs/tree.c
URL: http://svn.apache.org/viewvc/subversion/branches/10Gb/subversion/libsvn_fs_fs/tree.c?rev=1388654&r1=1388653&r2=1388654&view=diff
==============================================================================
--- subversion/branches/10Gb/subversion/libsvn_fs_fs/tree.c (original)
+++ subversion/branches/10Gb/subversion/libsvn_fs_fs/tree.c Fri Sep 21 19:56:41 2012
@@ -59,6 +59,7 @@
#include "temp_serializer.h"
#include "private/svn_mergeinfo_private.h"
+#include "private/svn_subr_private.h"
#include "private/svn_fs_util.h"
#include "private/svn_fspath.h"
#include "../libsvn_fs/fs-loader.h"
@@ -135,6 +136,194 @@ static svn_error_t *make_txn_root(svn_fs
/*** Node Caching ***/
+/* 1st level cache */
+
+/* An entry in the first-level cache. REVISION and PATH form the key that
+ will ultimately be matched.
+ */
+typedef struct cache_entry_t
+{
+ /* hash value derived from PATH, REVISION.
+ Used to short-circuit failed lookups. */
+ apr_uint32_t hash_value;
+
+ /* revision to which the NODE belongs */
+ svn_revnum_t revision;
+
+ /* path of the NODE */
+ char *path;
+
+ /* cached value of strlen(PATH). */
+ apr_size_t path_len;
+
+ /* the node allocated in the cache's pool. NULL for empty entries. */
+ dag_node_t *node;
+} cache_entry_t;
+
+/* Number of entries in the cache. Keep this low to keep pressure on the
+ CPU caches low as well. A binary value is most efficient. If we walk
+ a directory tree, we want enough entries to store nodes for all files
+ without overwriting the nodes for the parent folder. That way, there
+ will be no unnecessary misses (except for a few random ones caused by
+ hash collision).
+
+ The actual number of instances may be higher but entries that got
+ overwritten are no longer visible.
+ */
+enum { BUCKET_COUNT = 256 };
+
+/* The actual cache structure. All nodes will be allocated in POOL.
+ When the number of INSERTIONS (i.e. objects created form that pool)
+ exceeds a certain threshold, the pool will be cleared and the cache
+ with it.
+
+ To ensure that nodes returned from this structure remain valid, the
+ cache will get locked for the lifetime of the _receiving_ pools (i.e.
+ those in which we would allocate the node if there was no cache.).
+ The cache will only be cleared LOCK_COUNT is 0.
+ */
+struct fs_fs_dag_cache_t
+{
+ /* fixed number of (possibly empty) cache entries */
+ cache_entry_t buckets[BUCKET_COUNT];
+
+ /* pool used for all cache and node allocation */
+ apr_pool_t *pool;
+
+ /* number of entries created from POOL since the last cleanup */
+ apr_size_t insertions;
+
+ /* Property lookups etc. have a very high locality (75% re-hit).
+ Thus, remember the last hit location for optimistic lookup. */
+ apr_size_t last_hit;
+
+ /* receiving pool that added the latest lock. NULL if not known. */
+ apr_pool_t *last_registered;
+
+ /* lock counter */
+ apr_size_t lock_count;
+};
+
+fs_fs_dag_cache_t*
+svn_fs_fs__create_dag_cache(apr_pool_t *pool)
+{
+ fs_fs_dag_cache_t *result = apr_pcalloc(pool, sizeof(*result));
+ result->pool = pool;
+
+ return result;
+}
+
+/* Cleanup function to be called when a receiving pool gets cleared.
+ Unlocks the cache once.
+ */
+static apr_status_t
+unlock_cache(void *baton_void)
+{
+ fs_fs_dag_cache_t *cache = baton_void;
+ cache->lock_count--;
+ cache->last_registered = NULL;
+
+ return APR_SUCCESS;
+}
+
+/* Prevent the entries in CACHE from being destroyed, for as long as the
+ POOL lives.
+ */
+static void
+lock_cache(fs_fs_dag_cache_t* cache, apr_pool_t *pool)
+{
+ /* we only need to lock / unlock once per pool. Since we will often ask
+ for multiple nodes with the same pool, we can reduce the overhead.
+ However, if e.g. pools are being used in an alternating pattern,
+ we may lock the cache more than once for the same pool (and register
+ just as many cleanup actions).
+ */
+ if (pool != cache->last_registered)
+ {
+ apr_pool_cleanup_register(pool,
+ cache,
+ unlock_cache,
+ apr_pool_cleanup_null);
+ cache->lock_count++;
+ cache->last_registered = pool;
+ }
+}
+
+/* Clears and re-creates *CACHE at regular intervals
+ */
+static void
+auto_clear_dag_cache(fs_fs_dag_cache_t** cache)
+{
+ if ((*cache)->lock_count == 0 && (*cache)->insertions > BUCKET_COUNT)
+ {
+ apr_pool_t *pool = (*cache)->pool;
+ apr_pool_clear(pool);
+
+ *cache = svn_fs_fs__create_dag_cache(pool);
+ }
+}
+
+/* For the given REVISION and PATH, return the respective entry in CACHE.
+ If the entry is empty, its NODE member will be NULL and the caller
+ may then set it to the corresponding DAG node allocated in CACHE->POOL.
+ */
+static cache_entry_t *
+cache_lookup( fs_fs_dag_cache_t *cache
+ , svn_revnum_t revision
+ , const char *path)
+{
+ apr_size_t i, bucket_index;
+ apr_size_t path_len = strlen(path);
+ apr_uint32_t hash_value = revision;
+
+ /* optimistic lookup: hit the same bucket again? */
+ cache_entry_t *result = &cache->buckets[cache->last_hit];
+ if ( (result->revision == revision)
+ && (result->path_len == path_len)
+ && !memcmp(result->path, path, path_len))
+ {
+ return result;
+ }
+
+ /* need to do a full lookup. Calculate the hash value
+ (HASH_VALUE has been initialized to REVISION). */
+ for (i = 0; i + 4 <= path_len; i += 4)
+ hash_value = hash_value * 0xd1f3da69 + *(const apr_uint32_t*)(path + i);
+
+ for (; i < path_len; ++i)
+ hash_value = hash_value * 33 + path[i];
+
+ bucket_index = hash_value + (hash_value >> 16);
+ bucket_index = (bucket_index + (bucket_index >> 8)) % BUCKET_COUNT;
+
+ /* access the corresponding bucket and remember its location */
+ result = &cache->buckets[bucket_index];
+ cache->last_hit = bucket_index;
+
+ /* if it is *NOT* a match, clear the bucket, expect the caller to fill
+ in the node and count it as an insertion */
+ if ( (result->hash_value != hash_value)
+ || (result->revision != revision)
+ || (result->path_len != path_len)
+ || memcmp(result->path, path, path_len))
+ {
+ result->hash_value = hash_value;
+ result->revision = revision;
+ if (result->path_len < path_len)
+ result->path = apr_palloc(cache->pool, path_len + 1);
+ result->path_len = path_len;
+ memcpy(result->path, path, path_len + 1);
+
+ result->node = NULL;
+
+ cache->insertions++;
+ }
+
+ return result;
+}
+
+/* 2nd level cache */
+
/* Find and return the DAG node cache for ROOT and the key that
should be used for PATH. */
static void
@@ -160,7 +349,8 @@ locate_cache(svn_cache__t **cache,
}
/* Return NODE for PATH from ROOT's node cache, or NULL if the node
- isn't cached; the node is copied into POOL. */
+ isn't cached; read it from the FS. *NODE remains valid until POOL
+ gets cleared or destroyed.*/
static svn_error_t *
dag_node_cache_get(dag_node_t **node_p,
svn_fs_root_t *root,
@@ -168,24 +358,83 @@ dag_node_cache_get(dag_node_t **node_p,
apr_pool_t *pool)
{
svn_boolean_t found;
- dag_node_t *node;
+ dag_node_t *node = NULL;
svn_cache__t *cache;
const char *key;
SVN_ERR_ASSERT(*path == '/');
- locate_cache(&cache, &key, root, path, pool);
-
- SVN_ERR(svn_cache__get((void **) &node, &found, cache, key, pool));
- if (found && node)
+ if (!root->is_txn_root)
{
- /* Patch up the FS, since this might have come from an old FS
- * object. */
- svn_fs_fs__dag_set_fs(node, root->fs);
- *node_p = node;
+ /* immutable DAG node. use the global caches for it */
+
+ fs_fs_data_t *ffd = root->fs->fsap_data;
+ cache_entry_t *bucket;
+
+ auto_clear_dag_cache(&ffd->dag_node_cache);
+ lock_cache(ffd->dag_node_cache, pool);
+ bucket = cache_lookup(ffd->dag_node_cache, root->rev, path);
+ if (bucket->node == NULL)
+ {
+ locate_cache(&cache, &key, root, path, pool);
+ SVN_ERR(svn_cache__get((void **)&node, &found, cache, key,
+ ffd->dag_node_cache->pool));
+ if (found && node)
+ {
+ /* Patch up the FS, since this might have come from an old FS
+ * object. */
+ svn_fs_fs__dag_set_fs(node, root->fs);
+ bucket->node = node;
+ }
+ }
+ else
+ {
+ node = bucket->node;
+ }
}
else
- *node_p = NULL;
+ {
+ /* DAG is mutable / may become invalid. Use the TXN-local cache */
+
+ locate_cache(&cache, &key, root, path, pool);
+
+ SVN_ERR(svn_cache__get((void **) &node, &found, cache, key, pool));
+ if (found && node)
+ {
+ /* Patch up the FS, since this might have come from an old FS
+ * object. */
+ svn_fs_fs__dag_set_fs(node, root->fs);
+ }
+ }
+
+ *node_p = node;
+
+ return SVN_NO_ERROR;
+}
+
+/* Attempt a 1st level cache lookup for for PATH from ROOT's node cache.
+ Return the result in *NODE. If PATH is not canonical, or the node is
+ not found in the cache, set *NODE to NULL. *NODE remains valid until
+ POOL gets cleared or destroyed. */
+static svn_error_t *
+dag_node_cache_get_non_canonical(dag_node_t **node_p,
+ svn_fs_root_t *root,
+ const char *path,
+ apr_pool_t *pool)
+{
+ dag_node_t *node = NULL;
+ if (!root->is_txn_root && *path == '/')
+ {
+ fs_fs_data_t *ffd = root->fs->fsap_data;
+
+ auto_clear_dag_cache(&ffd->dag_node_cache);
+ lock_cache(ffd->dag_node_cache, pool);
+
+ node = cache_lookup(ffd->dag_node_cache, root->rev, path)->node;
+ }
+
+ *node_p = node;
+
return SVN_NO_ERROR;
}
@@ -592,7 +841,9 @@ open_path(parent_path_t **parent_path_p,
dag_node_t *here; /* The directory we're currently looking at. */
parent_path_t *parent_path; /* The path from HERE up to the root. */
const char *rest; /* The portion of PATH we haven't traversed yet. */
- const char *canon_path = svn_fs__canonicalize_abspath(path, pool);
+ const char *canon_path = svn_fs__is_canonical_abspath(path)
+ ? path
+ : svn_fs__canonicalize_abspath(path, pool);
const char *path_so_far = "/";
apr_pool_t *iterpool = svn_pool_create(pool);
@@ -817,19 +1068,29 @@ get_dag(dag_node_t **dag_node_p,
parent_path_t *parent_path;
dag_node_t *node;
- /* Canonicalize the input PATH. */
- path = svn_fs__canonicalize_abspath(path, pool);
-
- /* First we look for the DAG in our cache. */
- SVN_ERR(dag_node_cache_get(&node, root, path, pool));
+ /* First we look for the DAG in our cache. If we find a node, PATH
+ has been canonical. */
+ SVN_ERR(dag_node_cache_get_non_canonical(&node, root, path, pool));
if (! node)
{
- /* Call open_path with no flags, as we want this to return an error
- if the node for which we are searching doesn't exist. */
- SVN_ERR(open_path(&parent_path, root, path, 0, NULL, pool));
- node = parent_path->node;
+ /* Canonicalize the input PATH. */
+ if (!svn_fs__is_canonical_abspath(path))
+ {
+ path = svn_fs__canonicalize_abspath(path, pool);
+
+ /* Try again with the corrected path. */
+ SVN_ERR(dag_node_cache_get(&node, root, path, pool));
+ }
- /* No need to cache our find -- open_path() will do that for us. */
+ if (! node)
+ {
+ /* Call open_path with no flags, as we want this to return an
+ * error if the node for which we are searching doesn't exist. */
+ SVN_ERR(open_path(&parent_path, root, path, 0, NULL, pool));
+ node = parent_path->node;
+
+ /* No need to cache our find -- open_path() will do that for us. */
+ }
}
*dag_node_p = node;
Modified: subversion/branches/10Gb/subversion/libsvn_fs_fs/tree.h
URL: http://svn.apache.org/viewvc/subversion/branches/10Gb/subversion/libsvn_fs_fs/tree.h?rev=1388654&r1=1388653&r2=1388654&view=diff
==============================================================================
--- subversion/branches/10Gb/subversion/libsvn_fs_fs/tree.h (original)
+++ subversion/branches/10Gb/subversion/libsvn_fs_fs/tree.h Fri Sep 21 19:56:41 2012
@@ -23,12 +23,19 @@
#ifndef SVN_LIBSVN_FS_TREE_H
#define SVN_LIBSVN_FS_TREE_H
+#include "fs.h"
+
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
+/* In POOL, create an instance of a DAG node 1st level cache.
+ The POOL will be cleared at regular intervals. */
+fs_fs_dag_cache_t*
+svn_fs_fs__create_dag_cache(apr_pool_t *pool);
+
/* Set *ROOT_P to the root directory of revision REV in filesystem FS.
Allocate the structure in POOL. */
svn_error_t *svn_fs_fs__revision_root(svn_fs_root_t **root_p, svn_fs_t *fs,