You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2010/08/18 19:44:20 UTC
svn commit: r986817 - in
/subversion/branches/performance/subversion/libsvn_fs_fs: dag.c dag.h
fs_fs.c fs_fs.h temp_serializer.c temp_serializer.h
Author: stefan2
Date: Wed Aug 18 17:44:19 2010
New Revision: 986817
URL: http://svn.apache.org/viewvc?rev=986817&view=rev
Log:
Very often, a container is only accessed to process / use a single element
of that container. Copying these containers from cache as a whole can
be very expensive and result in O(N^2) runtime.
Therefore, provide partial getters for the two container types we cache
(dirs and manifests) and use them.
* subversion/libsvn_fs_fs/temp_serializer.h
(svn_fs_fs__get_sharded_offset, svn_fs_fs__extract_dir_entry):
declare partial / selective de-serialization functions
* subversion/libsvn_fs_fs/temp_serializer.c
(svn_fs_fs__get_sharded_offset, svn_fs_fs__extract_dir_entry):
implement them
* subversion/libsvn_fs_fs/fs_fs.h
(svn_fs_fs__rep_contents_dir_partial): declare a new private getter function
that returns the data more selectively than svn_fs_fs__rep_contents_dir
* subversion/libsvn_fs_fs/fs_fs.c
(svn_fs_fs__rep_contents_dir_partial): implement that getter
(get_packed_offset): simplify and speed up by using a partial getter
* subversion/libsvn_fs_fs/dag.h
(svn_fs_fs__dag_dir_entry): declare a new private getter function
* subversion/libsvn_fs_fs/dag.c
(svn_fs_fs__dag_dir_entry): implement the new getter using the new FSFS API
(dir_entry_id_from_node): simplify and speed up by using the new API
Modified:
subversion/branches/performance/subversion/libsvn_fs_fs/dag.c
subversion/branches/performance/subversion/libsvn_fs_fs/dag.h
subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.c
subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.h
subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.c
subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.h
Modified: subversion/branches/performance/subversion/libsvn_fs_fs/dag.c
URL: http://svn.apache.org/viewvc/subversion/branches/performance/subversion/libsvn_fs_fs/dag.c?rev=986817&r1=986816&r2=986817&view=diff
==============================================================================
--- subversion/branches/performance/subversion/libsvn_fs_fs/dag.c (original)
+++ subversion/branches/performance/subversion/libsvn_fs_fs/dag.c Wed Aug 18 17:44:19 2010
@@ -312,16 +312,10 @@ dir_entry_id_from_node(const svn_fs_id_t
const char *name,
apr_pool_t *pool)
{
- apr_hash_t *entries;
svn_fs_dirent_t *dirent;
apr_pool_t *subpool = svn_pool_create(pool);
- SVN_ERR(svn_fs_fs__dag_dir_entries(&entries, parent, subpool, pool));
- if (entries)
- dirent = apr_hash_get(entries, name, APR_HASH_KEY_STRING);
- else
- dirent = NULL;
-
+ SVN_ERR(svn_fs_fs__dag_dir_entry(&dirent, parent, name, subpool, pool));
*id_p = dirent ? svn_fs_fs__id_copy(dirent->id, pool) : NULL;
svn_pool_destroy(subpool);
@@ -440,6 +434,27 @@ svn_fs_fs__dag_dir_entries(apr_hash_t **
return svn_fs_fs__rep_contents_dir(entries, node->fs, noderev, pool);
}
+svn_error_t *
+svn_fs_fs__dag_dir_entry(svn_fs_dirent_t **dirent,
+ dag_node_t *node,
+ const char* name,
+ apr_pool_t *pool,
+ apr_pool_t *node_pool)
+{
+ node_revision_t *noderev;
+ SVN_ERR(get_node_revision(&noderev, node, node_pool));
+ /* Only directory nodes have entries that could be looked up. */
+ if (noderev->kind != svn_node_dir)
+ return svn_error_create(SVN_ERR_FS_NOT_DIRECTORY, NULL,
+ _("Can't get entries of non-directory"));
+
+ /* Extract only the entry called NAME instead of the whole dirent hash. */
+ return svn_fs_fs__rep_contents_dir_partial((void **)dirent, node->fs,
+ noderev,
+ svn_fs_fs__extract_dir_entry,
+ (void *)name, pool);
+}
+
svn_error_t *
svn_fs_fs__dag_set_entry(dag_node_t *node,
Modified: subversion/branches/performance/subversion/libsvn_fs_fs/dag.h
URL: http://svn.apache.org/viewvc/subversion/branches/performance/subversion/libsvn_fs_fs/dag.h?rev=986817&r1=986816&r2=986817&view=diff
==============================================================================
--- subversion/branches/performance/subversion/libsvn_fs_fs/dag.h (original)
+++ subversion/branches/performance/subversion/libsvn_fs_fs/dag.h Wed Aug 18 17:44:19 2010
@@ -297,6 +297,17 @@ svn_error_t *svn_fs_fs__dag_dir_entries(
apr_pool_t *pool,
apr_pool_t *node_pool);
+/* Fetch the entry named NAME from the directory NODE and set *DIRENT
+ to a copy of that entry, allocated in POOL. If NODE has no entry
+ of that name, set *DIRENT to NULL. Return SVN_ERR_FS_NOT_DIRECTORY
+ if NODE is not a directory. NODE_POOL is used for any allocation
+ of memory that needs to live as long as NODE lives.
+ */
+svn_error_t * svn_fs_fs__dag_dir_entry(svn_fs_dirent_t **dirent,
+ dag_node_t *node,
+ const char* name,
+ apr_pool_t *pool,
+ apr_pool_t *node_pool);
/* Set ENTRY_NAME in NODE to point to ID (with kind KIND), allocating
from POOL. NODE must be a mutable directory. ID can refer to a
Modified: subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.c
URL: http://svn.apache.org/viewvc/subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.c?rev=986817&r1=986816&r2=986817&view=diff
==============================================================================
--- subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.c (original)
+++ subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.c Wed Aug 18 17:44:19 2010
@@ -1868,19 +1868,24 @@ get_packed_offset(apr_off_t *rev_offset,
svn_stream_t *manifest_stream;
svn_boolean_t is_cached;
apr_int64_t shard;
+ apr_int64_t shard_pos;
apr_array_header_t *manifest;
apr_pool_t *iterpool;
shard = rev / ffd->max_files_per_dir;
- SVN_ERR(svn_cache__get((void **) &manifest, &is_cached,
- ffd->packed_offset_cache, &shard, pool));
+
+ /* position of the revision's offset within that shard's manifest */
+ shard_pos = rev % ffd->max_files_per_dir;
+
+ /* fetch exactly that element into *rev_offset, if the manifest is found
+ in the cache */
+ SVN_ERR(svn_cache__get_partial((void **) rev_offset, &is_cached,
+ ffd->packed_offset_cache, &shard,
+ svn_fs_fs__get_sharded_offset, &shard_pos,
+ pool));
if (is_cached)
- {
- *rev_offset = APR_ARRAY_IDX(manifest, rev % ffd->max_files_per_dir,
- apr_off_t);
return SVN_NO_ERROR;
- }
/* Open the manifest file. */
SVN_ERR(svn_stream_open_readonly(&manifest_stream,
@@ -3956,6 +3961,65 @@ svn_fs_fs__rep_contents_dir(apr_hash_t *
}
svn_error_t *
+svn_fs_fs__rep_contents_dir_partial(void **result_p,
+ svn_fs_t *fs,
+ node_revision_t *noderev,
+ svn_cache__partial_getter_func_t deserializer,
+ void *baton,
+ apr_pool_t *pool)
+{
+ fs_fs_data_t *ffd = fs->fsap_data;
+ apr_hash_t *entries;
+ svn_boolean_t found = FALSE;
+
+ /* Are we looking for an immutable directory? If so, try the
+ * cache first. */
+ if (! svn_fs_fs__id_txn_id(noderev->id))
+ {
+ const char *unparsed_id =
+ svn_fs_fs__id_unparse(noderev->id, pool)->data;
+
+ /* Cache lookup. Return only the requested part of the dir info. */
+ SVN_ERR(svn_cache__get_partial(result_p, &found, ffd->dir_cache,
+ unparsed_id, deserializer, baton,
+ pool));
+ }
+
+ if (! found)
+ {
+ char *serialized_entries;
+ apr_size_t serialized_len;
+
+ /* Since we don't need the directory content later on, put it into
+ a sub-pool that will be reclaimed immediately before this
+ function returns successfully. Upon failure, it will live as
+ long as POOL.
+ */
+ apr_pool_t *sub_pool = svn_pool_create(pool);
+
+ /* Read the dir from the file system. It will probably be put
+ into the cache for faster lookup in future calls. */
+ SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, fs, noderev, sub_pool));
+
+ /* DESERIALIZER works on serialized data only. So, we need to
+ provide serialized dir entries. */
+ SVN_ERR(svn_fs_fs__serialize_dir_entries(&serialized_entries,
+ &serialized_len,
+ entries,
+ sub_pool));
+ SVN_ERR(deserializer(result_p,
+ serialized_entries,
+ serialized_len,
+ baton,
+ pool));
+
+ apr_pool_destroy(sub_pool);
+ }
+
+ return SVN_NO_ERROR;
+}
+
+svn_error_t *
svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
svn_fs_t *fs,
node_revision_t *noderev,
Modified: subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.h
URL: http://svn.apache.org/viewvc/subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.h?rev=986817&r1=986816&r2=986817&view=diff
==============================================================================
--- subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.h (original)
+++ subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.h Wed Aug 18 17:44:19 2010
@@ -105,6 +105,20 @@ svn_error_t *svn_fs_fs__rep_contents_dir
node_revision_t *noderev,
apr_pool_t *pool);
+/* Set *RESULT_P to the object created by DESERIALIZER when applied
+ to the serialized form of the directory entries of node-revision
+ NODEREV in filesystem FS. BATON is passed to the DESERIALIZER
+ callback function to describe the object to find.
+ The returned object is allocated in POOL, which is also used for
+ temporary allocations. */
+svn_error_t *
+svn_fs_fs__rep_contents_dir_partial(void **result_p,
+ svn_fs_t *fs,
+ node_revision_t *noderev,
+ svn_cache__partial_getter_func_t deserializer,
+ void *baton,
+ apr_pool_t *pool);
+
/* Set *CONTENTS to be a readable svn_stream_t that receives the text
representation of node-revision NODEREV as seen in filesystem FS.
Use POOL for temporary allocations. */
Modified: subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.c
URL: http://svn.apache.org/viewvc/subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.c?rev=986817&r1=986816&r2=986817&view=diff
==============================================================================
--- subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.c (original)
+++ subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.c Wed Aug 18 17:44:19 2010
@@ -679,4 +679,89 @@ svn_fs_fs__deserialize_dir_entries(void
return SVN_NO_ERROR;
}
+/* Implements svn_cache__partial_getter_func_t for manifests: BATON points
+ * to the apr_int64_t index of the apr_off_t to fetch from the array DATA. */
+svn_error_t *
+svn_fs_fs__get_sharded_offset(void **out,
+ const char *data,
+ apr_size_t data_len,
+ void *baton,
+ apr_pool_t *pool)
+{
+ const apr_off_t *manifest = (const apr_off_t *)data;
+ apr_int64_t shard_pos = *(apr_int64_t *)baton;
+ /* OUT addresses the caller's apr_off_t itself: store exactly that type. */
+ *(apr_off_t *)out = manifest[shard_pos];
+
+ return SVN_NO_ERROR;
+}
+
+/* Implements svn_cache__partial_getter_func_t for a directory contents hash:
+ * sets *OUT to a POOL-allocated copy of the entry named BATON, or to NULL. */
+svn_error_t *
+svn_fs_fs__extract_dir_entry(void **out,
+ const char *data,
+ apr_size_t data_len,
+ void *baton,
+ apr_pool_t *pool)
+{
+ hash_data_t *hash_data = (hash_data_t *)data;
+ const char* name = baton;
+
+ /* resolve the reference to the entries array */
+ const svn_fs_dirent_t * const *entries =
+ svn_temp_deserializer__ptr(data, (const void **)&hash_data->entries);
+
+ /* binary search (entries presumably sorted by name) for the first name >= NAME */
+ apr_size_t lower = 0;
+ apr_size_t upper = hash_data->count;
+ apr_size_t middle;
+
+ for (middle = upper / 2; lower < upper; middle = (upper + lower) / 2)
+ {
+ const svn_fs_dirent_t *entry =
+ svn_temp_deserializer__ptr(entries, (const void **)&entries[middle]);
+ const char* entry_name =
+ svn_temp_deserializer__ptr(entry, (const void **)&entry->name);
+
+ int diff = strcmp(entry_name, name);
+ if (diff < 0)
+ lower = middle + 1;
+ else
+ upper = middle;
+ }
+
+ /* de-serialize that entry or return NULL, if no match has been found */
+ *out = NULL;
+ if (lower < hash_data->count)
+ {
+ const svn_fs_dirent_t *source =
+ svn_temp_deserializer__ptr(entries, (const void **)&entries[lower]);
+
+ /* Entries have been serialized one-by-one, each time including all
+ * nested structures and strings. Therefore, they occupy a single
+ * block of memory whose end-offset is either the beginning of the
+ * next entry or the end of the buffer.
+ */
+ apr_size_t end_offset = lower + 1 < hash_data->count
+ ? ((apr_size_t*)entries)[lower+1]
+ : data_len;
+ apr_size_t size = end_offset - ((apr_size_t*)entries)[lower];
+
+ /* copy the entry's memory block, then fix up the pointers in the copy */
+ svn_fs_dirent_t *new_entry = apr_palloc(pool, size);
+ memcpy(new_entry, source, size);
+ /* the search only found a lower bound: hand out the copy on an exact match */
+ svn_temp_deserializer__resolve(new_entry, (void **)&new_entry->name);
+ if (strcmp(new_entry->name, name) == 0)
+ {
+ svn_fs_fs__id_deserialize(new_entry, (svn_fs_id_t **)&new_entry->id);
+ *(svn_fs_dirent_t **)out = new_entry;
+ }
+ }
+
+ return SVN_NO_ERROR;
+}
+
+
Modified: subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.h
URL: http://svn.apache.org/viewvc/subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.h?rev=986817&r1=986816&r2=986817&view=diff
==============================================================================
--- subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.h (original)
+++ subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.h Wed Aug 18 17:44:19 2010
@@ -166,4 +166,27 @@ svn_fs_fs__deserialize_dir_entries(void
apr_size_t data_len,
apr_pool_t *pool);
+/**
+ * Implements @ref svn_cache__partial_getter_func_t for a single element of a
+ * serialized manifest array; @a baton points to its apr_int64_t index.
+ */
+svn_error_t *
+svn_fs_fs__get_sharded_offset(void **out,
+ const char *data,
+ apr_size_t data_len,
+ void *baton,
+ apr_pool_t *pool);
+
+/**
+ * Implements @ref svn_cache__partial_getter_func_t: sets @a *out to a copy of
+ * the @ref svn_fs_dirent_t named by the C string @a baton within a serialized
+ * directory contents hash, or to NULL if there is no entry of that name.
+ */
+svn_error_t *
+svn_fs_fs__extract_dir_entry(void **out,
+ const char *data,
+ apr_size_t data_len,
+ void *baton,
+ apr_pool_t *pool);
+
#endif