You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2010/08/18 19:44:20 UTC

svn commit: r986817 - in /subversion/branches/performance/subversion/libsvn_fs_fs: dag.c dag.h fs_fs.c fs_fs.h temp_serializer.c temp_serializer.h

Author: stefan2
Date: Wed Aug 18 17:44:19 2010
New Revision: 986817

URL: http://svn.apache.org/viewvc?rev=986817&view=rev
Log:
Very often, a container is only accessed to process / use a single element
of that container. Copying these containers from cache as a whole can
be very expensive and result in O(N^2) runtime.

Therefore, provide partial getters for the two container types we cache
(dirs and manifests) and use them.

* subversion/libsvn_fs_fs/temp_serializer.h
  (svn_fs_fs__get_sharded_offset, svn_fs_fs__extract_dir_entry):
   declare partial / selective de-serialization functions
* subversion/libsvn_fs_fs/temp_serializer.c
  (svn_fs_fs__get_sharded_offset, svn_fs_fs__extract_dir_entry):
   implement them

* subversion/libsvn_fs_fs/fs_fs.h
  (svn_fs_fs__rep_contents_dir_partial): declare a new private getter function
   that returns the data more selectively than svn_fs_fs__rep_contents_dir
* subversion/libsvn_fs_fs/fs_fs.c
  (svn_fs_fs__rep_contents_dir_partial): implement that getter
  (get_packed_offset): simplify and speed up by using a partial getter

* subversion/libsvn_fs_fs/dag.h
  (svn_fs_fs__dag_dir_entry): declare a new private getter function
* subversion/libsvn_fs_fs/dag.c
  (svn_fs_fs__dag_dir_entry): implement the new getter using the new FSFS API
  (dir_entry_id_from_node): simplify and speed up by using the new API

Modified:
    subversion/branches/performance/subversion/libsvn_fs_fs/dag.c
    subversion/branches/performance/subversion/libsvn_fs_fs/dag.h
    subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.c
    subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.h
    subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.c
    subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.h

Modified: subversion/branches/performance/subversion/libsvn_fs_fs/dag.c
URL: http://svn.apache.org/viewvc/subversion/branches/performance/subversion/libsvn_fs_fs/dag.c?rev=986817&r1=986816&r2=986817&view=diff
==============================================================================
--- subversion/branches/performance/subversion/libsvn_fs_fs/dag.c (original)
+++ subversion/branches/performance/subversion/libsvn_fs_fs/dag.c Wed Aug 18 17:44:19 2010
@@ -312,16 +312,10 @@ dir_entry_id_from_node(const svn_fs_id_t
                        const char *name,
                        apr_pool_t *pool)
 {
-  apr_hash_t *entries;
   svn_fs_dirent_t *dirent;
   apr_pool_t *subpool = svn_pool_create(pool);
 
-  SVN_ERR(svn_fs_fs__dag_dir_entries(&entries, parent, subpool, pool));
-  if (entries)
-    dirent = apr_hash_get(entries, name, APR_HASH_KEY_STRING);
-  else
-    dirent = NULL;
-
+  SVN_ERR(svn_fs_fs__dag_dir_entry(&dirent, parent, name, subpool, pool));
   *id_p = dirent ? svn_fs_fs__id_copy(dirent->id, pool) : NULL;
 
   svn_pool_destroy(subpool);
@@ -440,6 +434,27 @@ svn_fs_fs__dag_dir_entries(apr_hash_t **
   return svn_fs_fs__rep_contents_dir(entries, node->fs, noderev, pool);
 }
 
+svn_error_t *
+svn_fs_fs__dag_dir_entry(svn_fs_dirent_t **dirent,
+                         dag_node_t *node,
+                         const char* name,
+                         apr_pool_t *pool,
+                         apr_pool_t *node_pool)
+{
+  node_revision_t *noderev;
+  SVN_ERR(get_node_revision(&noderev, node, node_pool));
+
+  if (noderev->kind != svn_node_dir)
+    return svn_error_create(SVN_ERR_FS_NOT_DIRECTORY, NULL,
+                            _("Can't get entries of non-directory"));
+
+  /* Extract only the entry called NAME from this directory's contents. */
+  return svn_fs_fs__rep_contents_dir_partial((void **)dirent, node->fs,
+                                             noderev,
+                                             svn_fs_fs__extract_dir_entry,
+                                             (void *)name, pool);
+}
+
 
 svn_error_t *
 svn_fs_fs__dag_set_entry(dag_node_t *node,

Modified: subversion/branches/performance/subversion/libsvn_fs_fs/dag.h
URL: http://svn.apache.org/viewvc/subversion/branches/performance/subversion/libsvn_fs_fs/dag.h?rev=986817&r1=986816&r2=986817&view=diff
==============================================================================
--- subversion/branches/performance/subversion/libsvn_fs_fs/dag.h (original)
+++ subversion/branches/performance/subversion/libsvn_fs_fs/dag.h Wed Aug 18 17:44:19 2010
@@ -297,6 +297,17 @@ svn_error_t *svn_fs_fs__dag_dir_entries(
                                         apr_pool_t *pool,
                                         apr_pool_t *node_pool);
 
+/* Look up the entry named NAME in the directory NODE and set *DIRENT
+   to a copy of that entry, allocated in POOL.  If NODE has no entry
+   of that name, set *DIRENT to NULL.  Return SVN_ERR_FS_NOT_DIRECTORY
+   if NODE is not a directory.  NODE_POOL is used for any allocation
+   of memory that needs to live as long as NODE lives.
+ */
+svn_error_t * svn_fs_fs__dag_dir_entry(svn_fs_dirent_t **dirent,
+                                       dag_node_t *node,
+                                       const char* name,
+                                       apr_pool_t *pool,
+                                       apr_pool_t *node_pool);
 
 /* Set ENTRY_NAME in NODE to point to ID (with kind KIND), allocating
    from POOL.  NODE must be a mutable directory.  ID can refer to a

Modified: subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.c
URL: http://svn.apache.org/viewvc/subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.c?rev=986817&r1=986816&r2=986817&view=diff
==============================================================================
--- subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.c (original)
+++ subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.c Wed Aug 18 17:44:19 2010
@@ -1868,19 +1868,24 @@ get_packed_offset(apr_off_t *rev_offset,
   svn_stream_t *manifest_stream;
   svn_boolean_t is_cached;
   apr_int64_t shard;
+  apr_int64_t shard_pos;
   apr_array_header_t *manifest;
   apr_pool_t *iterpool;
 
   shard = rev / ffd->max_files_per_dir;
-  SVN_ERR(svn_cache__get((void **) &manifest, &is_cached,
-                         ffd->packed_offset_cache, &shard, pool));
+
+  /* position of the revision within its shard's manifest */
+  shard_pos = rev % ffd->max_files_per_dir;
+
+  /* fetch exactly that element into *rev_offset, if the manifest is found
+     in the cache */
+  SVN_ERR(svn_cache__get_partial((void **) rev_offset, &is_cached,
+                                 ffd->packed_offset_cache, &shard,
+                                 svn_fs_fs__get_sharded_offset, &shard_pos,
+                                 pool));
 
   if (is_cached)
-    {
-      *rev_offset = APR_ARRAY_IDX(manifest, rev % ffd->max_files_per_dir,
-                                  apr_off_t);
       return SVN_NO_ERROR;
-    }
 
   /* Open the manifest file. */
   SVN_ERR(svn_stream_open_readonly(&manifest_stream,
@@ -3956,6 +3961,65 @@ svn_fs_fs__rep_contents_dir(apr_hash_t *
 }
 
 svn_error_t *
+svn_fs_fs__rep_contents_dir_partial(void **result_p,
+                                    svn_fs_t *fs,
+                                    node_revision_t *noderev,
+                                    svn_cache__partial_getter_func_t deserializer,
+                                    void *baton,
+                                    apr_pool_t *pool)
+{
+  fs_fs_data_t *ffd = fs->fsap_data;
+  apr_hash_t *entries;
+  svn_boolean_t found = FALSE;
+
+  /* Are we looking for an immutable directory?  We could try the
+   * cache. */
+  if (! svn_fs_fs__id_txn_id(noderev->id))
+    {
+      const char *unparsed_id =
+          svn_fs_fs__id_unparse(noderev->id, pool)->data;
+
+      /* Cache lookup. Return only the requested part of the dir info. */
+      SVN_ERR(svn_cache__get_partial(result_p, &found, ffd->dir_cache,
+                                     unparsed_id, deserializer, baton,
+                                     pool));
+    }
+
+  if (! found)
+    {
+      char *serialized_entries;
+      apr_size_t serialized_len;
+
+      /* since we don't need the directory content later on, put it into
+         some sub-pool that will be reclaimed immediately after exiting
+         this function successfully. Upon failure, it will live as long
+         as pool.
+       */
+      apr_pool_t *sub_pool = svn_pool_create(pool);
+
+      /* read the dir from the file system. It will probably be put
+         into the cache for faster lookup in future calls. */
+      SVN_ERR(svn_fs_fs__rep_contents_dir(&entries, fs, noderev, sub_pool));
+
+      /* deserializer works on serialized data only. So, we need to provide
+         serialized dir entries */
+      SVN_ERR(svn_fs_fs__serialize_dir_entries(&serialized_entries,
+                                               &serialized_len,
+                                               entries,
+                                               sub_pool));
+      SVN_ERR(deserializer(result_p,
+                           serialized_entries,
+                           serialized_len,
+                           baton,
+                           pool));
+
+      apr_pool_destroy(sub_pool);
+    }
+
+  return SVN_NO_ERROR;
+}
+
+svn_error_t *
 svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
                         svn_fs_t *fs,
                         node_revision_t *noderev,

Modified: subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.h
URL: http://svn.apache.org/viewvc/subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.h?rev=986817&r1=986816&r2=986817&view=diff
==============================================================================
--- subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.h (original)
+++ subversion/branches/performance/subversion/libsvn_fs_fs/fs_fs.h Wed Aug 18 17:44:19 2010
@@ -105,6 +105,20 @@ svn_error_t *svn_fs_fs__rep_contents_dir
                                          node_revision_t *noderev,
                                          apr_pool_t *pool);
 
+/* Set *RESULT_P to the object created by DESERIALIZER when applied
+   to the serialized directory entries of node-revision NODEREV in
+   filesystem FS.  BATON is passed through to the DESERIALIZER
+   callback to describe the object to find.  The returned object
+   is allocated in POOL, which is also used for temporary
+   allocations. */
+svn_error_t *
+svn_fs_fs__rep_contents_dir_partial(void **result_p,
+                                    svn_fs_t *fs,
+                                    node_revision_t *noderev,
+                                    svn_cache__partial_getter_func_t deserializer,
+                                    void *baton,
+                                    apr_pool_t *pool);
+
 /* Set *CONTENTS to be a readable svn_stream_t that receives the text
    representation of node-revision NODEREV as seen in filesystem FS.
    Use POOL for temporary allocations. */

Modified: subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.c
URL: http://svn.apache.org/viewvc/subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.c?rev=986817&r1=986816&r2=986817&view=diff
==============================================================================
--- subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.c (original)
+++ subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.c Wed Aug 18 17:44:19 2010
@@ -679,4 +679,89 @@ svn_fs_fs__deserialize_dir_entries(void 
   return SVN_NO_ERROR;
 }
 
+/* Implements svn_cache__partial_getter_func_t for manifests: stores the
+ * apr_off_t at index *BATON (an apr_int64_t) in the location OUT points to. */
+svn_error_t *
+svn_fs_fs__get_sharded_offset(void **out,
+                              const char *data,
+                              apr_size_t data_len,
+                              void *baton,
+                              apr_pool_t *pool)
+{
+  const apr_off_t *manifest = (const apr_off_t *)data;
+  apr_int64_t shard_pos = *(apr_int64_t *)baton;
+  /* OUT is the caller's apr_off_t (see get_packed_offset), not a void*. */
+  *(apr_off_t *)out = manifest[shard_pos];
+
+  return SVN_NO_ERROR;
+}
+
+/* Implements svn_cache__partial_getter_func_t for a directory contents hash:
+ * sets *OUT to a POOL-allocated copy of the entry named by BATON, or NULL. */
+svn_error_t *
+svn_fs_fs__extract_dir_entry(void **out,
+                             const char *data,
+                             apr_size_t data_len,
+                             void *baton,
+                             apr_pool_t *pool)
+{
+  hash_data_t *hash_data = (hash_data_t *)data;
+  const char* name = baton;
+
+  /* resolve the reference to the entries array */
+  const svn_fs_dirent_t * const *entries =
+      svn_temp_deserializer__ptr(data, (const void **)&hash_data->entries);
+
+  /* binary search for the first entry whose name is not less than NAME */
+  apr_size_t lower = 0;
+  apr_size_t upper = hash_data->count;
+  apr_size_t middle;
+
+  for (middle = upper / 2; lower < upper; middle = (upper + lower) / 2)
+    {
+      const svn_fs_dirent_t *entry =
+          svn_temp_deserializer__ptr(entries, (const void **)&entries[middle]);
+      const char* entry_name =
+          svn_temp_deserializer__ptr(entry, (const void **)&entry->name);
+
+      int diff = strcmp(entry_name, name);
+      if (diff < 0)
+        lower = middle + 1;
+      else
+        upper = middle;
+    }
+
+  /* de-serialize that entry or return NULL, if no match has been found */
+  *out = NULL;
+  if (lower < hash_data->count)
+    {
+      const svn_fs_dirent_t *source =
+          svn_temp_deserializer__ptr(entries, (const void **)&entries[lower]);
+
+      /* Entries have been serialized one-by-one, each time including all
+       * nested structures and strings. Therefore, they occupy a single
+       * block of memory whose end-offset is either the beginning of the
+       * next entry or the end of the buffer. NOTE(review): confirm that
+       * DATA_LEN and the stored offsets are measured from the same base. */
+      apr_size_t end_offset = lower + 1 < hash_data->count
+                            ? ((apr_size_t*)entries)[lower+1]
+                            : data_len;
+      apr_size_t size = end_offset - ((apr_size_t*)entries)[lower];
+
+      /* copy & deserialize the entry */
+      svn_fs_dirent_t *new_entry = apr_palloc(pool, size);
+      memcpy(new_entry, source, size);
+
+      svn_temp_deserializer__resolve(new_entry, (void **)&new_entry->name);
+      if (strcmp(new_entry->name, name) == 0)
+        {
+          svn_fs_fs__id_deserialize(new_entry, (svn_fs_id_t **)&new_entry->id);
+          *(svn_fs_dirent_t **)out = new_entry;
+        }
+    }
+
+  return SVN_NO_ERROR;
+}
+
+
 

Modified: subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.h
URL: http://svn.apache.org/viewvc/subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.h?rev=986817&r1=986816&r2=986817&view=diff
==============================================================================
--- subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.h (original)
+++ subversion/branches/performance/subversion/libsvn_fs_fs/temp_serializer.h Wed Aug 18 17:44:19 2010
@@ -166,4 +166,27 @@ svn_fs_fs__deserialize_dir_entries(void 
                                    apr_size_t data_len,
                                    apr_pool_t *pool);
 
+/**
+ * Implements @ref svn_cache__partial_getter_func_t for a single element
+ * of a serialized manifest array; @a baton points to its apr_int64_t index.
+ */
+svn_error_t *
+svn_fs_fs__get_sharded_offset(void **out,
+                              const char *data,
+                              apr_size_t data_len,
+                              void *baton,
+                              apr_pool_t *pool);
+
+/**
+ * Implements @ref svn_cache__partial_getter_func_t for a single
+ * @ref svn_fs_dirent_t within a serialized directory contents hash,
+ * identified by its name in @a baton. Sets @a *out to NULL if not found.
+ */
+svn_error_t *
+svn_fs_fs__extract_dir_entry(void **out,
+                             const char *data,
+                             apr_size_t data_len,
+                             void *baton,
+                             apr_pool_t *pool);
+
 #endif