You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@subversion.apache.org by st...@apache.org on 2013/09/05 22:51:19 UTC

svn commit: r1520419 - /subversion/trunk/subversion/libsvn_repos/dump.c

Author: stefan2
Date: Thu Sep  5 20:51:19 2013
New Revision: 1520419

URL: http://svn.apache.org/r1520419
Log:
Significant performance improvement to 'svnadmin verify' for repositories
with large directories.

As part of the verification of a directory touched in the currently checked
revision, we verify that we can access all entries.  Since most of them have
not changed, they will use the same svn_fs_id_t value as in some previous
revision.  We simply cache the kind for all IDs that we encounter and can
then skip the actual data access for all those repeated IDs.

Corrupted IDs or mismatching node kinds will still be detected.

* subversion/libsvn_repos/dump.c
  (edit_baton): add a reference to the optional nodeID cache instance
  (get_dump_editor): add cache to baton
  (svn_repos_dump_fs3): update caller
  (verify_directory_entry): use cache to shortcut dirent verification
  (verify_one_revision): cache reference pass through

  (serialize_node_kind,
   deserialize_node_kind): serialization support for the new cache
  (svn_repos_verify_fs3): create a temporary cache with a unique key space

Modified:
    subversion/trunk/subversion/libsvn_repos/dump.c

Modified: subversion/trunk/subversion/libsvn_repos/dump.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_repos/dump.c?rev=1520419&r1=1520418&r2=1520419&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_repos/dump.c (original)
+++ subversion/trunk/subversion/libsvn_repos/dump.c Thu Sep  5 20:51:19 2013
@@ -38,6 +38,7 @@
 
 #include "private/svn_mergeinfo_private.h"
 #include "private/svn_fs_private.h"
+#include "private/svn_cache.h"
 
 #define ARE_VALID_COPY_ARGS(p,r) ((p) && SVN_IS_VALID_REVNUM(r))
 
@@ -130,6 +131,13 @@ struct edit_baton
   /* reusable buffer for writing file contents */
   char buffer[SVN__STREAM_CHUNK_SIZE];
   apr_size_t bufsize;
+
+  /* map nodeID -> node kind.  May be NULL.
+     The key is the string representation of the node ID given in
+     directory entries.  If we find an entry in this cache, the
+     respective node has already been verified as readable and being
+     of the type stored as value in the cache. */
+  svn_cache__t *verified_dirents_cache;
 };
 
 struct dir_baton
@@ -960,6 +968,7 @@ get_dump_editor(const svn_delta_editor_t
                 svn_revnum_t oldest_dumped_rev,
                 svn_boolean_t use_deltas,
                 svn_boolean_t verify,
+                svn_cache__t *verified_dirents_cache,
                 apr_pool_t *pool)
 {
   /* Allocate an edit baton to be stored in every directory baton.
@@ -984,6 +993,7 @@ get_dump_editor(const svn_delta_editor_t
   eb->verify = verify;
   eb->found_old_reference = found_old_reference;
   eb->found_old_mergeinfo = found_old_mergeinfo;
+  eb->verified_dirents_cache = verified_dirents_cache;
 
   /* Set up the editor. */
   dump_editor->open_root = open_root;
@@ -1180,7 +1190,8 @@ svn_repos_dump_fs3(svn_repos_t *repos,
                               "", stream, &found_old_reference,
                               &found_old_mergeinfo, NULL,
                               notify_func, notify_baton,
-                              start_rev, use_deltas_for_rev, FALSE, subpool));
+                              start_rev, use_deltas_for_rev, FALSE,
+                              NULL, subpool));
 
       /* Drive the editor in one way or another. */
       SVN_ERR(svn_fs_revision_root(&to_root, fs, rev, subpool));
@@ -1292,9 +1303,34 @@ verify_directory_entry(void *baton, cons
 {
   struct dir_baton *db = baton;
   svn_fs_dirent_t *dirent = (svn_fs_dirent_t *)val;
-  char *path = svn_relpath_join(db->path, (const char *)key, pool);
+  char *path;
   apr_hash_t *dirents;
   svn_filesize_t len;
+  svn_string_t *unparsed_id;
+
+  /* most directory entries will be unchanged from previous revs.
+     We should find those in the cache and they must match the
+     type defined in the DIRENT. */
+  if (db->edit_baton->verified_dirents_cache)
+    {
+      svn_node_kind_t kind;
+      svn_boolean_t found;
+      unparsed_id = svn_fs_unparse_id(dirent->id, pool);
+
+      SVN_ERR(svn_cache__get((void **)&kind, &found,
+                             db->edit_baton->verified_dirents_cache,
+                             unparsed_id->data, pool));
+
+      if (found)
+        return kind == dirent->kind
+             ? SVN_NO_ERROR
+             : svn_error_createf(SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
+                                 _("Unexpected node kind %d for '%s'. "
+                                   "Expected kind was %d."),
+                                 dirent->kind, path, kind);
+    }
+
+  path = svn_relpath_join(db->path, (const char *)key, pool);
 
   /* since we can't access the directory entries directly by their ID,
      we need to navigate from the FS_ROOT to them (relatively expensive
@@ -1316,6 +1352,11 @@ verify_directory_entry(void *baton, cons
                              dirent->kind, path);
   }
 
+  /* remember ID, kind pair */
+  if (db->edit_baton->verified_dirents_cache)
+    SVN_ERR(svn_cache__set(db->edit_baton->verified_dirents_cache,
+                           unparsed_id->data, &dirent->kind, pool));
+
   return SVN_NO_ERROR;
 }
 
@@ -1359,6 +1400,7 @@ verify_one_revision(svn_fs_t *fs,
                     svn_revnum_t start_rev,
                     svn_cancel_func_t cancel_func,
                     void *cancel_baton,
+                    svn_cache__t *verified_dirents_cache,
                     apr_pool_t *scratch_pool)
 {
   const svn_delta_editor_t *dump_editor;
@@ -1377,6 +1419,7 @@ verify_one_revision(svn_fs_t *fs,
                           notify_func, notify_baton,
                           start_rev,
                           FALSE, TRUE, /* use_deltas, verify */
+                          verified_dirents_cache,
                           scratch_pool));
   SVN_ERR(svn_delta_get_cancellation_editor(cancel_func, cancel_baton,
                                             dump_editor, dump_edit_baton,
@@ -1424,6 +1467,30 @@ verify_fs2_notify_func(svn_revnum_t revi
                             notify_baton->notify, pool);
 }
 
+/* cache entry (de-)serialization support for svn_node_kind_t. */
+svn_error_t *
+serialize_node_kind(void **data,
+                    apr_size_t *data_len,
+                    void *in,
+                    apr_pool_t *pool)
+{
+  *data_len = sizeof(svn_node_kind_t);
+  *data = in;
+
+  return SVN_NO_ERROR;
+}
+
+svn_error_t *
+deserialize_node_kind(void **out,
+                      void *data,
+                      apr_size_t data_len,
+                      apr_pool_t *pool)
+{
+  *(svn_node_kind_t *)out = *(svn_node_kind_t *)data;
+
+  return SVN_NO_ERROR;
+}
+
 svn_error_t *
 svn_repos_verify_fs3(svn_repos_t *repos,
                      svn_revnum_t start_rev,
@@ -1444,6 +1511,7 @@ svn_repos_verify_fs3(svn_repos_t *repos,
   struct verify_fs2_notify_func_baton_t *verify_notify_baton = NULL;
   svn_error_t *err;
   svn_boolean_t found_corruption = FALSE;
+  svn_cache__t *verified_dirents_cache = NULL;
 
   /* Determine the current youngest revision of the filesystem. */
   SVN_ERR(svn_fs_youngest_rev(&youngest, fs, pool));
@@ -1505,13 +1573,26 @@ svn_repos_verify_fs3(svn_repos_t *repos,
       svn_error_clear(err);
     }
 
+  if (svn_cache__get_global_membuffer_cache())
+    SVN_ERR(svn_cache__create_membuffer_cache
+                                 (&verified_dirents_cache,
+                                  svn_cache__get_global_membuffer_cache(),
+                                  serialize_node_kind,
+                                  deserialize_node_kind,
+                                  APR_HASH_KEY_STRING,
+                                  svn_uuid_generate(pool),
+                                  SVN_CACHE__MEMBUFFER_DEFAULT_PRIORITY,
+                                  FALSE,
+                                  pool));
+
   for (rev = start_rev; rev <= end_rev; rev++)
     {
       svn_pool_clear(iterpool);
 
       /* Wrapper function to catch the possible errors. */
-      err = verify_one_revision(fs, rev, notify_func, notify_baton, start_rev,
-                                cancel_func, cancel_baton, iterpool);
+      err = verify_one_revision(fs, rev, notify_func, notify_baton,
+                                start_rev, cancel_func, cancel_baton,
+                                verified_dirents_cache, iterpool);
 
       if (err)
         {