You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@subversion.apache.org by Max Bowsher <ma...@ukf.net> on 2004/03/31 18:12:44 UTC

[PATCH & RFC] svn load --bdb-txn-nosync

This patch is mainly a load of boring updating of callers to new argument
specs, in order to be able to conditionally call
DB_ENV->set_flags(DB_TXN_NOSYNC).

What I need comments on is: I've modified 2 public interfaces, creating
svn_repos_open2 and svn_fs_open_berkeley2. I'm wondering whether anyone has
any other pending modifications that could be combined into the same API
change.

Some statistics: This makes svnadmin load 5 times faster for me.



Max.




Index: subversion/svnadmin/main.c
===================================================================
--- subversion/svnadmin/main.c (revision 9243)
+++ subversion/svnadmin/main.c (working copy)
@@ -124,9 +124,10 @@
 static svn_error_t *
 open_repos (svn_repos_t **repos,
             const char *path,
+            svn_boolean_t no_sync,
             apr_pool_t *pool)
 {
-  SVN_ERR (svn_repos_open (repos, path, pool));
+  SVN_ERR (svn_repos_open2 (repos, path, no_sync, pool));
   svn_fs_set_warning_func (svn_repos_fs (*repos), warning_func, NULL);
   return SVN_NO_ERROR;
 }
@@ -282,7 +283,7 @@
      "was previously empty, its UUID will, by default, be changed to the\n"
      "one specified in the stream.  Progress feedback is sent to
stdout.\n",
      {'q', svnadmin__ignore_uuid, svnadmin__force_uuid,
-      svnadmin__parent_dir} },
+      svnadmin__parent_dir, svnadmin__bdb_txn_nosync} },

     {"lstxns", subcommand_lstxns, {0},
      "usage: svnadmin lstxns REPOS_PATH\n\n"
@@ -384,7 +385,7 @@
   svn_revnum_t youngest, revision;
   apr_pool_t *subpool = svn_pool_create (pool);

-  SVN_ERR (open_repos (&repos, opt_state->repository_path, pool));
+  SVN_ERR (open_repos (&repos, opt_state->repository_path, FALSE, pool));
   fs = svn_repos_fs (repos);
   SVN_ERR (svn_fs_youngest_rev (&youngest, fs, pool));

@@ -447,7 +448,7 @@
   svn_revnum_t lower = SVN_INVALID_REVNUM, upper = SVN_INVALID_REVNUM;
   svn_revnum_t youngest;

-  SVN_ERR (open_repos (&repos, opt_state->repository_path, pool));
+  SVN_ERR (open_repos (&repos, opt_state->repository_path, FALSE, pool));
   fs = svn_repos_fs (repos);
   SVN_ERR (svn_fs_youngest_rev (&youngest, fs, pool));

@@ -535,7 +536,8 @@
   svn_repos_t *repos;
   svn_stream_t *stdin_stream, *stdout_stream = NULL;

-  SVN_ERR (open_repos (&repos, opt_state->repository_path, pool));
+  SVN_ERR (open_repos (&repos, opt_state->repository_path,
opt_state->bdb_txn_nosync,
+        pool));

   /* Read the stream from STDIN.  Users can redirect a file. */
   SVN_ERR (create_stdio_stream (&stdin_stream,
@@ -564,7 +566,7 @@
   apr_array_header_t *txns;
   int i;

-  SVN_ERR (open_repos (&repos, opt_state->repository_path, pool));
+  SVN_ERR (open_repos (&repos, opt_state->repository_path, FALSE, pool));
   fs = svn_repos_fs (repos);
   SVN_ERR (svn_fs_list_transactions (&txns, fs, pool));

@@ -596,7 +598,7 @@
   /* Since db transactions may have been replayed, it's nice to tell
      people what the latest revision is.  It also proves that the
      recovery actually worked. */
-  SVN_ERR (open_repos (&repos, opt_state->repository_path, pool));
+  SVN_ERR (open_repos (&repos, opt_state->repository_path, FALSE, pool));
   SVN_ERR (svn_fs_youngest_rev (&youngest_rev, svn_repos_fs (repos),
pool));
   printf ("The latest repos revision is %"
           SVN_REVNUM_T_FMT ".\n", youngest_rev);
@@ -666,7 +668,7 @@
   int i;
   apr_pool_t *subpool = svn_pool_create (pool);

-  SVN_ERR (open_repos (&repos, opt_state->repository_path, pool));
+  SVN_ERR (open_repos (&repos, opt_state->repository_path, FALSE, pool));
   fs = svn_repos_fs (repos);

   SVN_ERR (svn_opt_parse_all_args (&args, os, pool));
@@ -752,7 +754,7 @@
                                        NULL, pool));

   /* Open the filesystem  */
-  SVN_ERR (open_repos (&repos, opt_state->repository_path, pool));
+  SVN_ERR (open_repos (&repos, opt_state->repository_path, FALSE, pool));

   /* If we are bypassing the hooks system, we just hit the filesystem
      directly. */
@@ -785,7 +787,7 @@

   /* This whole process is basically just a dump of the repository
      with no interest in the output. */
-  SVN_ERR (open_repos (&repos, opt_state->repository_path, pool));
+  SVN_ERR (open_repos (&repos, opt_state->repository_path, FALSE, pool));
   SVN_ERR (svn_fs_youngest_rev (&youngest, svn_repos_fs (repos), pool));
   SVN_ERR (create_stdio_stream (&stderr_stream, apr_file_open_stderr,
pool));
   SVN_ERR (svn_repos_dump_fs (repos, NULL, stderr_stream,
Index: subversion/include/svn_fs.h
===================================================================
--- subversion/include/svn_fs.h (revision 9243)
+++ subversion/include/svn_fs.h (working copy)
@@ -134,7 +134,19 @@
  * NOTE: you probably don't want to use this directly, especially not
  * if it's immediately preceded by a call to @c svn_fs_new().  Take a
  * look at @c svn_repos_open() instead.
+ *
+ * If @a no_sync is true, ask the filesystem backend to avoid synchronous
+ * disk I/O.
  */
+svn_error_t *svn_fs_open_berkeley2 (svn_fs_t *fs, const char *path,
+    svn_boolean_t no_sync);
+
+/**
+ * @deprecated Provided for backward compatibility with the 1.0.0 API.
+ *
+ * Similar to svn_fs_open_berkeley2(), but with the @a no_sync
+ * parameter always set to @c FALSE.
+ */
 svn_error_t *svn_fs_open_berkeley (svn_fs_t *fs, const char *path);


Index: subversion/include/svn_repos.h
===================================================================
--- subversion/include/svn_repos.h (revision 9243)
+++ subversion/include/svn_repos.h (working copy)
@@ -88,8 +88,20 @@
  * Acquires a shared lock on the repository, and attaches a cleanup
  * function to @a pool to remove the lock.  If no lock can be acquired,
  * returns error, with undefined effect on @a *repos_p.  If an exclusive
- * lock is present, this blocks until it's gone.
+ * lock is present, this blocks until it's gone. If @a no_sync is true,
+ * ask the filesystem backend to avoid synchronous disk I/O.
  */
+svn_error_t *svn_repos_open2 (svn_repos_t **repos_p,
+                              const char *path,
+                              svn_boolean_t no_sync,
+                              apr_pool_t *pool);
+
+/**
+ * @deprecated Provided for backward compatibility with the 1.0.0 API.
+ *
+ * Similar to svn_repos_open2(), but with the @a no_sync
+ * parameter always set to @c FALSE.
+ */
 svn_error_t *svn_repos_open (svn_repos_t **repos_p,
                              const char *path,
                              apr_pool_t *pool);
Index: subversion/libsvn_fs/fs.c
===================================================================
--- subversion/libsvn_fs/fs.c (revision 9243)
+++ subversion/libsvn_fs/fs.c (working copy)
@@ -591,7 +591,7 @@


 svn_error_t *
-svn_fs_open_berkeley (svn_fs_t *fs, const char *path)
+svn_fs_open_berkeley2 (svn_fs_t *fs, const char *path, svn_boolean_t
no_sync)
 {
   svn_error_t *svn_err;
   const char *path_native;
@@ -617,6 +617,13 @@
                                     0666));
   if (svn_err) goto error;

+  if (no_sync)
+  {
+    svn_err = BDB_WRAP (fs, "setting DB_TXN_NOSYNC",
+                       fs->env->set_flags (fs->env, DB_TXN_NOSYNC, 1));
+    if (svn_err) goto error;
+  }
+
   /* Open the various databases.  */
   svn_err = BDB_WRAP (fs, "opening 'nodes' table",
                      svn_fs__bdb_open_nodes_table (&fs->nodes,
@@ -658,6 +665,12 @@
   return svn_err;
 }

+svn_error_t *
+svn_fs_open_berkeley (svn_fs_t *fs, const char *path)
+{
+  return svn_fs_open_berkeley2(fs, path, FALSE);
+}
+
 
 /* Copying a live Berkeley DB-base filesystem.  */

Index: subversion/libsvn_repos/repos.c
===================================================================
--- subversion/libsvn_repos/repos.c (revision 9243)
+++ subversion/libsvn_repos/repos.c (working copy)
@@ -1010,6 +1010,7 @@
            const char *path,
            int locktype,
            svn_boolean_t open_fs,
+           svn_boolean_t no_sync,
            apr_pool_t *pool)
 {
   svn_repos_t *repos;
@@ -1043,7 +1044,7 @@

   /* Open up the Berkeley filesystem only after obtaining the lock. */
   if (open_fs)
-    SVN_ERR (svn_fs_open_berkeley (repos->fs, repos->db_path));
+    SVN_ERR (svn_fs_open_berkeley2 (repos->fs, repos->db_path, no_sync));

   *repos_p = repos;
   return SVN_NO_ERROR;
@@ -1071,9 +1072,10 @@


 svn_error_t *
-svn_repos_open (svn_repos_t **repos_p,
-                const char *path,
-                apr_pool_t *pool)
+svn_repos_open2 (svn_repos_t **repos_p,
+                 const char *path,
+                 svn_boolean_t no_sync,
+                 apr_pool_t *pool)
 {
   /* Fetch a repository object initialized with a shared read/write
      lock on the database. */
@@ -1081,6 +1083,7 @@
   SVN_ERR (get_repos (repos_p, path,
                       APR_FLOCK_SHARED,
                       TRUE,     /* open the db into repos->fs. */
+                      no_sync,
                       pool));

   return SVN_NO_ERROR;
@@ -1088,6 +1091,15 @@


 svn_error_t *
+svn_repos_open (svn_repos_t **repos_p,
+                const char *path,
+                apr_pool_t *pool)
+{
+  return svn_repos_open2(repos_p, path, FALSE, pool);
+}
+
+
+svn_error_t *
 svn_repos_delete (const char *path,
                   apr_pool_t *pool)
 {
@@ -1144,6 +1156,7 @@
   SVN_ERR (get_repos (&repos, path,
                       APR_FLOCK_EXCLUSIVE,
                       FALSE,    /* don't try to open the db yet. */
+                      FALSE,
                       subpool));

   /* Recover the database to a consistent state. */
@@ -1166,6 +1179,7 @@
   SVN_ERR (get_repos (&repos, path,
                       APR_FLOCK_SHARED,
                       FALSE,     /* Do not open fs. */
+                      FALSE,
                       pool));

   SVN_ERR (svn_fs_berkeley_logfiles (logfiles,
@@ -1278,6 +1292,7 @@
   SVN_ERR (get_repos (&src_repos, src_path,
                       APR_FLOCK_SHARED,
                       FALSE,    /* don't try to open the db yet. */
+                      FALSE,
                       pool));

   /* If we are going to clean logs, then get an exclusive lock on
@@ -1319,6 +1334,7 @@
   SVN_ERR (get_repos (&dst_repos, dst_path,
                       APR_FLOCK_EXCLUSIVE,
                       FALSE,    /* don't try to open the db yet. */
+                      FALSE,
                       pool));




---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: [PATCH & RFC] svn load --bdb-txn-nosync

Posted by Max Bowsher <ma...@ukf.net>.
Philip Martin wrote:
> "Max Bowsher" <ma...@ukf.net> writes:
>
>> Perhaps load should be acquiring an exclusive lock? I doubt the loader
would
>> be particularly happy if someone managed to fit in a commit in the middle
of
>> a load.
>
> By "loader" do you mean the user, or the load process?

load process.
Sorry to be unclear.

> I've tried loading a dump, one that includes copies, and inserting
> commits (of items not affected by the dump) during the load doesn't
> cause any problems that I can see.  All the revisions adjusted to cope
> with the "extra" commits.  Obviously if I were to modify an item that
> is going to subsequently get modified/copied by the dump it would be
> different, but that's to be expected.
>
> If the repository administrator does take the repository off line, in
> order to get exclusive access, then editing the DB_CONFIG file doesn't
> seem like much of a problem to me.

Editing DB_CONFIG is harder to automate than a simple option.

I suggest resuming this discussion when the fs abstraction work is complete.

Max.


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: [PATCH & RFC] svn load --bdb-txn-nosync

Posted by Philip Martin <ph...@codematters.co.uk>.
"Max Bowsher" <ma...@ukf.net> writes:

> Perhaps load should be acquiring an exclusive lock? I doubt the loader would
> be particularly happy if someone managed to fit in a commit in the middle of
> a load.

By "loader" do you mean the user, or the load process?

I've tried loading a dump, one that includes copies, and inserting
commits (of items not affected by the dump) during the load doesn't
cause any problems that I can see.  All the revisions adjusted to cope
with the "extra" commits.  Obviously if I were to modify an item that
is going to subsequently get modified/copied by the dump it would be
different, but that's to be expected.

If the repository administrator does take the repository off line, in
order to get exclusive access, then editing the DB_CONFIG file doesn't
seem like much of a problem to me.

-- 
Philip Martin

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: [PATCH & RFC] svn load --bdb-txn-nosync

Posted by "C. Michael Pilato" <cm...@collab.net>.
"Max Bowsher" <ma...@ukf.net> writes:

> Additionally, cvs2svn could really make use of this.  I think it is
> worth it.
>
> Perhaps we should define some opaque pass through mechanism for
> backend-specific options. Perhaps an APR hash?

This concept is already part of the FS abstraction proposals.
Configuration hashes pass to the filesystem code to decide which
backend to use, and to provide options to that backend.

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: [PATCH & RFC] svn load --bdb-txn-nosync

Posted by Max Bowsher <ma...@ukf.net>.
Philip Martin wrote:
> "Max Bowsher" <ma...@ukf.net> writes:
>
>> This patch is mainly a load of boring updating of callers to new argument
>> specs, in order to be able to conditionally call
>> DB_ENV->set_flags(DB_TXN_NOSYNC).
>>
>> What I need comments on is: I've modified 2 public interfaces, creating
>> svn_repos_open2 and svn_fs_open_berkeley2. I'm wondering whether anyone
has
>> any other pending modifications that could be combined into the same API
>> change.
>>
>> Some statistics: This makes svnadmin load 5 times faster for me.
>
> No log message.
>
> The repository administrator can achive the same thing by setting
> DB_TXN_NOSYNC in the DB_CONFIG file.  Your change makes it more
> convenient but I'm not sure that's enough to put this BDB specific
> flag in the libsvn_repos interface.

The alternative is quite messy and not easily automatable:
Edit DB_CONFIG, svnadmin recover, svnadmin load, Edit DB_CONFIG, svnadmin
recover.

Additionally, cvs2svn could really make use of this.
I think it is worth it.

Perhaps we should define some opaque pass through mechanism for
backend-specific options. Perhaps an APR hash?

>  How well does your change work if
> another process, one not using DB_TXN_NOSYNC, is accessing the
> database at the same time?

The bdb docs are not entirely clear, I believe the other process will cause
log flushes and disk syncs.

Perhaps load should be acquiring an exclusive lock? I doubt the loader would
be particularly happy if someone managed to fit in a commit in the middle of
a load.

Max.


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: [PATCH & RFC] svn load --bdb-txn-nosync

Posted by Philip Martin <ph...@codematters.co.uk>.
"Max Bowsher" <ma...@ukf.net> writes:

> This patch is mainly a load of boring updating of callers to new argument
> specs, in order to be able to conditionally call
> DB_ENV->set_flags(DB_TXN_NOSYNC).
>
> What I need comments on is: I've modified 2 public interfaces, creating
> svn_repos_open2 and svn_fs_open_berkeley2. I'm wondering whether anyone has
> any other pending modifications that could be combined into the same API
> change.
>
> Some statistics: This makes svnadmin load 5 times faster for me.

No log message.

The repository administrator can achive the same thing by setting
DB_TXN_NOSYNC in the DB_CONFIG file.  Your change makes it more
convenient but I'm not sure that's enough to put this BDB specific
flag in the libsvn_repos interface.  How well does your change work if
another process, one not using DB_TXN_NOSYNC, is accessing the
database at the same time?

-- 
Philip Martin

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org

Re: [PATCH & RFC] svn load --bdb-txn-nosync

Posted by "C. Michael Pilato" <cm...@collab.net>.
"Max Bowsher" <ma...@ukf.net> writes:

> This patch is mainly a load of boring updating of callers to new argument
> specs, in order to be able to conditionally call
> DB_ENV->set_flags(DB_TXN_NOSYNC).
> 
> What I need comments on is: I've modified 2 public interfaces, creating
> svn_repos_open2 and svn_fs_open_berkeley2. I'm wondering whether anyone has
> any other pending modifications that could be combined into the same API
> change.

There will almost certainly be changes in this area in coming days
(for fs API abstraction).  Do not commit this patch until that work
has been done.

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@subversion.tigris.org
For additional commands, e-mail: dev-help@subversion.tigris.org